# Estimation of SAOM models using RSiena, calculation of post-hoc goodness of fit tests # For 2018 paper "Social interactions shape individual and collective personality in social spiders" ###################################################################################### rm(list=ls()) cpus=7 #The number of CPUs used for parallel processing on a cluster. #Because we had 8 CPUs, use 7 and leave 1 spare. setwd("/xx") #Below are two functions used for the post-hoc GOF tests ############### #For Geodesic distance we use this function here GeodesicDistribution <- function (i, data, sims, period, groupName, varName, levls=c(1:5,Inf), cumulative=TRUE, ...) { x <- networkExtraction(i, data, sims, period, groupName, varName) require(sna) a <- sna::geodist(x)$gdist if (cumulative) { gdi <- sapply(levls, function(i){ sum(a<=i) }) } else { gdi <- sapply(levls, function(i){ sum(a==i) }) } names(gdi) <- as.character(levls) gdi } # Holland and Leinhardt Triad Census; see ?sna::triad.census. TriadCensus <- function(i, data, sims, wave, groupName, varName, levls=1:16){ unloadNamespace("igraph") # to avoid package clashes require(network) require(sna) x <- networkExtraction(i, data, sims, wave, groupName, varName) if (network.edgecount(x) <= 0){x <- symmetrize(x)} # because else triad.census(x) will lead to an error tc <- sna::triad.census(x)[1,levls] # names are transferred automatically tc } ############################################################# require(abind) #use abind package to create sequential matrices for the 19 observations require(RSiena) #the below used RSiena 1.2-3 (September 8, 2017) cuddling=read.csv('spider_networks.csv') #by 'cuddling', we mean resting interaction networks... boldness= read.csv('spider_boldness.csv', header=TRUE) #Allocating source colony number labels cuddling$Source=as.character(cuddling$Colony) cuddling$Source[substring(cuddling$Source,1,4)=="BR10"]=1 cuddling$Source[substring(cuddling$Source,1,4)=="BR13"]=2 cuddling$Source[substring(cuddling$Source,1,3)=="BR3"]=3 cuddling$Source=as.factor(cuddling$Source) #Allocating treatment type labels cuddling$Treat.Type=as.character(cuddling$Colony) cuddling$Treat.Type[cuddling$Treat.Type=="BR10-1"]="allbold" cuddling$Treat.Type[cuddling$Treat.Type=="BR10-10"]="allbold" cuddling$Treat.Type[cuddling$Treat.Type=="BR10-2"]="allbold" cuddling$Treat.Type[cuddling$Treat.Type=="BR13-5"]="allbold" cuddling$Treat.Type[cuddling$Treat.Type=="BR13-9"]="allbold" cuddling$Treat.Type[cuddling$Treat.Type=="BR10-3"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR10-4"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR10-5"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR10-9"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR13-1"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR13-2"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR13-6"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR3-2"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR3-3"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR3-4"]="keystone" cuddling$Treat.Type[cuddling$Treat.Type=="BR10-6"]="allshy" cuddling$Treat.Type[cuddling$Treat.Type=="BR10-7"]="allshy" cuddling$Treat.Type[cuddling$Treat.Type=="BR10-8"]="allshy" cuddling$Treat.Type[cuddling$Treat.Type=="BR13-3"]="allshy" cuddling$Treat.Type[cuddling$Treat.Type=="BR13-4"]="allshy" cuddling$Treat.Type[cuddling$Treat.Type=="BR13-7"]="allshy" cuddling$Treat.Type[cuddling$Treat.Type=="BR13-8"]="allshy" cuddling$Treat.Type[cuddling$Treat.Type=="BR3-1"]="allshy" cuddling$Treat.Type[cuddling$Treat.Type=="BR3-5"]="allshy" cuddling$Treat.Type=as.factor(cuddling$Treat.Type) boldness[2:8]=600-boldness[2:8] #So that boldness increases with quicker response times names(boldness)[2:8]=1:7 unq_treat=unique(cuddling$Treat.Type) unq_ass=unique(cuddling$Assay.Type) #One can set this up as a batch to estimate all 8 models at once; in practice this doesn't work very well #because some manual checking/tweaking is necessary along the way to ensure convergence is being achieved #Therefore work through the models one by one specifying s=1, t=1, etc #t=1 (allbold), t=2 (keystone), t=3 (all shy) #n=1 #8 cases (3 source colonies x 3 treatment types, -1 non-existent case) #for (s in 1:3){ # for (t in 1:3){ # if (!((s==3) & (t==1))){ #need to skip case for s=3, t=1 (allbold), no data s=1 t=1 #name="s1t1_minimal6.RData" #namep="RS_minimal_s1t1" name_save=paste("s",s,"t",t,"_final.RData",sep="") #name of the final save final namep=paste("RS_19_s",s,"t",t,sep="") #name of the RSiena output project file cuddling_pool=cuddling[cuddling$Source==s&cuddling$Treat.Type==unq_treat[t],] #grouping networks by source colony s and treatment t unq_colony=unique(cuddling_pool$Colony) #list of colonies present inside newly created subgroup source_treat_net=list() source_treat_bold=list() for (i in 1:length(unq_colony)){ ix=unq_colony[i] ind=boldness$Individual[boldness$colony==ix] net_series=matrix(data=NA,length(ind),length(ind)) #to be series of networks t=1,...,19 net_bold=matrix(data=NA,1,length(ind)) #to be series of boldness measurements for (jx in 1:7){ for (k in 1:3){ kx=unq_ass[k] bld=boldness[boldness$colony==ix,jx+1] atts=data.frame(ind,bld) sp1=cuddling_pool$Spider.1[cuddling_pool$Colony==ix&cuddling_pool$Week==jx&cuddling_pool$Assay.Type==kx] sp2=cuddling_pool$Spider.2[cuddling_pool$Colony==ix&cuddling_pool$Week==jx&cuddling_pool$Assay.Type==kx] #require(igraph) #Can be necessary to reload each iteration because of sna package's masking of functions, if GOF is calculated net=graph.data.frame(data.frame(sp1,sp2),vertices=atts,directed=FALSE) net_bold=abind(net_bold,t(as.matrix(V(net)$bld)),along=3) net_adj=as_adj(net,sparse=FALSE) dead=ind[is.na(bld)] #identify which spiders have died (boldness will be NA) #method of structural zeros for dead spiders, p.30 / 4.3.1 for (d in 1:length(dead)){ net_adj[dimnames(net_adj)[[1]]==dead[d],]=10 #10 indicates a structural zero in adjacency matrix (cannot form connections) net_adj[,dimnames(net_adj)[[1]]==dead[d]]=10 } net_series=abind(net_series, net_adj, along = 3) #bind series of networks/boldness measurements along 3rd dimension } } source_treat_net[[i]]=net_series #obtain 2-4 sequences of observations across multiple colonies, which then need to be combined into one sequence source_treat_bold[[i]]=net_bold } nodes=0 #while most colonies start with 10 spiders, a very few start with 11, so need to get suitable dimension (e.g. may be 41x41 adjacency matrix) for (i in 1:length(unq_colony)){ nodes=nodes+dim(source_treat_net[[i]])[1] } pooled_series=array(10,dim=c(nodes,nodes,22)) #pooled array full of structural zeros pooled_bold=array(0,dim=c(nodes,22)) r=1 for (i in 1:length(unq_colony)){ #now place colony adjacency matrices into one large shared matrix, with structural zeros to prevent between-colony interaction toadd=dim(source_treat_net[[i]])[1] pooled_series[r:((r-1)+toadd),r:((r-1)+toadd),]=source_treat_net[[i]] pooled_bold[r:((r-1)+toadd),]=source_treat_bold[[i]] r=r+toadd } pooled_series=pooled_series[,,2:20] #truncate empty first index pooled_bold=pooled_bold[,2:20] pooled_bold[is.na(pooled_bold)]=0 #where boldness is NA, zero is less confusing to RSiena (and will not be used either way because node will be structural zero) for (a in 1:dim(pooled_bold)[1]){ # Interpolation of boldness by experiment day. a is individual spider x_out=c(1,3,5,8,10,12,15,17,19,22,24,26,29,31,33,36,38,40,43) #list of days we want output for x=c(1,8,15,22,29,36,43) #list of days we actually have y=pooled_bold[a,c(1,4,7,10,13,16,19)] #list of boldness measurements that we have pooled_bold[a,]=approx(x,y,x_out)$y } for (a in 1:dim(pooled_bold)[1]){ #put 0-600 boldness on 1-3 scale for (b in 1:dim(pooled_bold)[2]){ if (pooled_bold[a,b] <= 200){pooled_bold[a,b]=1} if (pooled_bold[a,b] > 200 & pooled_bold[a,b] <= 400){pooled_bold[a,b]=2} if (pooled_bold[a,b] > 400){pooled_bold[a,b]=3} } } spider_cuddles=sienaDependent(pooled_series,allowOnly=FALSE) spider_boldness=sienaDependent(pooled_bold,type="behavior",allowOnly=FALSE) data_siena=sienaDataCreate(spider_cuddles,spider_boldness) #create RSiena data object combining the network and behaviour data siena_algm=sienaAlgorithmCreate(projname=namep) #default RSiena algorithm settings, specified output project file name # reportname=paste("data_s",s,"_t",t,sep="") #the print01Report function provides a useful summary of the data, particular interest in the Jaccard index of tie turnover > 0.2 # print01Report(data_siena,modelname=reportname) # network_effects=getEffects(data_siena) #getEffects gets the default set of effects from the data #effectsDocumentation(network_effects) #effectsDocumentation can be run on Windows/Mac (not the cluster) to open an HTML file listing all possible effects for the data provided network_effects$include[26]=FALSE #turn off the default transitive triads effect #network_effects=includeEffects(network_effects,transTriads,name="spider_cuddles",type="eval",include=FALSE) #this is the proper way of doing the above #add in the effects of interest/necessary for good convergence. Probably do not want to do all at once before estimating model for the first time. network_effects=includeEffects(network_effects,Jout,name="spider_cuddles",type="eval") #Jaccard similarity network_effects=includeEffects(network_effects,egoPlusAltX,name="spider_cuddles",type="eval",interaction1="spider_boldness") #boldness on network, egoX + altX for undirected network_effects=includeEffects(network_effects,avAlt,name="spider_boldness",type="eval",interaction1="spider_cuddles") #effect network on bold network_effects=includeEffects(network_effects,degPlus,name="spider_cuddles",type="eval") #inPop + outAct for undirected network_effects=includeEffects(network_effects,from.w.ind,name="spider_cuddles",type="eval",interaction1="spider_cuddles",interaction2="spider_cuddles") #Run the RSiena model estimation model. # batch=TRUE turns off the GUI for use on the cluster (or otherwise - not especially useful to see) # verbose=TRUE gives running update of what the algorithm is doing # returnDeps=TRUE saves the simulated networks for use in the post-hoc GOF tests. This makes the file larger (e.g. around 20MB) # useCluster=TRUE is for running on multiple CPUs # nbrNodes specifies the number of CPUs to use. For a laptop, commonly 2 available RS_model_t0 <- siena07(siena_algm, data=data_siena, effects=network_effects, batch=TRUE,verbose=TRUE,returnDeps=TRUE,useCluster=TRUE,nbrNodes=cpus) #Once a model has been estimated, the network_effects object, which contains the values for each of the effects specified, can be updated to # have new values based on the model output (provided it's going in the right direction - may need to try again) #The below is not set up for running in a loop, but rather manual iteration: network_effects=updateTheta(network_effects,RS_model_t0) #Sometimes the period-specific rate constants (network or boldness, more commonly the former) can diverge from e.g. 6,7 to 100+ #This can be fixed by setting (e.g.) network_effects$fix[1]=TRUE for period 1, and set (e.g.) network_effects$initialValue[1]=20 #i.e. a cap of 20 for such network constants, which allows the rest of the estimation to proceed smoothly. Once a good overall convergence level #has been achieved, the fix may be removed, e.g. network_effects$fix[1]=FALSE to see whether siena07 is now capable of finding a more #meaningful value for that constant. #the sienaAlgorithm can be made to run for longer. n3=6000, nsub=6 is recommded in the RSiena manual for publishable results. #but not necessary for the first few runs - do when near a final result siena_algm <- sienaAlgorithmCreate(projname=namep,cond=FALSE,n3=6000,nsub=6) #One may wish to create a new RSiena model object rather than over-writing the previous one. In this way, one can make incremental changes between # a good model ('RS_model_t0') and an iterated model ('RS_model_t1') which may or may not turn out to be an improvement RS_model_t1 <- siena07(siena_algm, data=data_siena, effects=network_effects, batch=TRUE,verbose=TRUE,returnDeps=TRUE,useCluster=TRUE,nbrNodes=cpus) #save.image(name_save) #if saving the estimated model before moving onto others # }}} #close brackets if being done for the whole dataset (need to fix final model estimation step first) #Once a good model has been obtained, run the GOF tests. These are hard to parallelise and run slowly (proportional to n3= x) s.gof1a<-sienaGOF(RS_model_t0,verbose=TRUE,OutdegreeDistribution,varName="spider_cuddles") s.gof1b<-sienaGOF(RS_model_t0,verbose=TRUE,GeodesicDistribution,varName="spider_cuddles") s.gof1c<-sienaGOF(RS_model_t0,verbose=TRUE,TriadCensus,varName="spider_cuddles") s.gof1d<-sienaGOF(RS_model_t0,verbose=TRUE,BehaviorDistribution,varName="spider_boldness") #Sufficient goodness of fit is indicated by p>0.01 #The test results can be plotted, with scale=TRUE,center=TRUE sometimes useful for readability #plot(s.gof1c,scale=TRUE,center=TRUE) #Once models are obtained, can perform meta-analysis #If for example, the models are saved in separate files of format s1t1_final.RData final_models=list() n=1 for (s in 1:3){ #source colony for (t in 1:3){ #treatment: t1=allbold, t2=keystone, t3=allshy if (!((s==3) & (t==1))){ #necessary because s3t1 does not exist final_models[[n]]=list() namep=paste("s",s,"t",t,"_final.RData",sep="") load(namep) final_models[[n]]=RS_model_t0 #this is the RSiena model object n=n+1 }}} #siena08 is RSiena's function for performing meta-analysis meta=siena08(final_models[[1]],final_models[[2]],final_models[[3]],final_models[[4]], final_models[[5]],final_models[[6]],final_models[[7]],final_models[[8]], maxit=100) meta #call meta object to display results