--- title: "Appendix S1: A guide to choosing and implementing reference models for social network analysis" author: "Matthew Silk" date: "02/02/2021" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` #### Please direct any questions about the examples presented in this R script to Matthew Silk (matthewsilk@outlook.com) ## Section 1 -- Preparation First we are going to prepare the R environment and load the necessary packages for our case study. If you want to explore the variability in the networks and result possible you can change the number in the set.seed() function to produce different networks. Note that throughout this script we use an edited version of the asnipe get_network2 function that doesn't print messages ```{r setup2, message=FALSE,warning=FALSE} #Set seed for reproducibility #Can be changed to produce different networks and explore variability in results set.seed(35) ##load packages library(asnipe) library(igraph) library(boot) library(prodlim) library(sna) library(assortnet) library(blockmodels) library(ergm) library(ergm.count) library(tnet) library(vegan) ``` ```{r edited_asnipe_function, include=FALSE} get_network2<-function (association_data, data_format = "GBI", association_index = "SRI", identities = NULL, which_identities = NULL, times = NULL, occurrences = NULL, locations = NULL, which_locations = NULL, start_time = NULL, end_time = NULL, classes = NULL, which_classes = NULL, enter_time = NULL, exit_time = NULL) { if (is.null(association_data)) { stop("No association_data data!") } if (length(dim(association_data)) != 2 & data_format == "GBI") { stop("Invalid dimensions for association_data") } if (length(dim(association_data)) != 3 & data_format == "SP") { stop("Invalid dimensions for association_data") } if ((length(identities) != ncol(association_data) & !is.null(identities)) == TRUE) { stop("Length of identities does not match number of individuals") } if ((length(times) != nrow(association_data) & !is.null(times)) == TRUE) { stop("Length of times does not match number of groups") } if ((length(occurrences[1, ]) != nrow(association_data) & !is.null(occurrences)) == TRUE) { stop("Number of occurrence periods does not match number of sampling periods") } if ((length(occurrences[, 1]) != ncol(association_data) & !is.null(occurrences)) == TRUE) { stop("Number of individuals in occurrences does not match number of individuals in sampling periods") } if ((length(locations) != nrow(association_data) & !is.null(locations)) == TRUE) { stop("Length of locations does not match number of groups") } if ((length(classes) != ncol(association_data) & !is.null(classes)) == TRUE) { stop("Length of classes does not match number of individuals") } if ((!is.null(which_identities) & is.null(identities)) == TRUE) { stop("Cannot apply which_identities without identities data") } if ((!is.null(which_locations) & is.null(locations)) == TRUE) { stop("Cannot apply which_locations without locations data") } if ((!is.null(start_time) & is.null(times)) == TRUE) { stop("Cannot apply start_time without times data") } if ((!is.null(end_time) & is.null(times)) == TRUE) { stop("Cannot apply end_time without times data") } if ((!is.null(which_classes) & is.null(classes)) == TRUE) { stop("Cannot apply which_class without classes data") } if ((!is.null(enter_time) & is.null(times)) == TRUE) { stop("Cannot control for overlapping time without observation times") } if ((!is.null(exit_time) & is.null(times)) == TRUE) { stop("Cannot control for overlapping 
time without observation times") } if (!is.null(colnames(association_data)) & !all(colnames(association_data) == identities)) { stop("Identities is not in the same order as columns in association_data") } if (!any(association_index %in% c("SRI", "HWI"))) { stop("Unknown association_index") } if (data_format == "GBI") { association_data <- as.matrix(association_data) } else { association_data <- as.array(association_data) } if (!is.null(which_identities)) { if (data_format == "GBI") association_data <- association_data[, which(identities %in% which_identities)] if (data_format == "SP") association_data <- association_data[, which(identities %in% which_identities), which(identities %in% which_identities)] identities <- identities[which(identities %in% which_identities)] } if (!is.null(start_time) & is.null(end_time)) { end_time <- max(times) } if (!is.null(end_time) & is.null(start_time)) { start_time <- min(times) } if (!is.null(start_time) & !is.null(end_time)) { subs <- which(times >= start_time & times <= end_time) if (data_format == "GBI") association_data <- association_data[subs, ] if (data_format == "SP") association_data <- association_data[subs, , ] locations <- locations[subs] times <- times[subs] } if (!is.null(which_locations)) { subs <- which(locations %in% which_locations) if (data_format == "GBI") association_data <- association_data[subs, ] if (data_format == "SP") association_data <- association_data[subs, , ] locations <- locations[subs] times <- times[subs] } if (!is.null(which_classes)) { if (data_format == "GBI") association_data <- association_data[, which(classes %in% which_classes)] if (data_format == "SP") association_data <- association_data[, which(classes %in% which_classes), which(classes %in% which_classes)] identities <- identities[which(classes %in% which_classes)] } do.SR <- function(GroupBy, input, association_index, present) { jumps <- c(seq(0, ncol(input), 50)) if (max(jumps) < ncol(input)) { jumps <- c(jumps, ncol(input)) } out <- matrix(nrow = 0, ncol = 1) for (i in 1:(length(jumps) - 1)) { tmp <- input[, GroupBy] + input[, (jumps[i] + 1):jumps[i + 1]] if (length(tmp) > nrow(input)) { x <- colSums(tmp == 2) } else { x <- sum(tmp == 2) } if (length(tmp) > nrow(input)) { yab <- colSums(tmp == 1) } else { yab <- sum(tmp == 1) } if (association_index == "SRI") { out <- c(out, x/(x + yab)) } else if (association_index == "HWI") { out <- c(out, x/(x + 0.5 * yab)) } } out } do.SR.time <- function(GroupBy, input, association_index, times, present) { jumps <- c(seq(0, ncol(input), 50)) if (max(jumps) < ncol(input)) { jumps <- c(jumps, ncol(input)) } out <- matrix(nrow = 0, ncol = 1) for (i in 1:(length(jumps) - 1)) { tmp <- input[, GroupBy] + input[, (jumps[i] + 1):jumps[i + 1], drop = FALSE] if (!is.null(enter_time) | !is.null(exit_time)) { tmp2 <- present[, GroupBy] + present[, (jumps[i] + 1):jumps[i + 1], drop = FALSE] tmp[which(tmp2 < 2, arr.ind = T)] <- 0 } if (length(tmp) > nrow(input)) { x <- colSums(tmp == 2) yab <- apply(tmp, 2, function(x) { sum(table(times[x == 1]) == 2) }) y <- colSums(tmp == 1) - (2 * yab) } else { x <- sum(tmp == 2) yab <- sum(table(times[tmp == 1]) == 2) y <- sum(tmp == 1) - (2 * yab) } if (association_index == "SRI") { out <- c(out, x/(x + y + yab)) } else if (association_index == "HWI") { out <- c(out, x/(x + y + 0.5 * yab)) } } out } do.SR2 <- function(i, a, association_index) { x <- apply(a[, i, ], 2, sum) n <- apply(a, 1, rowSums) n[n > 0] <- 1 seen <- t(apply(n, 1, function(x) x - n[i, ])) ya <- rowSums(seen < 0) yb <- 
rowSums(seen > 0) seen <- t(apply(n, 1, function(x) x + n[i, ])) yab <- rowSums(seen > 1) - x if (association_index == "SRI") { out <- x/(x + ya + yb + yab) } else if (association_index == "HWI") { out <- x/(x + ya + yb + 0.5 * yab) } return(out) } do.SR2.occurrences <- function(i, a, association_index, occurrences) { x <- apply(a[, i, ], 2, sum) seen <- sweep(occurrences, 2, occurrences[i, ], "+") yab <- rowSums(seen == 2) - x ya_b <- rowSums(seen == 1) if (association_index == "SRI") { out <- x/(x + ya_b + yab) } else if (association_index == "HWI") { out <- x/(x + ya_b + 0.5 * yab) } return(out) } if (!is.null(enter_time) | !is.null(exit_time)) { present <- matrix(1, nrow(association_data), ncol(association_data)) } else { present <- NA } if (!is.null(enter_time)) { for (i in 1:ncol(present)) { present[which(times < enter_time[i]), i] <- 0 } } if (!is.null(exit_time)) { for (i in 1:ncol(present)) { present[which(times > exit_time[i]), i] <- 0 } } if (data_format == "GBI" & is.null(times)) fradj_sorted <- do.call("rbind", lapply(seq(1, ncol(association_data), 1), FUN = do.SR, input = association_data, association_index)) if (data_format == "GBI" & !is.null(times)) fradj_sorted <- do.call("rbind", lapply(seq(1, ncol(association_data), 1), FUN = do.SR.time, input = association_data, association_index, times, present)) if (data_format == "SP" & is.null(occurrences)) fradj_sorted <- do.call("rbind", lapply(seq(1, ncol(association_data), 1), FUN = do.SR2, a = association_data, association_index)) if (data_format == "SP" & !is.null(occurrences)) fradj_sorted <- do.call("rbind", lapply(seq(1, ncol(association_data), 1), FUN = do.SR2.occurrences, a = association_data, association_index, occurrences)) fradj_sorted[is.nan(fradj_sorted)] <- 0 diag(fradj_sorted) <- 0 if (!is.null(identities)) { colnames(fradj_sorted) <- identities rownames(fradj_sorted) <- identities } else if (!is.null(colnames(association_data))) { colnames(fradj_sorted) <- colnames(association_data) rownames(fradj_sorted) <- colnames(association_data) } return(fradj_sorted) } ``` ## Section 2 -- Network Generation ### *Creating a population of burbils with social networks* *** Burbils live in open habitats throughout the world. They form fission-fusion societies characterised by stable social groups that roost together but fission into smaller subgroups when foraging during the day. Foraging subgroups from different groups occasionally meet and intermingle creating opportunities for between-group interactions. These between-group associations are more likely if the two Burbil groups belong to the same "clan". Burbil groups vary in size and we are unsure whether groups of different sizes have similar social network structures. Groups also contain two unique colour morphs: burbils with red noses, and those with orange noses. As well as being able to identify individual burbils (which we use to construct their social networks!), we are also able to distinguish male and female burbils as well as those from three distinct age classes (adults, subadults and juveniles). We know that burbils are involved in both dominance interactions and affiliative interactions with group-mates. We suspect they may have a dominance hierarchy, but we don't know this for sure. We have a lot to find out! 
***
***

![A burbil](C:/Users/matth/Dropbox/burbilpic.png){width=30%}

***
***

### Section 2.1 -- Generate population network

In this section of the code we create our burbil society (starting with the association network), explaining what we do as we go along. With practice it should be possible to change some of the numbers in this code to change the nature of social relationships in your burbil society.

***

```{r create_groups_and_association_network, fig.width=8, fig.height=6}
#Set the mean group size
GS<-20

#Here we create a grid of locations for our observations
x<-seq(3,18,1)
y<-seq(3,18,1)
locs<-expand.grid(x,y)
names(locs)<-c("x","y")

#Here we assign coordinates to our groups. We create 16 groups in total.
group_locs<-locs[locs$x%%4==0&locs$y%%4==0,]

#Here we store the total number of groups
n_groups<-dim(group_locs)[1]

#Here we create three distinct clans of burbils. This will affect associations between members of different groups
group_clans<-sample(c("A","B","C"),n_groups,replace=TRUE)

#Set the probability of burbils from the same clan intermingling if they happen to forage at the same location
p_wc<-1
#Set the probability of burbils from different clans intermingling if they happen to forage at the same location
p_bc<-0.4

#Create a list to store individual IDs
indss<-list()
#Create a list to store group sizes
gss<-list()
#Create a list to store the sex of each individual
sexes<-list()
#Create a list to store the age of each individual
ages<-list()
#Create a list to store the nose colour of each individual
noses<-list()
#Create a list to store information on which day a subgroup is observed
daysl<-list()
#Create a list to store a group-by-individual matrix for each burbil group
gbis<-list()

#Set the mean number of subgroups observed for each group each day
sg_mn<-5

#Set the strength of assortativity based on nose colour
#Set a number between 0 and 1
sg_ass<-0.15

#Generate association data within each burbil group
for(j in 1:n_groups){

  #individual identities
  inds<-seq(1,rpois(1,GS),1)
  indss[[j]]<-inds
  #group size
  gs<-length(inds)
  gss[[j]]<-gs
  #sex
  sex<-sample(c("M","F"),gs,replace=TRUE)
  sexes[[j]]<-sex
  #age
  age<-sample(c("AD","SUB","JUV"),gs,replace=TRUE,prob=c(0.6,0.2,0.2))
  ages[[j]]<-age
  #nose
  nose<-sample(c("RED","ORANGE"),gs,replace=TRUE,prob=c(0.7,0.3))
  noses[[j]]<-nose

  #---------------------------------

  #Define number of subgroups on the first day
  n_sg<-rpois(1,sg_mn-1)+1
  #find halfway point
  max_red<-floor(n_sg/2)
  #Sample subgroups on the first day
  subgroups1<-sample(n_sg,sum(nose=="RED"),replace=TRUE,prob=c(rep(0.5+sg_ass,max_red),rep(0.5-sg_ass,n_sg-max_red)))
  subgroups2<-sample(n_sg,sum(nose=="ORANGE"),replace=TRUE,prob=c(rep(0.5-sg_ass,max_red),rep(0.5+sg_ass,n_sg-max_red)))
  subgroups<-rep(NA,gs)
  subgroups[nose=="RED"]<-subgroups1
  subgroups[nose=="ORANGE"]<-subgroups2

  #Store relevant information in the group-by-individual matrix and days vector
  gbi<-matrix(0,nc=gs,nr=n_sg)
  gbi[cbind(subgroups,seq(1,gs,1))]<-1
  days<-rep(1,nrow(gbi))

  #Repeat process over 100 days of observations
  for(i in 2:100){
    n_sg<-rpois(1,sg_mn-1)+1
    #find halfway point
    max_red<-floor(n_sg/2)
    subgroups1<-sample(n_sg,sum(nose=="RED"),replace=TRUE,prob=c(rep(0.5+sg_ass,max_red),rep(0.5-sg_ass,n_sg-max_red)))
    subgroups2<-sample(n_sg,sum(nose=="ORANGE"),replace=TRUE,prob=c(rep(0.5-sg_ass,max_red),rep(0.5+sg_ass,n_sg-max_red)))
    subgroups<-rep(NA,gss[[j]])
    subgroups[nose=="RED"]<-subgroups1
    subgroups[nose=="ORANGE"]<-subgroups2
    tgbi<-matrix(0,nc=gs,nr=n_sg)
    tgbi[cbind(subgroups,seq(1,gs,1))]<-1
days<-c(days,rep(i,nrow(tgbi))) gbi<-rbind(gbi,tgbi) } #We edit the group-by-individual matrix and days vector to delete any "empty" groups gbi2<-gbi[rowSums(gbi)>0,] days<-days[rowSums(gbi)>0] gbi<-gbi2 #We could create and plot the network for each burbil group #(NOT RUN HERE) #net<-get_network2(gbi) #net2<-graph.adjacency(net,mode="undirected",weighted=TRUE) #plot(net2,vertex.color=noses[[j]],edge.width=(edge_attr(net2)$weight*10)^2) daysl[[j]]<-days gbis[[j]]<-gbi } #We now go through and assign a location to every subgroup sglocs<-list() for(i in 1:n_groups){ tx<-rep(NA,dim(gbis[[i]])[1]) ty<-rep(NA,dim(gbis[[i]])[1]) sglocs[[i]]<-data.frame(tx,ty) names(sglocs[[i]])<-c("x","y") sglocs[[i]]$x<-group_locs[i,1]+round(rnorm(dim(gbis[[i]])[1],0,2)) sglocs[[i]]$y<-group_locs[i,2]+round(rnorm(dim(gbis[[i]])[1],0,2)) } #Vector recording number of individuals in each group n_inds<-numeric() for(i in 1:n_groups){ n_inds[i]<-dim(gbis[[i]])[2] } #Calculate total individuals in the population n_tot<-sum(n_inds) #Population-level individual identities inds_tot<-seq(1,n_tot,1) #Information on each individual's group membership g_tot<-rep(seq(1,n_groups,1),n_inds) #Information on each individual's within-group identity gi_tot<-seq(1,n_inds[1],1) for(i in 2:n_groups){ gi_tot<-c(gi_tot,seq(1,n_inds[i],1)) } #We now calculate the full population association network full_net<-matrix(0,nr=n_tot,nc=n_tot) #Counts up between-group associations for(i in 1:100){ for(j in 1:(n_groups-1)){ for(k in (j+1):n_groups){ tA<-paste0(sglocs[[j]][,1],"-",sglocs[[j]][,2]) tB<-paste0(sglocs[[k]][,1],"-",sglocs[[k]][,2]) tA2<-tA[daysl[[j]]==i] tB2<-tB[daysl[[k]]==i] tt<-match(tA2,tB2) if(sum(is.na(tt))1){ nipi<-rpois(1,m_nipi) indivs<-which(gbi[g,]==1) ni<-nipi*length(indivs) for(n in 1:ni){ i1<-sample(indivs,1) ifelse(rowSums(gbi)[g]==2,i2<-indivs[indivs!=i1],i2<-sample(indivs[indivs!=i1],1)) winner<-rbinom(1,1,inv.logit(RHPs1[i1]-RHPs1[i2])) GROUP[c]<-g if(winner==1){ WINNER[c]<-i1 LOSER[c]<-i2 } if(winner==0){ WINNER[c]<-i2 LOSER[c]<-i1 } grD[c]<-g c<-c+1 } } } #Create the dominance network in igraph format dom_net<-graph_from_edgelist(cbind(WINNER,LOSER), directed = TRUE) E(dom_net)$weight <- 1 dom_net<-simplify(dom_net, edge.attr.comb=list(weight="sum")) #Plot the dominance network that results (it is densely connected and so the network plot isn't especially informative) plot(dom_net,edge.width=log(edge_attr(dom_net)$weight,10)^5,layout=layout_in_circle,main="Dominance network",edge.arrow.size=0.5) #To show that our code to generate the dominance network works we plot the relationship between in-strength and out-strength and it is negatively correlated as would be expected for a linear dominance hierarchy plot(strength(dom_net,mode="out"),strength(dom_net,mode="in"),pch=16,xlab="Out-degree",ylab="In-degree",cex.lab=1.5,cex.axis=1,main="Correlation between out- and in-degree of nodes in the dominance network") ``` *** ##### Create affiliative interactions ```{r affiliative interactions, fig.width=8, fig.height=6} #We use an equivalent approach as for dominance networks so have kept much of the coding the same (hence the mismatch in names) #Set-up vectors to store results GROUP<-numeric() GIV<-numeric() REC<-numeric() #Define the tendency of different individuals to initiate affiliative interactions AHP_ad<- -1 AHP_sub<- -1 AHP_juv<-1 AHP_M<-0 AHP_nose<-1 AHP_resid<-0.2 AHPs1<-rnorm(gs1,AHP_ad*(age1=="AD")+AHP_sub*(age1=="SUB")+AHP_juv*(age1=="JUV")+AHP_M*(sex1=="M"),AHP_resid) #Define the mean number of interactions 
observed per individual in a subgroup m_nipi<-0.5 #record which group interactions occur in grA<-numeric() #Generate affiliative interaction data c<-1 for(g in 1:nrow(gbi)){ if(rowSums(gbi)[g]>1){ nipi<-rpois(1,m_nipi) indivs<-which(gbi[g,]==1) ni<-nipi*length(indivs) for(n in 1:ni){ i1<-sample(indivs,1) ifelse(rowSums(gbi)[g]==2,i2<-indivs[indivs!=i1],i2<-sample(indivs[indivs!=i1],1)) tn<-0 if(nose1[i1]==nose1[i2]){tn<-1} winner<-rbinom(1,1,inv.logit(AHPs1[i1]-AHPs1[i2]+tn)) GROUP[c]<-g if(winner==1){ GIV[c]<-i1 REC[c]<-i2 } if(winner==0){ GIV[c]<-i2 REC[c]<-i1 } grA[c]<-g c<-c+1 } } } #Create the affiliative network in igraph format aff_net<-graph_from_edgelist(cbind(GIV,REC), directed = TRUE) E(aff_net)$weight <- 1 aff_net<-simplify(aff_net, edge.attr.comb=list(weight="sum")) #Plot the affiliative network that results plot(aff_net,edge.width=log(edge_attr(aff_net)$weight,6)^5,layout=layout_in_circle,main="Affiliative network",edge.arrow.size=0.5) #Plot the same correlation used for dominance networks plot(strength(aff_net,mode="out"),strength(aff_net,mode="in"),pch=16,xlab="Out-degree",ylab="In-degree",cex.lab=1.5,cex.axis=1,main="Correlation between out- and in-degree of nodes in the affiliative network") ``` *** *** ### Section 2.3 -- Generate huddling networks Data were also collected on the huddling networks of two burbil groups while they were roosting during summer and winter. These data can be used to test if the huddling networks differ between small and large groups. We simulate these data here. ```{r huddling_networks, fig.width=8, fig.height=6} sm_g<-which.min(n_inds) bi_g<-which.max(n_inds) #Generate "roosting/huddling network of burbils in the smallest group in the summer hud_netSM<-sample_smallworld(dim=1, size=gss[[sm_g]], nei=3, p=0.05, loops = FALSE, multiple = FALSE) #Plot network plot(hud_netSM,main="Huddling network in small group") #Calculate betweenness of network igraph::betweenness(hud_netSM) ##----------------------------------------- ##----------------------------------------- #Generate "roosting/huddling network of burbils in the biggest group in the summer hud_netBI<-sample_smallworld(dim=1, size=gss[[bi_g]], nei=3, p=0.05, loops = FALSE, multiple = FALSE) #Plot network plot(hud_netBI,main="Huddling network in big group") #Calculate betweenness of network igraph::betweenness(hud_netBI) #Examine differences in betweenness by inspecting histograms hist(igraph::betweenness(hud_netSM),breaks=seq(0,200,1),col=rgb(1,0,0,0.3),border=NA,xlab="Betweenness",cex.lab=1.5,cex.axis=1,main="Betweenness centrality distribution in\n small group (red) and big group (blue) networks") hist(igraph::betweenness(hud_netBI),breaks=seq(0,200,1),col=rgb(0,0,1,0.3),border=NA,add=TRUE,cex.lab=1.5,cex.axis=1,main="") ##----------------------------------------- ##----------------------------------------- #Generate "roosting/huddling network of burbils in the smallest group in the winter hud_netSM_w<-erdos.renyi.game(n=gss[[sm_g]], p=0.3, loops = FALSE, multiple = FALSE) hud_netBI_w<-erdos.renyi.game(n=gss[[bi_g]], p=0.3, loops = FALSE, multiple = FALSE) ``` *** *** ### Section 2.4 -- A second population network We have also been sent association data from a similar but smaller burbil population by a colleague. They want to know whether their burbil population has a similar network structure to ours. 
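Before generating this second population, it is worth sketching how such a comparison might begin once its association matrix exists. The sketch below is minimal and hedged: it assumes the chunk that follows completes `full_net_B` in the same way `full_net` was built (a symmetric weighted adjacency matrix with a zero diagonal), and it only compares simple global statistics; the formal reference-model approaches come later. It is therefore marked `eval=FALSE` and should be run after the next chunk.

```{r compare_population_networks, eval=FALSE}
#Minimal sketch (run after the chunk below): compare simple global statistics
#between the two burbil populations
#Network density (proportion of dyads with a non-zero association)
c(density_A=mean(full_net[upper.tri(full_net)]>0),
  density_B=mean(full_net_B[upper.tri(full_net_B)]>0))
#Mean non-zero edge weight in each population
c(mean_w_A=mean(full_net[full_net>0]),
  mean_w_B=mean(full_net_B[full_net_B>0]))
#Two-sample Kolmogorov-Smirnov test comparing the non-zero edge-weight distributions
ks.test(full_net[full_net>0&upper.tri(full_net)],
        full_net_B[full_net_B>0&upper.tri(full_net_B)])
```

Note that these raw comparisons ignore differences in population size and sampling, which is exactly the kind of problem the reference models in Section 3 are designed to address.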
```{r alternative_association_network, fig.width=8, fig.height=6}
#Set the mean group size
GS_B<-20

#Here we create a grid of locations for our observations
x_B<-seq(3,13,1)
y_B<-seq(3,9,1)
locs_B<-expand.grid(x_B,y_B)
names(locs_B)<-c("x","y")

#Here we assign coordinates to our groups. We create 6 groups in total.
group_locs_B<-locs_B[locs_B$x%%4==0&locs_B$y%%4==0,]

#Here we store the total number of groups
n_groups_B<-dim(group_locs_B)[1]

#Here we create three distinct clans of burbils. This will affect associations between members of different groups
group_clans_B<-sample(c("A","B","C"),n_groups_B,replace=TRUE)

#Set the probability of burbils from the same clan intermingling if they happen to forage at the same location
p_wc_B<-1
#Set the probability of burbils from different clans intermingling if they happen to forage at the same location
p_bc_B<-0.4

#Create a list to store individual IDs
indss_B<-list()
#Create a list to store group sizes
gss_B<-list()
#Create a list to store the sex of each individual
sexes_B<-list()
#Create a list to store the age of each individual
ages_B<-list()
#Create a list to store the nose colour of each individual
noses_B<-list()
#Create a list to store information on which day a subgroup is observed
daysl_B<-list()
#Create a list to store a group-by-individual matrix for each burbil group
gbis_B<-list()

#Set the mean number of subgroups observed for each group each day
sg_mn_B<-5

#Set the strength of assortativity based on nose colour
#Set a number between 0 and 1
sg_ass_B<-0.1

#Generate association data within each burbil group
for(j in 1:n_groups_B){

  #individual identities
  inds_B<-seq(1,rpois(1,GS_B),1)
  indss_B[[j]]<-inds_B
  #group size
  gs_B<-length(inds_B)
  gss_B[[j]]<-gs_B
  #sex
  sex_B<-sample(c("M","F"),gs_B,replace=TRUE)
  sexes_B[[j]]<-sex_B
  #age
  age_B<-sample(c("AD","SUB","JUV"),gs_B,replace=TRUE,prob=c(0.6,0.2,0.2))
  ages_B[[j]]<-age_B
  #nose
  nose_B<-sample(c("RED","ORANGE"),gs_B,replace=TRUE,prob=c(0.7,0.3))
  noses_B[[j]]<-nose_B

  #---------------------------------

  #Define number of subgroups on the first day
  n_sg_B<-rpois(1,sg_mn_B-1)+1
  #find halfway point
  max_red_B<-floor(n_sg_B/2)
  #Sample subgroups on the first day
  subgroups1_B<-sample(n_sg_B,sum(nose_B=="RED"),replace=TRUE,prob=c(rep(0.5+sg_ass_B,max_red_B),rep(0.5-sg_ass_B,n_sg_B-max_red_B)))
  subgroups2_B<-sample(n_sg_B,sum(nose_B=="ORANGE"),replace=TRUE,prob=c(rep(0.5-sg_ass_B,max_red_B),rep(0.5+sg_ass_B,n_sg_B-max_red_B)))
  subgroups_B<-rep(NA,gs_B)
  subgroups_B[nose_B=="RED"]<-subgroups1_B
  subgroups_B[nose_B=="ORANGE"]<-subgroups2_B

  #Store relevant information in the group-by-individual matrix and days vector
  gbi_B<-matrix(0,nc=gs_B,nr=n_sg_B)
  gbi_B[cbind(subgroups_B,seq(1,gs_B,1))]<-1
  days_B<-rep(1,nrow(gbi_B))

  #Repeat process over 100 days of observations
  for(i in 2:100){
    n_sg_B<-rpois(1,sg_mn_B-1)+1
    #find halfway point
    max_red_B<-floor(n_sg_B/2)
    subgroups1_B<-sample(n_sg_B,sum(nose_B=="RED"),replace=TRUE,prob=c(rep(0.5+sg_ass_B,max_red_B),rep(0.5-sg_ass_B,n_sg_B-max_red_B)))
    subgroups2_B<-sample(n_sg_B,sum(nose_B=="ORANGE"),replace=TRUE,prob=c(rep(0.5-sg_ass_B,max_red_B),rep(0.5+sg_ass_B,n_sg_B-max_red_B)))
    subgroups_B<-rep(NA,gss_B[[j]])
    subgroups_B[nose_B=="RED"]<-subgroups1_B
    subgroups_B[nose_B=="ORANGE"]<-subgroups2_B
    tgbi_B<-matrix(0,nc=gs_B,nr=n_sg_B)
    tgbi_B[cbind(subgroups_B,seq(1,gs_B,1))]<-1
    days_B<-c(days_B,rep(i,nrow(tgbi_B)))
    gbi_B<-rbind(gbi_B,tgbi_B)
  }

  #We edit the group-by-individual matrix and days vector to delete any "empty" groups
  gbi2_B<-gbi_B[rowSums(gbi_B)>0,]
days_B<-days_B[rowSums(gbi_B)>0] gbi_B<-gbi2_B daysl_B[[j]]<-days_B gbis_B[[j]]<-gbi_B } #We now go through and assign a location to every subgroup sglocs_B<-list() for(i in 1:n_groups_B){ tx_B<-rep(NA,dim(gbis_B[[i]])[1]) ty_B<-rep(NA,dim(gbis_B[[i]])[1]) sglocs_B[[i]]<-data.frame(tx_B,ty_B) names(sglocs_B[[i]])<-c("x","y") sglocs_B[[i]]$x<-group_locs_B[i,1]+round(rnorm(dim(gbis_B[[i]])[1],0,2)) sglocs_B[[i]]$y<-group_locs_B[i,2]+round(rnorm(dim(gbis_B[[i]])[1],0,2)) } #Vector recording number of individuals in each group n_inds_B<-numeric() for(i in 1:n_groups_B){ n_inds_B[i]<-dim(gbis_B[[i]])[2] } #Calculate total individuals in the population n_tot_B<-sum(n_inds_B) #Population-level individual identities inds_tot_B<-seq(1,n_tot_B,1) #Information on each individual's group membership g_tot_B<-rep(seq(1,n_groups_B,1),n_inds_B) #Information on each individual's within-group identity gi_tot_B<-seq(1,n_inds_B[1],1) for(i in 2:n_groups_B){ gi_tot_B<-c(gi_tot_B,seq(1,n_inds_B[i],1)) } #We now calculate the full population association network full_net_B<-matrix(0,nr=n_tot_B,nc=n_tot_B) #Counts up between-group associations for(i in 1:100){ for(j in 1:(n_groups_B-1)){ for(k in (j+1):n_groups_B){ tA_B<-paste0(sglocs_B[[j]][,1],"-",sglocs_B[[j]][,2]) tB_B<-paste0(sglocs_B[[k]][,1],"-",sglocs_B[[k]][,2]) tA2_B<-tA_B[daysl_B[[j]]==i] tB2_B<-tB_B[daysl_B[[k]]==i] tt_B<-match(tA2_B,tB2_B) if(sum(is.na(tt_B))0.975 (less than chance) sum(obs$r0.975 (weighted out-degree of males less than females) sum(obs0.975 to the network being negatively assorted by nose colour) sum(obs0.975 to youngsters having lower out-strength) sum(obs0.975 to males having lower out-strength) sum(obs0,arr.ind=TRUE) tind1<-pind[sample(1:nrow(pind),1),] #record the day on which that individual/grouping-event occurred td<-which(day==day[tind1[1]]) #sample a second individual/grouping-event that occurs on the same day pind2<-pind[which(pind[,1]%in%td),] tind2<-pind2[sample(1:nrow(pind2),1),] #If additional constraints are met then conduct swap if(tind1[1]!=tind2[1]&tind1[2]!=tind2[2]){ if(gbi_t[tind1[1],tind2[2]]==0&gbi_t[tind2[1],tind1[2]]==0){ gbi_t2<-gbi_t gbi_t2[tind2[1],tind1[2]]<-gbi_t[tind1[1],tind1[2]] gbi_t2[tind1[1],tind1[2]]<-gbi_t[tind2[1],tind1[2]] gbi_t2[tind1[1],tind2[2]]<-gbi_t[tind2[1],tind2[2]] gbi_t2[tind2[1],tind2[2]]<-gbi_t[tind1[1],tind2[2]] gbi_t<-gbi_t2 } } } #We can then continue the Markov Chain and sample from it to generate our reference distribution of test statistics. Here we conduct 10000 swaps but we only save every 10 iterations (known as a thinning interval) to avoid auto-correlation that may occur because of rejected swaps c<-1 for(i in 1:10000){ pind<-which(gbi_t>0,arr.ind=TRUE) tind1<-pind[sample(1:nrow(pind),1),] td<-which(day==day[tind1[1]]) pind2<-pind[which(pind[,1]%in%td),] tind2<-pind2[sample(1:nrow(pind2),1),] if(tind1[1]!=tind2[1]&tind1[2]!=tind2[2]){ if(gbi_t[tind1[1],tind2[2]]==0&gbi_t[tind2[1],tind1[2]]==0){ gbi_t2<-gbi_t gbi_t2[tind2[1],tind1[2]]<-gbi_t[tind1[1],tind1[2]] gbi_t2[tind1[1],tind1[2]]<-gbi_t[tind2[1],tind1[2]] gbi_t2[tind1[1],tind2[2]]<-gbi_t[tind2[1],tind2[2]] gbi_t2[tind2[1],tind2[2]]<-gbi_t[tind1[1],tind2[2]] gbi_t<-gbi_t2 } } #This is where we save the swaps. 
Notice we only save every 10th swap if(i%%10==0){ rgbis[[c]]<-gbi_t c<-c+1 } } #Here we convert our permuted GBIs to networks rnets<-lapply(rgbis,get_network2) #We can then calculate our reference distribution ref_cvs<-unlist(lapply(rnets,CoV)) #We now are going to compare our Markov Chain with the observed coefficient of variation #Unsurprisingly, our observed coefficient of variation lies outside the reference distribution, but then we knew our networks were non-random already par(xpd=FALSE) plot(ref_cvs,type="l",ylim=c(0,0.4),las=1,ylab="Value of test statistic",cex.lab=1.5) lines(x=c(-100,100000),y=c(obs_cv,obs_cv),col="red",lwd=2) #Check p value from permutations ref_cvs2<-c(obs_cv,ref_cvs) sum(ref_cvs20,arr.ind=TRUE) tind1<-pind[sample(1:nrow(pind),1),] td<-which(day==day[tind1[1]]) #This is where we work out the nose colour of the individual sampled first tn1<-noses[[1]][tind1[2]] pind2<-pind[which(pind[,1]%in%td),] tind2<-pind2[sample(1:nrow(pind2),1),] #This is where we work out the nose colour of the individual sampled second tn2<-noses[[1]][tind2[2]] if(tind1[1]!=tind2[1]&tind1[2]!=tind2[2]){ if(gbi_t[tind1[1],tind2[2]]==0&gbi_t[tind2[1],tind1[2]]==0){ #We only conduct a swap if they have the same nose colour. If not then the current permuted GBI is resampled in the Markov Chain if(tn1==tn2){ gbi_t2<-gbi_t gbi_t2[tind2[1],tind1[2]]<-gbi_t[tind1[1],tind1[2]] gbi_t2[tind1[1],tind1[2]]<-gbi_t[tind2[1],tind1[2]] gbi_t2[tind1[1],tind2[2]]<-gbi_t[tind2[1],tind2[2]] gbi_t2[tind2[1],tind2[2]]<-gbi_t[tind1[1],tind2[2]] gbi_t<-gbi_t2 } } } } #Sampling period #100000 swaps with every 100th swap saved c<-1 for(i in 1:100000){ pind<-which(gbi_t>0,arr.ind=TRUE) tind1<-pind[sample(1:nrow(pind),1),] td<-which(day==day[tind1[1]]) tn1<-noses[[1]][tind1[2]] pind2<-pind[which(pind[,1]%in%td),] tind2<-pind2[sample(1:nrow(pind2),1),] tn2<-noses[[1]][tind2[2]] if(tind1[1]!=tind2[1]&tind1[2]!=tind2[2]){ if(gbi_t[tind1[1],tind2[2]]==0&gbi_t[tind2[1],tind1[2]]==0){ if(tn1==tn2){ gbi_t2<-gbi_t gbi_t2[tind2[1],tind1[2]]<-gbi_t[tind1[1],tind1[2]] gbi_t2[tind1[1],tind1[2]]<-gbi_t[tind2[1],tind1[2]] gbi_t2[tind1[1],tind2[2]]<-gbi_t[tind2[1],tind2[2]] gbi_t2[tind2[1],tind2[2]]<-gbi_t[tind1[1],tind2[2]] gbi_t<-gbi_t2 } } } if(i%%100==0){ rgbis[[c]]<-gbi_t c<-c+1 } } #Here we convert our permuted GBIs to networks rnets<-lapply(rgbis,get_network2) #We can then calculate our reference distribution ref_cvs<-unlist(lapply(rnets,CoV)) #If we produce the same plot as before, we can see that now the observed coefficient of variation lies within the reference distribution, suggesting that individuals interact at random aside from assorting by nose colour (this is the result we expect from how we simulated the data) plot(ref_cvs,type="l",ylim=c(0,0.4),ylab="Value of test statistic",las=1,cex.lab=1.5) lines(x=c(-100,100000),y=c(obs_cv,obs_cv),col="red",lwd=2) #Check p value from permutations ref_cvs2<-c(obs_cv,ref_cvs) sum(ref_cvs20.975 indicates it has a smaller degree sum(smeanw0],las=1,xlab="Degree",cex.lab=1.5,col="grey",main="") #This distribution is a little complex (it looks like there are two different statistical models generating it) #We can examine these two processes by splitting the histogram. 
#The two processes in this particular case (if you haven't worked it out) are caused in part by within-group versus between-group associations
par(mfrow=c(1,2))
hist(full_net[full_net>0.025],las=1,xlab="Edge weight",cex.lab=1.5,col="grey",main="")
hist(full_net[full_net>0&full_net<0.025],las=1,xlab="Edge weight",cex.lab=1.5,col="grey",main="")
par(mfrow=c(1,1))

#We regenerate the distribution here in a slightly simplified form

#Calculate number of edges
ne<-gsize(full_net2)
#Calculate proportion of edges less than 0.025
pes<-sum(full_net>0&full_net<0.025)/ne
#mean and standard deviation of within-group edge weights
meb<-mean(full_net[full_net>0.025])
sdeb<-sd(full_net[full_net>0.025])

#set up vector to store edges for new graph
new_edgeweights<-rep(NA,gsize(rdsn2))

#fill in vector - we are effectively generating a normal distribution of within-group edge weights and making all between-group edge weights equal to 0.005
for(i in 1:gsize(rdsn2)){
  tb<-rbinom(1,1,pes)
  if(tb==1){
    new_edgeweights[i]<-0.005
  }
  if(tb==0){
    new_edgeweights[i]<-rnorm(1,meb,sdeb)
  }
}

#We can check our new edge-weight distribution against the original one
par(mfrow=c(1,2))
hist(full_net[full_net>0&upper.tri(full_net)==TRUE],las=1,xlab="Edge weight",cex.lab=1.5,col="grey",breaks=seq(0,0.3,0.01),main="",ylim=c(0,2000))
hist(new_edgeweights,las=1,xlab="Edge weight",cex.lab=1.5,col="grey",breaks=seq(0,0.3,0.01),main="",ylim=c(0,2000))
par(mfrow=c(1,1))

#We are feeling pretty pleased with ourselves: we have done a good (albeit not perfect) job of fitting the edge-weight distribution. The main weakness of our current model is that it overestimates the number of very weak between-group connections, which is unsurprising as we set all of these to the same very low value. We can use this for our new graph

#Plot newly generated, weighted reference network
edge_attr(rdsn2)$weight<-new_edgeweights
plot(rdsn2,vertex.label=NA,vertex.size=5,edge.width=(edge_attr(rdsn2)$weight*8)^3)
```

However, when we evaluate what we have done we realise there are problems. Not only have we underestimated the strength of some between-group associations (presumably those between members of nearby groups), but between-group edges tend to be much weaker, and individuals with high degree may have more between-group connections, reducing their mean edge weight. Edge weights may also be biased by other things that influence associations, such as nose colour. In short, we failed to consider the covariance between edge weights and degree.

```{r distribution_based_reference_models_3, fig.width=8, fig.height=6}
#So we plot the relationship here
plot(deg,rowMeans(full_net),pch=16,xlab="Degree",ylab="Mean association strength",cex.lab=1.5)
```

From the plot we can see that there is a complicated relationship between degree and mean association strength, and that we would need a more complex reference model to capture it properly.

***

Distribution-based reference models are complicated. For some network measures they might not be possible at all. For others it might be important to consider the covariance between measures in order to generate an appropriate reference model. Distribution-based reference models could also be applied to raw data: you could fit a distribution to the relationship between individual traits or landscape features and the social or spatial data used to build your networks, and rebuild your network from there. We don't cover that in this case study.
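That said, the basic mechanics of turning a fitted edge-weight distribution into a reference distribution are worth sketching. One can resample edge weights from the fitted mixture many times and recompute a test statistic each time, in the spirit of the coefficient-of-variation comparisons used earlier. The sketch below is minimal and hedged: it reuses `pes`, `meb` and `sdeb` from above, conditions on the observed number of edges in `rdsn2`, truncates negative Gaussian draws at a small positive value, and defines its own CV helper rather than relying on the earlier `CoV` function.

```{r distribution_based_reference_distribution, fig.width=8, fig.height=6}
#Reference distribution of the coefficient of variation of edge weights under the fitted mixture
cv_fun<-function(x){sd(x)/mean(x)}
n_e<-gsize(rdsn2)
ref_cv_dist<-replicate(1000,{
  tb<-rbinom(n_e,1,pes)
  #Weak between-group edges get the fixed low weight; the rest are drawn from the fitted Gaussian (truncated at a small positive value)
  ew<-ifelse(tb==1,0.005,pmax(rnorm(n_e,meb,sdeb),0.001))
  cv_fun(ew)
})
#Compare to the CV of the observed non-zero edge weights
obs_cv_ew<-cv_fun(full_net[full_net>0&upper.tri(full_net)])
hist(ref_cv_dist,xlab="CV of edge weights",col="grey",main="")
abline(v=obs_cv_ew,col="red",lwd=2)
```

Because the mixture was fitted to the observed edge weights in the first place, the observed CV should sit close to this reference distribution; the value of the approach comes when the same machinery is used to test statistics that were *not* used in the fitting.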
***
***
***

### Section 3.4 -- GENERATIVE REFERENCE MODELS

Our final type of reference model is the generative reference model. We first briefly illustrate the use of some basic statistical models for the networks themselves, before showing how agent-based models can be used to generate reference distributions of networks.

***

#### Section 3.4.1 -- Statistical network models

Statistical network models are well covered elsewhere in the literature. We touch on two commonly used examples here:

- (a) Stochastic block models, which can be used to generate a reference distribution related to the community structure of the graph
- (b) Exponential random graph models (ERGMs), which can be used to fit parameters describing how the probability or weight of edges is explained by structural properties of the network, nodal traits and dyadic traits.

```{r statistical_network_models, message=FALSE, results="hide", fig.show="hide"}
#Note both these models are verbose during fitting and so we have hidden output and figures for this chunk of code

##Fit a stochastic block model to the association network
#We fit a block model for a weighted network, assuming edge weights have a Gaussian distribution as this is a reasonable assumption for our association network (see previous sections)
sb<-blockmodels::BM_gaussian(membership_type="SBM_sym",adj=full_net,verbosity=0)
sb$estimate()

##Fit an ERGM to the dominance interaction data
#We first convert our dominance network to a network object for the ergm package in R
dom_el<-as.tnet(MAT_DOM)
dom<-network(dom_el[,1:2])
#We then add edge weight as an attribute
network::set.edge.attribute(dom,"weight",as.vector(dom_el[,3]))
#We then add individual traits as node attributes
network::set.vertex.attribute(dom,"sex",as.vector(sexes[[1]]))
network::set.vertex.attribute(dom,"age",as.vector(ages[[1]]))
network::set.vertex.attribute(dom,"nose",as.vector(noses[[1]]))
#We can then fit a count ERGM (with a Poisson reference distribution) to the network
#nonzero is a term to control for zero-inflation in edge counts (because many social networks are sparse)
#sum is an intercept-like term for edge weights
#We can then fit an array of terms to test hypotheses about the network structure and associations between connection weights and individual traits
#See https://rdrr.io/cran/ergm/man/ergm-terms.html for full details on ERGM terms
dom_mod<-ergm(dom~nonzero+sum+mutual(form="nabsdiff")+cyclicalweights(twopath="min",combine="max",affect="min")+transitiveweights(twopath="min",combine="max",affect="min")+nodematch("sex",diff=TRUE)+nodematch("age",diff=TRUE)+nodematch("nose",diff=TRUE)+nodeofactor("age")+nodeofactor("sex")+nodeofactor("nose"),reference=~Poisson,response="weight",silent=TRUE)

##To check that the model has converged we would run
#mcmc.diagnostics(dom_mod)
#(code not run here)
```

Now we can examine the fit of these statistical models and explore how to use them as reference distributions.

```{r examine_model_outputs, fig.width=8, fig.height=6}
#For the stochastic block model, we can see how the fit of the model depends on the number of blocks or communities
plot(sb$ICL,pch=16,xlab="Number of blocks",ylab="Integrated classification likelihood",cex.lab=1.5)
#We can see that the fit of the model doesn't really improve once 16 communities are included. This is unsurprising given we simulated 16 burbil groups and within-group associations are so much more frequent than between-group associations.
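#An aside: the fitted block model can itself serve as a generative reference model.
#A minimal, hedged sketch follows: we assign each burbil to its most likely block,
#estimate the mean and sd of edge weights for each pair of blocks directly from the
#observed network, and simulate a new weighted network from those block-level Gaussians.
#This is an illustration, not a routine provided by the blockmodels package; the
#within-block means are slightly biased by the zero diagonal, single-member blocks get
#an sd of zero, and negative simulated weights are simply truncated at zero.
block_of<-apply(sb$memberships[[16]]$Z,1,which.max)
n_blocks<-max(block_of)
mu_b<-matrix(0,n_blocks,n_blocks)
sd_b<-matrix(0,n_blocks,n_blocks)
for(b1 in 1:n_blocks){
  for(b2 in 1:n_blocks){
    ew<-full_net[block_of==b1,block_of==b2]
    mu_b[b1,b2]<-mean(ew)
    sd_b[b1,b2]<-sd(ew)
  }
}
sd_b[is.na(sd_b)]<-0
#Simulate one reference network from the block-level parameters
sim_net<-matrix(0,n_tot,n_tot)
for(i in 1:(n_tot-1)){
  for(j in (i+1):n_tot){
    w<-rnorm(1,mu_b[block_of[i],block_of[j]],sd_b[block_of[i],block_of[j]])
    sim_net[i,j]<-sim_net[j,i]<-max(w,0)
  }
}
#Networks simulated this way could be compared to full_net in the same way as the ERGM simulations below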
#The best model fit is for 16 blocks/communities
which.max(sb$ICL)

#We can examine the model predictions visually as follows
#The stochastic block model fits very closely to the observed network structure visually
sb$plot_obs_pred(16)

#We can check the fit of the block model further by working out the memberships it applies and comparing the size of blocks to the size of the groups we initially generated
mems<-sign(round(sb$memberships[[16]]$Z,2))
table(unlist(gss))
table(colSums(mems))

#We can see full model parameters using the command below (not run here)
#sb$model_parameters[16]

############################

#For the ERGM we can print out a model summary much like we do for other statistical models
summary(dom_mod)

#We can also simulate networks based on the ERGM fit to provide a reference distribution for further hypothesis testing (for example, by seeing how goodness of fit changes for different regions of the network)
#Here we simulate 10 networks
ref_doms<-simulate(dom_mod,10)

#A quick plot to show the 10 reference networks
#N.B. we are plotting using the network package here for speed. We could convert to igraph if desired
par(mfrow=c(2,5),mar=c(0,0,0,0))
for(i in 1:length(ref_doms)){
  plot(ref_doms[[i]])
}
par(mfrow=c(1,1),mar=c(5,6,2,2))

#Here is the conversion into adjacency matrices
ref_mats<-as.sociomatrix(ref_doms,attrname="weight",simplify=FALSE)
#Print an adjacency matrix to demonstrate
ref_mats[[1]]
```

***
***

#### Section 3.4.2 -- Agent-based reference models

Agent-based models offer a powerful way to develop reference distributions that depend on behavioural rules rather than the structure of the observed network. You can program individuals to behave in a particular way and record their interactions and associations to generate a simulated network. This is, of course, how we generated our burbil society in the first place. Therefore, to demonstrate the use of agent-based models, we are going to reuse some of our previous code and encourage you to examine the consequences of changing key parts of it.

***

First we fit a spatially explicit agent-based model (ABM). Second we fit a spatially explicit ABM applied at the subgroup level. Third we develop a socially explicit ABM to see whether it is better able to explain burbil association patterns.

***

*Note that we only produce one simulation of each agent-based model here. However, stochastic agent-based models such as these can also be used to build reference distributions of test statistics if run multiple times. Give it a go if you fancy!*

***

In this example our question is: how are between-group association networks structured by space-use? We use three test statistics: the correlation between the network generated using the ABM and the observed between-group network (a Mantel test); the summed difference in values between the reference network and the observed network, which can highlight any bias in the edge weights of the reference network; and the summed absolute difference in values between the two networks, which shows how similar the reference network is to the observed network (a smaller value indicates a better fit). *Note that these are the same test statistics used in one of our resampling examples*. *Note also that we have learned our lesson and are creating networks of summed associations between burbils from different groups rather than the entire network*. *Finally, note that we add parameters as we go along; e.g. dist_eff, defined in the first code chunk, is used in all three models.*
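Since all three ABMs are evaluated with the same three test statistics, it can be convenient to wrap them in a small helper. The function below is a hypothetical convenience we define here (it is not part of any package); the chunks that follow compute the same quantities directly.

```{r compare_nets_helper}
#Hypothetical helper returning the three test statistics used below:
#the Mantel correlation, the summed (signed) difference, and the summed absolute difference
compare_nets<-function(ref,obs){
  mt<-vegan::mantel(ref,obs)
  c(mantel_r=mt$statistic,sum_diff=sum(ref-obs),sum_abs_diff=sum(abs(ref-obs)))
}
```

For example, `compare_nets(R_gn, group_net)` would reproduce the three values calculated at the end of the next chunk.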
```{r agent_based_models_1, warning=FALSE, fig.width=8, fig.height=6}
#Here we set the standard deviation for how far burbil subgroups tend to travel from their home range centre (we will assume we know this for now)
#Note that we have used the value we originally used to generate the data here. Feel free to change the value and see what effect it has
#This will be used for all three reference models
dist_eff<-2

#First we need our group locations (printed below)
print(group_locs)

#We now create the observed between-group network
group_net<-matrix(0,nr=dim(group_locs)[1],nc=dim(group_locs)[1])
for(i in 1:nrow(full_net)){
  for(j in 1:ncol(full_net)){
    if(g_tot[i]!=g_tot[j]){
      group_net[g_tot[i],g_tot[j]]<-group_net[g_tot[i],g_tot[j]]+full_net[i,j]
    }
  }
}

#And we can then plot the observed between-group network
gnet<-graph.adjacency(group_net,mode="undirected",weighted=TRUE)
plot(gnet,vertex.color="light blue",edge.width=(edge_attr(gnet)$weight)^2)

##########################################################

##We now generate our reference model with a truly spatially explicit ABM (i.e. we remove the clan effect and allow individuals to be observed independently and not necessarily as subgroups)
#We assume that each individual is observed 100 times but this assumption can be changed if desired

#Empty list to store new locations
R_indiv_locs<-list()

#Assign individual locations
for(i in 1:nrow(full_net)){
  tx<-round(rnorm(100,group_locs[g_tot[i],1],dist_eff))
  ty<-round(rnorm(100,group_locs[g_tot[i],2],dist_eff))
  R_indiv_locs[[i]]<-cbind(tx,ty)
}

#Generate full network for associations between individuals
R_fn<-matrix(NA,nr=nrow(full_net),nc=ncol(full_net))
for(i in 1:nrow(R_fn)){
  for(j in 1:ncol(R_fn)){
    R_fn[i,j]<-sum(rowSums(R_indiv_locs[[i]]==R_indiv_locs[[j]])==2)/100
  }
}
diag(R_fn)<-0

#Generate network of summed between-group associations
R_gn<-matrix(0,nr=dim(group_locs)[1],nc=dim(group_locs)[1])
for(i in 1:nrow(R_fn)){
  for(j in 1:ncol(R_fn)){
    if(g_tot[i]!=g_tot[j]){
      R_gn[g_tot[i],g_tot[j]]<-R_gn[g_tot[i],g_tot[j]]+R_fn[i,j]
    }
  }
}

#Plot network generated
RGN<-graph.adjacency(R_gn,mode="undirected",weighted=TRUE)
plot(RGN,vertex.color="light blue",edge.width=2+(edge_attr(RGN)$weight)^2)

#Calculate values for the test statistics
vegan::mantel(R_gn,group_net)
sum(R_gn-group_net)
sum(abs(R_gn-group_net))
```

Note that for this first reference model, while the reference network is fairly well correlated with the observed one, the edge weights themselves are very different and upwardly biased.

***

The first reference model therefore does not explain our observed between-group network well at all. So we now re-simulate subgroups (assuming we have knowledge of their typical properties) and assign a location to every subgroup, instead of making the model purely individual-based. We maintain group sizes from the original population.
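Before that, it is worth taking up the earlier suggestion of running a stochastic ABM repeatedly to build a full reference distribution rather than a single network. The sketch below simply wraps the spatially explicit ABM from the chunk above in a function and replicates it; 20 replicates are used to keep runtime down, where many more would be typical in practice.

```{r abm_reference_distribution, fig.width=8, fig.height=6}
#Wrap the spatially explicit ABM above in a function that returns the between-group network
sim_spatial_abm<-function(){
  #Assign 100 locations to each individual around its group's home range centre
  R_locs<-lapply(1:nrow(full_net),function(i){
    cbind(round(rnorm(100,group_locs[g_tot[i],1],dist_eff)),round(rnorm(100,group_locs[g_tot[i],2],dist_eff)))
  })
  #Associations occur when two individuals share a location on the same observation
  R_fn<-matrix(0,nr=nrow(full_net),nc=ncol(full_net))
  for(i in 1:nrow(R_fn)){
    for(j in 1:ncol(R_fn)){
      R_fn[i,j]<-sum(rowSums(R_locs[[i]]==R_locs[[j]])==2)/100
    }
  }
  diag(R_fn)<-0
  #Sum associations between groups
  R_gn<-matrix(0,nr=nrow(group_locs),nc=nrow(group_locs))
  for(i in 1:nrow(R_fn)){
    for(j in 1:ncol(R_fn)){
      if(g_tot[i]!=g_tot[j]){R_gn[g_tot[i],g_tot[j]]<-R_gn[g_tot[i],g_tot[j]]+R_fn[i,j]}
    }
  }
  R_gn
}
#Build a small reference distribution of Mantel correlations (increase the number of replicates in practice)
ref_rs<-replicate(20,vegan::mantel(sim_spatial_abm(),group_net)$statistic)
hist(ref_rs,xlab="Mantel r",col="grey",main="")
```

With that aside done, we now return to the subgroup-based re-simulation.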
```{r agent_based_models_2, warning=FALSE, fig.width=8, fig.height=6} #We have copied/pasted code from where we first generated our GBIs and then changed object names #Create a list to store individual IDs Rindss<-list() #Create a list to store group sizes Rgss<-list() #Create a list to store the sex of each individual Rsexes<-list() #Create a list to store the age of each individual Rages<-list() #Create a list to store the nose colour of each individual Rnoses<-list() #Create a list to store information on which day a subgroup is observed on Rdaysl<-list() #Create a list to store a group-by-individual matrix for each burbil group Rgbis<-list() #Set the mean number of subgroups observed for each group each day Rsg_mn<-5 #Set the strength of assortativity based on nose colour #Set a number between 0 and 1 Rsg_ass<-0.2 #Generate association data within each burbil group! for(j in 1:n_groups){ #individual identities Rinds<-seq(1,n_inds[j],1) Rindss[[j]]<-Rinds #group size gs<-length(Rinds) Rgss[[j]]<-gs #sex sex<-sample(c("M","F"),gs,replace=TRUE) Rsexes[[j]]<-sex #age age<-sample(c("AD","SUB","JUV"),gs,replace=TRUE,prob=c(0.6,0.2,0.2)) Rages[[j]]<-age #nose nose<-sample(c("RED","ORANGE"),gs,replace=TRUE,prob=c(0.7,0.3)) Rnoses[[j]]<-nose ################################# #Define number of subgroups on the first day n_sg<-rpois(1,Rsg_mn-1)+1 #find halfway point max_red<-floor(n_sg/2) #Sample subgroups on the first day subgroups1<-sample(n_sg,sum(nose=="RED"),replace=TRUE,prob=c(rep(0.5+Rsg_ass,max_red),rep(0.5-Rsg_ass,n_sg-max_red))) subgroups2<-sample(n_sg,sum(nose=="ORANGE"),replace=TRUE,prob=c(rep(0.5-Rsg_ass,max_red),rep(0.5+Rsg_ass,n_sg-max_red))) subgroups<-rep(NA,gs) subgroups[nose=="RED"]<-subgroups1 subgroups[nose=="ORANGE"]<-subgroups2 #Store relevant information in the group-by-individual matrix and days vector Rgbi<-matrix(0,nc=gs,nr=n_sg) Rgbi[cbind(subgroups,seq(1,gs,1))]<-1 Rdays<-rep(1,nrow(Rgbi)) #Repeat process over 100 days of observations for(i in 2:100){ n_sg<-rpois(1,Rsg_mn-1)+1 #find halfway point max_red<-floor(n_sg/2) subgroups1<-sample(n_sg,sum(nose=="RED"),replace=TRUE,prob=c(rep(0.5+Rsg_ass,max_red),rep(0.5-Rsg_ass,n_sg-max_red))) subgroups2<-sample(n_sg,sum(nose=="ORANGE"),replace=TRUE,prob=c(rep(0.5-Rsg_ass,max_red),rep(0.5+Rsg_ass,n_sg-max_red))) subgroups<-rep(NA,gss[[j]]) subgroups[nose=="RED"]<-subgroups1 subgroups[nose=="ORANGE"]<-subgroups2 tgbi<-matrix(0,nc=gs,nr=n_sg) tgbi[cbind(subgroups,seq(1,gs,1))]<-1 Rdays<-c(Rdays,rep(i,nrow(tgbi))) Rgbi<-rbind(Rgbi,tgbi) } #We edit the group-by-individual matrix and days vector to delete any "empty" groups Rgbi2<-Rgbi[rowSums(Rgbi)>0,] Rdays<-Rdays[rowSums(Rgbi)>0] Rgbi<-Rgbi2 Rdaysl[[j]]<-Rdays Rgbis[[j]]<-Rgbi } Rsglocs<-list() for(i in 1:n_groups){ tx<-rep(NA,dim(Rgbis[[i]])[1]) ty<-rep(NA,dim(Rgbis[[i]])[1]) Rsglocs[[i]]<-data.frame(tx,ty) names(Rsglocs[[i]])<-c("x","y") Rsglocs[[i]]$x<-group_locs[i,1]+round(rnorm(dim(Rgbis[[i]])[1],0,dist_eff)) Rsglocs[[i]]$y<-group_locs[i,2]+round(rnorm(dim(Rgbis[[i]])[1],0,dist_eff)) } #We now calculate the full population association network R_fn2<-matrix(0,nr=n_tot,nc=n_tot) #Counts up between-group associations for(i in 1:100){ for(j in 1:(n_groups-1)){ for(k in (j+1):n_groups){ tA<-paste0(Rsglocs[[j]][,1],"-",Rsglocs[[j]][,2]) tB<-paste0(Rsglocs[[k]][,1],"-",Rsglocs[[k]][,2]) tA2<-tA[Rdaysl[[j]]==i] tB2<-tB[Rdaysl[[k]]==i] tt<-match(tA2,tB2) if(sum(is.na(tt))