############## 'Multi-network Network-Based Diffusion Analysis reveals vertical cultural transmission of sponge tool use within dolphin matrilines'
## authors: Sonja Wild; Simon J. Allen; Michael Krützen; Stephanie L. King; Livia Gerber; William J.E. Hoppitt
####################################################################################################

######## Part 1: calculate dyadic home range overlaps

## load all necessary libraries
## packages needed: sp, OpenStreetMap, rgdal, ggplot2, ggmap, XML, adehabitatHR, raster, dismo, rgeos
library(sp)
library(OpenStreetMap)
library(rgdal)
library(ggplot2)
library(ggmap)
library(XML)
library(adehabitatHR)
library(raster)
library(dismo)
library(rgeos)

# parts of the following code are based on lines of code suggested by Calenge in 2011 as a response to home range questions in a forum.
# The original entry can be found here:
# http://r-sig-geo.2731867.n2.nabble.com/Walruses-and-adehabitatHR-class-estUDm-exclusion-of-non-habitat-pixels-and-summary-over-all-animals-td6497315.html

## read in GPS data:
# available under https://datadryad.org/bitstream/handle/10255/dryad.211825/GPS%20locations.csv?sequence=1
setwd("C:/...")                                     # set working directory
GPS <- read.csv("GPS locations.csv", sep=",")
class(GPS)
head(GPS)

# extract ID names from the GPS file
IDs <- sort(as.vector(unique(GPS[,"id_individual"])))
length(IDs)
df <- data.frame(IDs)

# read the shapefile of the water body of the Western Gulf of Shark Bay
shape <- readOGR(dsn="C:/.../water_area_shapefile", layer="water_area_shape_UTM")

## transform to UTM
shape <- spTransform(shape, CRS("+init=epsg:32749"))

## double check the shapefile by plotting
plot(shape)

# create a raster with the extent of the shapefile
rgrid <- raster(extent(shape))

## set the resolution of the grid to 100 m
res(rgrid) <- c(100, 100)

## assign a value of 1 to each grid cell
rgrid[] <- 1

# clip the grid layer with the shapefile to obtain a grid that overlays the water body only (no land)
rgrid_msk <- mask(rgrid, shape)

# assign a 0 to all cells that are not water
rgrid_msk[is.na(rgrid_msk)] <- 0

# double check by plotting
plot(rgrid_msk)

## set the layer CRS to UTM zone 49 South (might return an error if already in UTM zone 49 South)
proj4string(rgrid_msk) <- CRS(proj4string(shape))

# convert to a SpatialPointsDataFrame and grid it
grid_ae <- as(rgrid_msk, 'SpatialPointsDataFrame')
grid_ae <- grid_ae[!is.na(grid_ae@data$layer), ]
gridded(grid_ae) <- TRUE
summary(grid_ae)

# assign to a new object hab
hab <- grid_ae

## convert GPS to a SpatialPointsDataFrame and transform to UTM 49 S
xy_GPS <- GPS[c("longitude", "latitude")]
coordinates(xy_GPS) <- c("longitude", "latitude")
GPS_sp <- SpatialPointsDataFrame(xy_GPS, GPS)

## set coordinate system as WGS84 (EPSG code 4326)
proj4string(GPS_sp) <- CRS("+init=epsg:4326")

# transform coordinates into UTM zone 49 South (EPSG code 32749)
GPS_sp <- spTransform(GPS_sp, CRS("+init=epsg:32749"))

## run kernel density estimates using the habitat as grid.
# choice of smoothing factor: href oversmooths when using the bivariate normal kernel. With the Epanechnikov kernel,
# estimates are a little more accurate, but still oversmoothed. LSCV undersmooths drastically and is hence not useful.
# (A quick visual check of one individual's UD is sketched below.)

# run a first kernel with the Epanechnikov kernel and href
ud_epa <- kernelUD(GPS_sp[,1], h="href", grid=grid_ae, kern="epa")

# assign to a new object
ud_epa_new <- ud_epa

## extract the smoothing parameter for each individual
smoothing <- NULL
for (i in 1:length(IDs)){
  h <- ud_epa[[i]]@h$h
  smoothing[i] <- h
}
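## Optional visual check (not part of the original pipeline; a minimal sketch assuming the objects
## ud_epa and shape created above, with individual 1 as an arbitrary example): overlay one individual's
## href-based UD and its 95% contour on the water-body outline to judge the degree of oversmoothing.
image(ud_epa[[1]])                                                     # utilisation distribution of the first individual
plot(shape, add=TRUE)                                                  # water-body outline
plot(getverticeshr(ud_epa[[1]], percent=95), add=TRUE, border="red")   # 95% home range contour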
# Smoothing parameters need to be made smaller; large values of h need to be reduced more than already small values of h.
# The adjustment was carefully chosen after visual inspection of 12 home ranges.

# set a minimum of 1000 and a maximum of 4000
smoothing_red <- pmax(smoothing, 1000)
smoothing_red <- pmin(smoothing_red, 4000)

# then adjust the smoothing factors
smoothing_red <- 0.5*smoothing_red + 1500

# rerun the kernel density calculations with the adjusted smoothing factors
for (i in 1:length(IDs)){                 # each individual separately
  sub <- subset(GPS, subset=GPS$id_individual==levels(GPS$id_individual)[i])  # subset the initial data frame with the GPS points
  sub <- sub[,-1]                         # remove the survey id column

  #### create a spatial points data frame
  xy <- sub[c("longitude", "latitude")]
  coordinates(xy) <- c("longitude", "latitude")
  sub_GPS <- SpatialPointsDataFrame(xy, sub)

  ## set coordinate system as WGS84 (EPSG code 4326)
  proj4string(sub_GPS) <- CRS("+init=epsg:4326")

  # transform coordinates into UTM zone 49 South (EPSG code 32749)
  sub_GPS <- spTransform(sub_GPS, CRS("+init=epsg:32749"))

  h <- smoothing_red[i]                   # use the adjusted smoothing parameter

  ud <- kernelUD(sub_GPS,                 # run kernelUD with the adjusted smoothing parameter
                 h=h,
                 grid=grid_ae,
                 kern="epa")

  ud_epa_new[[i]]$ud <- ud$ud             # save the UD in the estUDm object created above
  ud_epa_new[[i]]@h$h <- h                # save the adjusted smoothing factor
}
# ignore the warning that xy should contain only one column

# change to a spatial pixels data frame
udspdf <- estUDm2spixdf(ud_epa_new)
fullgrid(udspdf) <- TRUE
fullgrid(hab) <- TRUE

# multiply each UD with the 1/0 habitat layer (hab) and rescale; the division by 10000 accounts for the
# 100 m x 100 m cell size, so that each UD integrates to 1 over the water grid
resu <- lapply(1:ncol(udspdf), function(i) {
  udspdf[[i]] * hab[[1]] / sum(udspdf[[i]] * hab[[1]]) / 10000
})
resu <- as.data.frame(resu)
names(resu) <- names(udspdf@data)
udspdf@data <- resu
fullgrid(udspdf) <- FALSE

# transfer back into an object of class estUDm
re <- lapply(1:ncol(udspdf), function(i) {
  so <- new("estUD", udspdf[,i])
  so@h <- list(h=0, meth="specified")     # fake value
  so@vol <- FALSE
  return(so)
})
names(re) <- names(udspdf)                # re-assign names
class(re) <- "estUDm"

# save object
save(re, file="Kernel_densities_epa_first five.RData")
load("Kernel_densities_epa_first five.RData")

# calculate home range overlaps from the adjusted kernels using the 95% utilisation distributions
# (a quick check of the resulting overlap values is sketched below)
overlaps_UDOI_epa <- kerneloverlaphr(re, method="UDOI", percent=95, conditional=TRUE)

# write object as csv file
write.csv(overlaps_UDOI_epa, file="overlaps_UDOI_first five.csv")
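## Optional check (not part of the original pipeline; a minimal sketch assuming the overlaps_UDOI_epa
## matrix created above): UDOI equals 0 for non-overlapping home ranges and can exceed 1 for strongly
## overlapping, non-uniform ones, so a quick look at the dyadic distribution helps flag obvious problems
## before the matrix is used as the ecological network in the NBDA.
range(overlaps_UDOI_epa[upper.tri(overlaps_UDOI_epa)])   # range of dyadic overlap values
hist(overlaps_UDOI_epa[upper.tri(overlaps_UDOI_epa)],
     main="Dyadic UDOI home range overlaps", xlab="UDOI")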
#######################################################################################################################################
####### PART 2: applying NBDA to the sponging data

# load the NBDA package
install.packages("devtools")
library(devtools)
install_github("whoppitt/NBDA")
library(NBDA)

# set working directory here
# all networks are available under https://datadryad.org/review?doi=doi:10.5061/dryad.sc26m6c

# read social networks (vertical and horizontal)
SRI_vert_all <- read.csv("social_vertical.csv", row.names=1, header=TRUE)
SRI_vert_all <- as.matrix(SRI_vert_all)
SRI_hor_no_vert_all <- read.csv("social_horizontal.csv", row.names=1, header=TRUE)
SRI_hor_no_vert_all <- as.matrix(SRI_hor_no_vert_all)

# read ecological network
ecol_all <- read.csv("HR_overlaps.csv", row.names=1, header=TRUE)
ecol_all <- as.matrix(ecol_all)

# read relatedness network
relate_all <- read.csv("relatedness.csv", row.names=1, header=TRUE)
relate_all <- as.matrix(relate_all)

# read individual-level variables (ILVs)
ILV_all <- read.csv("ILVs.csv", header=TRUE, sep=",")
ILV_all[ILV_all==""] <- NA

# get the list of IDs that have been seen at least 7 times and are weaned
ILV <- subset(ILV_all, subset=ILV_all$Number_sightings>6)   # at least 7 sightings
ILV <- subset(ILV, subset=ILV$Not_weaned==0)
IDs <- ILV$id_individual
length(IDs)                                                 # 415 individuals remain

# reduce the vertical social network to the 415 individuals that make the cut-off
# (an optional check that all reduced networks share the same individual order follows below)
num <- which(colnames(SRI_vert_all) %in% IDs)
SRI_vert_all <- SRI_vert_all[num, num]
dim(SRI_vert_all)
class(SRI_vert_all)

# repeat for the horizontal social network
num <- which(colnames(SRI_hor_no_vert_all) %in% IDs)
SRI_hor_no_vert_all <- SRI_hor_no_vert_all[num, num]
dim(SRI_hor_no_vert_all)
class(SRI_hor_no_vert_all)

# repeat for the ecological network
num <- which(colnames(ecol_all) %in% IDs)
ecol <- ecol_all[num, num]
dim(ecol)
class(ecol)

# repeat for the genetic network
num <- which(colnames(relate_all) %in% IDs)
relate <- relate_all[num, num]
dim(relate)
class(relate)

# extract spongers (learners and demonstrators)
spongers <- subset(ILV, subset=ILV$Sponger=="yes")
spongers <- spongers[order(spongers$Sp_Order_acquisition),]

# get ID codes of all spongers
spongers_all <- spongers$id_individual

# extract IDs of all spongers that are treated as learners
sponger_learners <- as.vector(subset(spongers$id_individual, subset=spongers$Demons_sponging_forage=="no"))

# extract IDs of all spongers treated as demonstrators
sponger_demons <- as.vector(subset(spongers$id_individual, subset=spongers$Demons_sponging_forage=="yes"))

# extract order of acquisition
order <- NULL                                 # create an object to store the order of acquisition
for (i in 1:length(sponger_learners)){        # for each sponger learner, extract its position in the networks and the ILV data frame
  order[i] <- which(IDs==sponger_learners[i])
}
order <- as.vector(order)
OAc <- order

# extract positions of demonstrators
demons <- NULL                                # create an object to store the demonstrator positions
for (i in 1:length(sponger_demons)){          # for each sponger demonstrator, extract its position in the networks and the ILV data frame
  demons[i] <- which(IDs==sponger_demons[i])
}
demons <- as.vector(demons)                   # contains the positions of all sponger demonstrators

# create a vector of length(IDs) with 0 for non-demonstrators and 1 for demonstrators
demons_vector <- c(rep(0, length(IDs)))
for (i in demons){
  demons_vector[i] <- 1
}

## prepare individual-level variables
Sex <- ILV$Sex_1_0
Av_water_depth <- ILV$Av_water_depth
Av_group_size <- ILV$Av_group_size
HaplotypeE <- (ILV$Haplotype=="E")*1
HaplotypeE[is.na(HaplotypeE)] <- 0
not.E <- 1-HaplotypeE                         # all individuals with haplotype E are set to 0 (baseline), all others to 1
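## Optional consistency check (not part of the original pipeline; a minimal sketch using the objects
## created above): NBDA requires that the row/column order of every network matches the order of
## individuals in the ILV table. This assumes the csv files store individuals in the same (sorted)
## order; if any check returns FALSE, the matrices would need to be reordered before proceeding.
all(colnames(SRI_vert_all) == as.character(IDs))
all(colnames(SRI_hor_no_vert_all) == as.character(IDs))
all(colnames(ecol) == as.character(IDs))
all(colnames(relate) == as.character(IDs))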
# number of networks
n.assMatrix <- 4

# create an array with the four networks (vertical social, horizontal social, ecological, relatedness)
assMatrix.B <- array(data=c(SRI_vert_all, SRI_hor_no_vert_all, ecol, relate),
                     dim=c(nrow(SRI_vert_all), ncol(SRI_vert_all), n.assMatrix))

# all ILVs need to go into a matrix
Sex <- matrix(data=Sex, nrow=length(IDs), byrow=F)
Av_water_depth <- matrix(data=Av_water_depth, nrow=length(IDs), byrow=F)
Av_group_size <- matrix(data=Av_group_size, nrow=length(IDs), byrow=F)
not.E <- matrix(data=not.E, nrow=length(IDs), byrow=F)

ILVs <- c("Sex", "Av_water_depth", "Av_group_size", "not.E")

label <- "spongingC"

# extract the sponger learners with no maternity data available
sponger_filter <- subset(spongers, subset=spongers$Demons_sponging_forage=="no")
sponger_filter2 <- sort(subset(sponger_filter$id_individual, subset=sponger_filter$Mum_known=="no"))
vec <- NULL
for (i in 1:length(sponger_filter2)){
  a <- which(IDs==sponger_filter2[i])
  vec[i] <- a
}
# vec holds the positions of spongers with no maternity data;
# they get set to 0 in the presence matrix (via the NBDA filter function)
filter <- paste0(label, "_", vec)

# create the NBDA data object (OADA)
nbdaDataSPONGING.C <- nbdaData(label=label,
                               assMatrix=assMatrix.B,
                               asoc_ilv=ILVs,
                               int_ilv=ILVs,
                               multi_ilv=ILVs,
                               orderAcq=OAc,
                               asocialTreatment="constant",
                               demons=demons_vector)

# apply the filter to exclude individuals without maternity data as learners
nbdaDataSPONGING.C.filter <- filteredNBDAdata(nbdadata=nbdaDataSPONGING.C, filter="id", exclude=filter)

# in the constraints vectors used below, the first four positions correspond to the networks
# (vertical, horizontal, ecological, relatedness), the following 4 to the asoc ILVs, then 4 to the
# int ILVs, and then 4 to the multi ILVs; all multi ILV positions are set to 0.
# check the ILVs for asoc (ILVs only influence asocial learning), int (ILVs influence asocial and social learning independently)
# and multi (ILVs influence both asocial and social learning to the same extent).
# in this analysis, the multi ILVs are set to 0 and therefore not estimated (in the constraintsVectMatrix)
nbdaDataSPONGING.C.filter@asoc_ilv
nbdaDataSPONGING.C.filter@int_ilv
nbdaDataSPONGING.C.filter@multi_ilv
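## Optional check (not part of the original pipeline; a minimal sketch using the event.id slot of the
## NBDA data object, which is also referenced further below): confirm how many acquisition events
## remain after filtering out learners without maternity data.
length(unique(nbdaDataSPONGING.C.filter@event.id))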
# the following part creates a matrix, constraintsVectMatrix, with all possible combinations of networks and ILVs.
# This matrix is then passed to the oadaAICtable function below to fit each model.
# An explanation of the constraintsVectMatrix is given below (followed by a small worked example).

# set the number of networks and the number of ILVs
num_networks <- 4
num_ILVs <- 4

# create a vector for the full model with all networks and ILVs (excluding the multi ILV slots, which are all set to 0)
vector <- seq(1:(num_networks+(2*num_ILVs)))

count <- 0          # create an object 'count', which starts at 0

# create a matrix to save the combinations of parameters in
constraintsVect <- matrix(nrow=10000000, ncol=(num_networks+(2*num_ILVs)))

# the first row gets filled with a sequence from 1:12 (all parameters are estimated, none are set to 0)
constraintsVect[1,] <- seq(1:(num_networks+(2*num_ILVs)))

for (i in 1:(num_networks+(2*num_ILVs)-1)){              # a loop over each possible number of parameters constrained to 0
  # for each number of constrained parameters (e.g. 2), create all possible combinations of positions between 1:12 (e.g. 2&8, 1&5, etc.)
  array <- combn(vector, i, FUN=NULL, simplify=TRUE)
  for (j in 1:length(array[1,])){                        # for each of those combinations
    vector2 <- seq(1:((num_networks+(2*num_ILVs))-i))    # create a second vector with 12-i free spaces
    position <- array[,j]                                # positions to be constrained in this combination
    count <- count+1                                     # add +1 to the count
    for (k in position){                                 # at each position to be constrained
      vector2 <- append(vector2, 0, after=k-1)           # add a 0 (e.g. 1 0 2 3 ...; 1 2 0 3 4 5 ...; 1 2 3 0 4 5 ...)
    }
    constraintsVect[count+1,] <- vector2                 # and save the resulting vector in the matrix
  }
}

constraintsVect <- na.omit(constraintsVect)                                                     # remove all NA rows from the matrix
constraintsVect <- rbind(constraintsVect, rep.int(0, (num_networks+2*(num_ILVs))))              # add a last row with all 0
constraintsVect <- cbind(constraintsVect, matrix(0, ncol=4, nrow=length(constraintsVect[,1])))  # add 4 columns at the end with all 0 (multi_ilv)
constraintsVectMatrix <- constraintsVect

# Each line of the resulting object specifies a model.
# Each element in the line corresponds to a parameter in the model. When an element is zero, that parameter is constrained
# to 0. When two elements have the same value, they are constrained to have the same value (not relevant here).
# For example:
constraintsVectMatrix[1,]
# The first four elements (1-4) are the s parameters for each network, so in this model all networks are included.
# The next four elements (5-8) are the parameters determining the effect each ILV has on asocial learning; in this model
# all ILVs are assumed to affect asocial learning.
# The next four elements (9-12) are the parameters determining the effect each ILV has on social learning; in this model
# all ILVs are assumed to affect social learning.
# The final four elements determine the effect each parameter has on both asocial and social learning (the multiplicative
# NBDA model). In our analysis we estimate the effects each ILV has on asocial and social learning independently, so these
# parameters are constrained to be zero for all models fitted.
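## Worked example (illustrative only): the constraints vector for a model containing only the vertical
## social network and an effect of sex on social learning would be written as below (parameter 1 = s for
## the vertical network, parameter 2 = the int effect of sex, everything else constrained to 0). Such a
## model can also be fitted on its own with the same functions used further below for the best model.
exampleConstraints <- c(1,0,0,0,  0,0,0,0,  2,0,0,0,  0,0,0,0)
exampleData <- constrainedNBDAdata(nbdadata=nbdaDataSPONGING.C.filter, constraintsVect=exampleConstraints)
exampleModel <- oadaFit(exampleData)
exampleModel@outputPar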
#######################################################################################################################################
#######################################################################################################################################
# run the NBDA using the NBDA data object and the constraintsVectMatrix
# this fits every model specified by the constraintsVectMatrix
tableSPONGING.C.filter <- oadaAICtable(nbdadata=nbdaDataSPONGING.C.filter,
                                       constraintsVectMatrix=constraintsVectMatrix,
                                       writeProgressFile=T)
print(tableSPONGING.C.filter)
save(tableSPONGING.C.filter, file="AIC table sponging.Rdata")
load("AIC table sponging.Rdata")
write.csv(as.data.frame(tableSPONGING.C.filter@printTable), "AIC table sponging.csv")

## create a new object with a printTable that excludes unfitted models
newTableSPONGING <- tableSPONGING.C.filter
newTableSPONGING@printTable <- tableSPONGING.C.filter@printTable[!is.nan(tableSPONGING.C.filter@printTable$aicc)&!is.na(tableSPONGING.C.filter@printTable$aicc),]
newTableSPONGING@aicc <- tableSPONGING.C.filter@aicc[!is.nan(tableSPONGING.C.filter@aicc)&!is.na(tableSPONGING.C.filter@aicc)]
newTableSPONGING@MLEs <- tableSPONGING.C.filter@MLEs[!is.nan(tableSPONGING.C.filter@aicc)&!is.na(tableSPONGING.C.filter@aicc),]
newTableSPONGING@MLEilv <- tableSPONGING.C.filter@MLEilv[!is.nan(tableSPONGING.C.filter@aicc)&!is.na(tableSPONGING.C.filter@aicc),]
newTableSPONGING@MLEint <- tableSPONGING.C.filter@MLEint[!is.nan(tableSPONGING.C.filter@aicc)&!is.na(tableSPONGING.C.filter@aicc),]
newTableSPONGING@printTable <- newTableSPONGING@printTable[order(newTableSPONGING@printTable$aicc),]
newTableSPONGING@printTable$deltaAICc <- newTableSPONGING@printTable$aicc - newTableSPONGING@printTable$aicc[1]

# calculate support for the fitted models (a small numeric illustration of the Akaike weight formula is given below)
newTableSPONGING@printTable$RelSupport <- exp(-0.5*newTableSPONGING@printTable$deltaAICc)
newTableSPONGING@printTable$AkaikeWeight <- newTableSPONGING@printTable$RelSupport/sum(newTableSPONGING@printTable$RelSupport)
newTableSPONGING@deltaAIC <- newTableSPONGING@aicc - min(newTableSPONGING@aicc)
newTableSPONGING@RelSupport <- exp(-0.5*newTableSPONGING@deltaAIC)
newTableSPONGING@AkaikeWeight <- newTableSPONGING@RelSupport/sum(newTableSPONGING@RelSupport)

dim(tableSPONGING.C.filter@printTable)[1] - dim(newTableSPONGING@printTable)[1]
dim(tableSPONGING.C.filter@printTable)[1]
## 2 out of 4096 models could not be fitted - probably too many parameters for the dataset

# save the reduced AIC table as csv
write.csv(newTableSPONGING@printTable, file="sponging.AIC_table_CORRECTED.csv")

# obtain the support for each network combination
networksSupport_sponging <- networksSupport(newTableSPONGING)
networksSupport_sponging
write.csv(networksSupport_sponging, file="networksSupport_sponging.csv")
# 83.7% support for the vertical social network only (1:0:0:0)

# extract the support for each variable
variable_support <- variableSupport(newTableSPONGING, includeAsocial=T)
variable_support
write.csv(variable_support, file="variable_support_sponging.csv")

# extract model-averaged medians
MLE_med <- modelAverageEstimates(newTableSPONGING, averageType="median")
MLE_med
write.csv(MLE_med, "MLE_sponging.csv")

#######################################################################################################################################
# Getting 95% confidence intervals using profile likelihood techniques.
# This is vital for s parameters, since CIs based on SEs would be highly misleading due to frequent asymmetry in the profile likelihood.
#######################################################################################################################################

print(newTableSPONGING)[1:10,]

# constraintsVectMatrix[4076,] corresponds to the best model (vertical social transmission + social.sex)
bestModelData <- constrainedNBDAdata(nbdadata=nbdaDataSPONGING.C.filter, constraintsVect=constraintsVectMatrix[4076,])
model.best.social <- oadaFit(bestModelData)
model.best.social@outputPar
# [1] 1.233004e+10 -4.840486e+00
model.best.social@optimisation
model.best.social@aicc
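## Numeric illustration of the Akaike weight formula used above (hypothetical deltaAICc values, not
## results from this analysis): two models with deltaAICc of 0 and 2 receive weights of roughly 0.73 and 0.27.
exp(-0.5*c(0, 2)) / sum(exp(-0.5*c(0, 2)))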
# extract the profile likelihood; which=1 extracts the first parameter (the s parameter for vertical social transmission)
plotProfLik(which=1, model=model.best.social, range=c(0, 1e30), resolution=20)
# Here we can see that we cannot set an upper limit on s (see the explanation below).

# zoom in to locate the lower limit for s
plotProfLik(which=1, model=model.best.social, range=c(0, 500), resolution=20)
plotProfLik(which=1, model=model.best.social, range=c(30, 40), resolution=20)
profLikCI(which=1, model=model.best.social, lowerRange=c(30, 40))
# Lower CI  Upper CI
# 33.08928  Inf

######################################################################################################################
# To explain why we cannot set an upper limit on s, we extract a table with a row for each acquisition event.
# For each event we get: the connection of the individual that learned;
# the total connections to informed individuals across the population (in this case equal to the number of dolphins
# with a sponging mother, since connections are binary);
# and the maximum connection strength (again, this should be 1, but just to confirm).
eventTable <- NULL
for(i in 1:9){
  eventTable <- rbind(eventTable,
                      c((bestModelData@stMetric[bestModelData@event.id==unique(bestModelData@event.id)[i]])[bestModelData@status[bestModelData@event.id==unique(bestModelData@event.id)[i]]==1],
                        sum(bestModelData@stMetric[bestModelData@event.id==unique(bestModelData@event.id)[i]]),
                        max(bestModelData@stMetric[bestModelData@event.id==unique(bestModelData@event.id)[i]])))
}
dimnames(eventTable)[[2]] <- c("Connection of Learner", "Sum of Connections", "Maximum Connection")
eventTable

# We can see that in every case the individual to learn sponging has an informed mother.
# There are a variable number of individuals with sponging mothers across events. But importantly, the dolphin to acquire
# the behaviour has the (joint) maximum connection to informed individuals.
# In any diffusion where, for every event, the individual to acquire the behaviour is always the one with the maximum
# connection to informed individuals (even if it is the joint maximum), we cannot set an upper limit on s with OADA.
# Since the next individual to learn is always the one that the network would predict as being most likely (or joint most
# likely), a value of s=Inf is plausible. (A small numeric illustration follows below.)
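## Small numeric illustration of the point above (hypothetical connection values; simplified additive
## OADA rate with no ILVs, i.e. rate proportional to 1 + s*connection). The event likelihood is the
## learner's rate divided by the summed rates of all naive individuals; if the learner holds the (joint)
## maximum connection, this ratio approaches a positive constant as s grows, so very large s is never penalised.
conn <- c(1, 1, 0, 0)       # hypothetical connections to informed individuals; individual 1 is the learner
s_values <- c(1, 10, 1e6)
sapply(s_values, function(s) (1 + s*conn[1]) / sum(1 + s*conn))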
###########################################################
# which=2 extracts the second parameter (sex)
plotProfLik(which=2, model=model.best.social, range=c(-10, -2), resolution=20)
profLikCI(which=2, model=model.best.social, lowerRange=c(-10, -6), upperRange=c(-4, -2))
# Lower CI   Upper CI
# -7.971296  -2.246465

# back-transform
exp(c(4.840486e+00, 7.971296, 2.246465))
# [1] 126.530831 2896.608938 9.454256
# Females are an estimated 126x (95% CI = 9.5-2897) faster to learn the behaviour from their mothers than males are.

# estimate what proportion of spongers learned sponging socially from their mothers
prop.solve.social.byevent <- oadaPropSolveByST.byevent(nbdadata=bestModelData, model=model.best.social)
# an output of 1 for each event means that all sponger offspring are estimated to have learned socially from their mothers
prop.solve.social <- oadaPropSolveByST(nbdadata=bestModelData, model=model.best.social)
prop.solve.social
# 100% are estimated to have learned socially

# To get the estimate at the lower bound of s, we first find the corresponding values of the other parameters
# when s1 is constrained (via an offset) to its lower confidence limit.
bestModelDataS1LowerBound <- constrainedNBDAdata(nbdadata=nbdaDataSPONGING.C.filter,
                                                 constraintsVect=constraintsVectMatrix[4076,],
                                                 offset=c(33.08928, rep(0, 15)))
bestModelS1LowerBound <- oadaFit(bestModelDataS1LowerBound, type="asocial")
bestModelS1LowerBound@outputPar

# Now plug these into the propSolve function in one of these two ways:
prop.solve.social.lower <- oadaPropSolveByST(par=c(33.08928, bestModelS1LowerBound@outputPar), model=NULL, nbdadata=bestModelData)
prop.solve.social.lower <- oadaPropSolveByST(model=bestModelS1LowerBound, nbdadata=bestModelDataS1LowerBound)
prop.solve.social.lower
# At least 98.9% are estimated to have learned by social transmission from their mothers.

# In theory we would repeat this for the upper limit, but here there is no need: the upper limit on s is Inf,
# so the upper limit on the proportion is 100%.