############## 'Multi-network Network-Based Diffusion Analysis reveals vertical cultural transmission of sponge tool use within dolphin matrilines'
## authors: Sonja Wild; Simon J. Allen; Michael Krützen; Stephanie L. King; Livia Gerber; William J.E. Hoppitt
####################################################################################################

######## Part 1: calculate dyadic home range overlaps

## load all necessary libraries
## packages needed: sp, OpenStreetMap, rgdal, ggplot2, ggmap, XML, adehabitatHR, raster, dismo, rgeos
library(sp)
library(OpenStreetMap)
library(rgdal)
library(ggplot2)
library(ggmap)
library(XML)
library(adehabitatHR)
library(raster)
library(dismo)
library(rgeos)

# parts of the following code are based on lines of code suggested by Calenge in 2011 as a response to home range questions in a forum.
# The original entry can be found here:
# http://r-sig-geo.2731867.n2.nabble.com/Walruses-and-adehabitatHR-class-estUDm-exclusion-of-non-habitat-pixels-and-summary-over-all-animals-td6497315.html

## read in GPS data:
# available under https://datadryad.org/bitstream/handle/10255/dryad.211825/GPS%20locations.csv?sequence=1
setwd("C:/...")                                     # set working directory
GPS <- read.csv("GPS locations.csv", sep=",")
class(GPS)
head(GPS)

# extract ID names from the GPS file
IDs <- sort(as.vector(unique(GPS[,"id_individual"])))
length(IDs)
df <- data.frame(IDs)

# read the shapefile of the water body of the Western Gulf of Shark Bay
shape <- readOGR(dsn="C:/.../water_area_shapefile", layer="water_area_shape_UTM")

## transform to UTM
shape <- spTransform(shape, CRS("+init=epsg:32749"))

## double check the shapefile by plotting
plot(shape)

# create a raster with the extent of the shapefile
rgrid <- raster(extent(shape))

## set the resolution of the grid to 100 m
res(rgrid) <- c(100, 100)

## assign a value of 1 to each grid cell
rgrid[] <- 1

# clip the grid layer with the shapefile to obtain a grid that overlays the water body only (no land)
rgrid_msk <- mask(rgrid, shape)

# assign a 0 to all cells that are not water
rgrid_msk[is.na(rgrid_msk)] <- 0

# double check by plotting
plot(rgrid_msk)

## set the layer CRS to UTM zone 49 South (might return an error if already in UTM zone 49 South)
proj4string(rgrid_msk) <- CRS(proj4string(shape))

# convert to a SpatialPointsDataFrame and grid it
grid_ae <- as(rgrid_msk, 'SpatialPointsDataFrame')
grid_ae <- grid_ae[!is.na(grid_ae@data$layer), ]
gridded(grid_ae) <- TRUE
summary(grid_ae)

# assign to a new object hab
hab <- grid_ae

## convert GPS to a SpatialPointsDataFrame and transform to UTM 49 S
xy_GPS <- GPS[c("longitude", "latitude")]
coordinates(xy_GPS) <- c("longitude", "latitude")
GPS_sp <- SpatialPointsDataFrame(xy_GPS, GPS)

## set coordinate system as WGS84 (EPSG code 4326)
proj4string(GPS_sp) <- CRS("+init=epsg:4326")

# transform coordinates into UTM zone 49 South (EPSG code 32749)
GPS_sp <- spTransform(GPS_sp, CRS("+init=epsg:32749"))

## run kernel density estimates using the habitat as grid.
# choice of smoothing factor: href oversmooths when using the bivariate normal kernel. With the Epanechnikov kernel,
# estimates are a little more accurate, but still oversmoothed. LSCV undersmooths drastically and is hence not useful.
# (A quick visual check of one individual's UD is sketched below.)

# run a first kernel with the Epanechnikov kernel and href
ud_epa <- kernelUD(GPS_sp[,1], h="href", grid=grid_ae, kern="epa")

# assign to a new object
ud_epa_new <- ud_epa

## extract the smoothing parameter for each individual
smoothing <- NULL
for (i in 1:length(IDs)){
  h <- ud_epa[[i]]@h$h
  smoothing[i] <- h
}
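## Optional visual check (not part of the original pipeline; a minimal sketch assuming the objects
## ud_epa and shape created above, with individual 1 as an arbitrary example): overlay one individual's
## href-based UD and its 95% contour on the water-body outline to judge the degree of oversmoothing.
image(ud_epa[[1]])                                                     # utilisation distribution of the first individual
plot(shape, add=TRUE)                                                  # water-body outline
plot(getverticeshr(ud_epa[[1]], percent=95), add=TRUE, border="red")   # 95% home range contour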
# Smoothing parameters need to be made smaller; large values of h need to be reduced more than already small values of h.
# The adjustment was carefully chosen after visual inspection of 12 home ranges.

# set a minimum of 1000 and a maximum of 4000
smoothing_red <- pmax(smoothing, 1000)
smoothing_red <- pmin(smoothing_red, 4000)

# then adjust the smoothing factors
smoothing_red <- 0.5*smoothing_red + 1500

# rerun the kernel density calculations with the adjusted smoothing factors
for (i in 1:length(IDs)){                 # each individual separately
  sub <- subset(GPS, subset=GPS$id_individual==levels(GPS$id_individual)[i])  # subset the initial data frame with the GPS points
  sub <- sub[,-1]                         # remove the survey id column

  #### create a spatial points data frame
  xy <- sub[c("longitude", "latitude")]
  coordinates(xy) <- c("longitude", "latitude")
  sub_GPS <- SpatialPointsDataFrame(xy, sub)

  ## set coordinate system as WGS84 (EPSG code 4326)
  proj4string(sub_GPS) <- CRS("+init=epsg:4326")

  # transform coordinates into UTM zone 49 South (EPSG code 32749)
  sub_GPS <- spTransform(sub_GPS, CRS("+init=epsg:32749"))

  h <- smoothing_red[i]                   # use the adjusted smoothing parameter

  ud <- kernelUD(sub_GPS,                 # run kernelUD with the adjusted smoothing parameter
                 h=h,
                 grid=grid_ae,
                 kern="epa")

  ud_epa_new[[i]]$ud <- ud$ud             # save the UD in the estUDm object created above
  ud_epa_new[[i]]@h$h <- h                # save the adjusted smoothing factor
}
# ignore the warning that xy should contain only one column

# change to a spatial pixels data frame
udspdf <- estUDm2spixdf(ud_epa_new)
fullgrid(udspdf) <- TRUE
fullgrid(hab) <- TRUE

# multiply each UD with the 1/0 habitat layer (hab) and rescale; the division by 10000 accounts for the
# 100 m x 100 m cell size, so that each UD integrates to 1 over the water grid
resu <- lapply(1:ncol(udspdf), function(i) {
  udspdf[[i]] * hab[[1]] / sum(udspdf[[i]] * hab[[1]]) / 10000
})
resu <- as.data.frame(resu)
names(resu) <- names(udspdf@data)
udspdf@data <- resu
fullgrid(udspdf) <- FALSE

# transfer back into an object of class estUDm
re <- lapply(1:ncol(udspdf), function(i) {
  so <- new("estUD", udspdf[,i])
  so@h <- list(h=0, meth="specified")     # fake value
  so@vol <- FALSE
  return(so)
})
names(re) <- names(udspdf)                # re-assign names
class(re) <- "estUDm"

# save object
save(re, file="Kernel_densities_epa_first five.RData")
load("Kernel_densities_epa_first five.RData")

# calculate home range overlaps from the adjusted kernels using the 95% utilisation distributions
# (a quick check of the resulting overlap values is sketched below)
overlaps_UDOI_epa <- kerneloverlaphr(re, method="UDOI", percent=95, conditional=TRUE)

# write object as csv file
write.csv(overlaps_UDOI_epa, file="overlaps_UDOI_first five.csv")
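## Optional check (not part of the original pipeline; a minimal sketch assuming the overlaps_UDOI_epa
## matrix created above): UDOI equals 0 for non-overlapping home ranges and can exceed 1 for strongly
## overlapping, non-uniform ones, so a quick look at the dyadic distribution helps flag obvious problems
## before the matrix is used as the ecological network in the NBDA.
range(overlaps_UDOI_epa[upper.tri(overlaps_UDOI_epa)])   # range of dyadic overlap values
hist(overlaps_UDOI_epa[upper.tri(overlaps_UDOI_epa)],
     main="Dyadic UDOI home range overlaps", xlab="UDOI")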
#######################################################################################################################################
####### PART 2: applying NBDA to the sponging data

# load the NBDA package
install.packages("devtools")
library(devtools)
install_github("whoppitt/NBDA")
library(NBDA)

# set working directory here
# all networks are available under https://datadryad.org/review?doi=doi:10.5061/dryad.sc26m6c

# read social networks (vertical and horizontal)
SRI_vert_all <- read.csv("social_vertical.csv", row.names=1, header=TRUE)
SRI_vert_all <- as.matrix(SRI_vert_all)
SRI_hor_no_vert_all <- read.csv("social_horizontal.csv", row.names=1, header=TRUE)
SRI_hor_no_vert_all <- as.matrix(SRI_hor_no_vert_all)

# read ecological network
ecol_all <- read.csv("HR_overlaps.csv", row.names=1, header=TRUE)
ecol_all <- as.matrix(ecol_all)

# read relatedness network
relate_all <- read.csv("relatedness.csv", row.names=1, header=TRUE)
relate_all <- as.matrix(relate_all)

# read individual-level variables (ILVs)
ILV_all <- read.csv("ILVs.csv", header=TRUE, sep=",")
ILV_all[ILV_all==""] <- NA

# get the list of IDs that have been seen at least 7 times and are weaned
ILV <- subset(ILV_all, subset=ILV_all$Number_sightings>6)   # at least 7 sightings
ILV <- subset(ILV, subset=ILV$Not_weaned==0)
IDs <- ILV$id_individual
length(IDs)                                                 # 415 individuals remain

# reduce the vertical social network to the 415 individuals that make the cut-off
# (an optional check that all reduced networks share the same individual order follows below)
num <- which(colnames(SRI_vert_all) %in% IDs)
SRI_vert_all <- SRI_vert_all[num, num]
dim(SRI_vert_all)
class(SRI_vert_all)

# repeat for the horizontal social network
num <- which(colnames(SRI_hor_no_vert_all) %in% IDs)
SRI_hor_no_vert_all <- SRI_hor_no_vert_all[num, num]
dim(SRI_hor_no_vert_all)
class(SRI_hor_no_vert_all)

# repeat for the ecological network
num <- which(colnames(ecol_all) %in% IDs)
ecol <- ecol_all[num, num]
dim(ecol)
class(ecol)

# repeat for the genetic network
num <- which(colnames(relate_all) %in% IDs)
relate <- relate_all[num, num]
dim(relate)
class(relate)

# extract spongers (learners and demonstrators)
spongers <- subset(ILV, subset=ILV$Sponger=="yes")
spongers <- spongers[order(spongers$Sp_Order_acquisition),]

# get ID codes of all spongers
spongers_all <- spongers$id_individual

# extract IDs of all spongers that are treated as learners
sponger_learners <- as.vector(subset(spongers$id_individual, subset=spongers$Demons_sponging_forage=="no"))

# extract IDs of all spongers treated as demonstrators
sponger_demons <- as.vector(subset(spongers$id_individual, subset=spongers$Demons_sponging_forage=="yes"))

# extract order of acquisition
order <- NULL                                 # create an object to store the order of acquisition
for (i in 1:length(sponger_learners)){        # for each sponger learner, extract its position in the networks and the ILV data frame
  order[i] <- which(IDs==sponger_learners[i])
}
order <- as.vector(order)
OAc <- order

# extract positions of demonstrators
demons <- NULL                                # create an object to store the demonstrator positions
for (i in 1:length(sponger_demons)){          # for each sponger demonstrator, extract its position in the networks and the ILV data frame
  demons[i] <- which(IDs==sponger_demons[i])
}
demons <- as.vector(demons)                   # contains the positions of all sponger demonstrators

# create a vector of length(IDs) with 0 for non-demonstrators and 1 for demonstrators
demons_vector <- c(rep(0, length(IDs)))
for (i in demons){
  demons_vector[i] <- 1
}

## prepare individual-level variables
Sex <- ILV$Sex_1_0
Av_water_depth <- ILV$Av_water_depth
Av_group_size <- ILV$Av_group_size
HaplotypeE <- (ILV$Haplotype=="E")*1
HaplotypeE[is.na(HaplotypeE)] <- 0
not.E <- 1-HaplotypeE                         # all individuals with haplotype E are set to 0 (baseline), all others to 1
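## Optional consistency check (not part of the original pipeline; a minimal sketch using the objects
## created above): NBDA requires that the row/column order of every network matches the order of
## individuals in the ILV table. This assumes the csv files store individuals in the same (sorted)
## order; if any check returns FALSE, the matrices would need to be reordered before proceeding.
all(colnames(SRI_vert_all) == as.character(IDs))
all(colnames(SRI_hor_no_vert_all) == as.character(IDs))
all(colnames(ecol) == as.character(IDs))
all(colnames(relate) == as.character(IDs))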
# number of networks
n.assMatrix <- 4

# create an array with the four networks (vertical social, horizontal social, ecological, relatedness)
assMatrix.B <- array(data=c(SRI_vert_all, SRI_hor_no_vert_all, ecol, relate),
                     dim=c(nrow(SRI_vert_all), ncol(SRI_vert_all), n.assMatrix))

# all ILVs need to go into a matrix
Sex <- matrix(data=Sex, nrow=length(IDs), byrow=F)
Av_water_depth <- matrix(data=Av_water_depth, nrow=length(IDs), byrow=F)
Av_group_size <- matrix(data=Av_group_size, nrow=length(IDs), byrow=F)
not.E <- matrix(data=not.E, nrow=length(IDs), byrow=F)

ILVs <- c("Sex", "Av_water_depth", "Av_group_size", "not.E")

label <- "spongingC"

# extract the sponger learners with no maternity data available
sponger_filter <- subset(spongers, subset=spongers$Demons_sponging_forage=="no")
sponger_filter2 <- sort(subset(sponger_filter$id_individual, subset=sponger_filter$Mum_known=="no"))
vec <- NULL
for (i in 1:length(sponger_filter2)){
  a <- which(IDs==sponger_filter2[i])
  vec[i] <- a
}
# vec holds the positions of spongers with no maternity data;
# they get set to 0 in the presence matrix (via the NBDA filter function)
filter <- paste0(label, "_", vec)

# create the NBDA data object (OADA)
nbdaDataSPONGING.C <- nbdaData(label=label,
                               assMatrix=assMatrix.B,
                               asoc_ilv=ILVs,
                               int_ilv=ILVs,
                               multi_ilv=ILVs,
                               orderAcq=OAc,
                               asocialTreatment="constant",
                               demons=demons_vector)

# apply the filter to exclude individuals without maternity data as learners
nbdaDataSPONGING.C.filter <- filteredNBDAdata(nbdadata=nbdaDataSPONGING.C, filter="id", exclude=filter)

# in the constraints vectors used below, the first four positions correspond to the networks
# (vertical, horizontal, ecological, relatedness), the following 4 to the asoc ILVs, then 4 to the
# int ILVs, and then 4 to the multi ILVs; all multi ILV positions are set to 0.
# check the ILVs for asoc (ILVs only influence asocial learning), int (ILVs influence asocial and social learning independently)
# and multi (ILVs influence both asocial and social learning to the same extent).
# in this analysis, the multi ILVs are set to 0 and therefore not estimated (in the constraintsVectMatrix)
nbdaDataSPONGING.C.filter@asoc_ilv
nbdaDataSPONGING.C.filter@int_ilv
nbdaDataSPONGING.C.filter@multi_ilv
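## Optional check (not part of the original pipeline; a minimal sketch using the event.id slot of the
## NBDA data object, which is also referenced further below): confirm how many acquisition events
## remain after filtering out learners without maternity data.
length(unique(nbdaDataSPONGING.C.filter@event.id))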
# the following part creates a matrix, constraintsVectMatrix, with all possible combinations of networks and ILVs.
# This matrix is then passed to the oadaAICtable function below to fit each model.
# An explanation of the constraintsVectMatrix is given below (followed by a small worked example).

# set the number of networks and the number of ILVs
num_networks <- 4
num_ILVs <- 4

# create a vector for the full model with all networks and ILVs (excluding the multi ILV slots, which are all set to 0)
vector <- seq(1:(num_networks+(2*num_ILVs)))

count <- 0          # create an object 'count', which starts at 0

# create a matrix to save the combinations of parameters in
constraintsVect <- matrix(nrow=10000000, ncol=(num_networks+(2*num_ILVs)))

# the first row gets filled with a sequence from 1:12 (all parameters are estimated, none are set to 0)
constraintsVect[1,] <- seq(1:(num_networks+(2*num_ILVs)))

for (i in 1:(num_networks+(2*num_ILVs)-1)){              # a loop over each possible number of parameters constrained to 0
  # for each number of constrained parameters (e.g. 2), create all possible combinations of positions between 1:12 (e.g. 2&8, 1&5, etc.)
  array <- combn(vector, i, FUN=NULL, simplify=TRUE)
  for (j in 1:length(array[1,])){                        # for each of those combinations
    vector2 <- seq(1:((num_networks+(2*num_ILVs))-i))    # create a second vector with 12-i free spaces
    position <- array[,j]                                # positions to be constrained in this combination
    count <- count+1                                     # add +1 to the count
    for (k in position){                                 # at each position to be constrained
      vector2 <- append(vector2, 0, after=k-1)           # add a 0 (e.g. 1 0 2 3 ...; 1 2 0 3 4 5 ...; 1 2 3 0 4 5 ...)
    }
    constraintsVect[count+1,] <- vector2                 # and save the resulting vector in the matrix
  }
}

constraintsVect <- na.omit(constraintsVect)                                                     # remove all NA rows from the matrix
constraintsVect <- rbind(constraintsVect, rep.int(0, (num_networks+2*(num_ILVs))))              # add a last row with all 0
constraintsVect <- cbind(constraintsVect, matrix(0, ncol=4, nrow=length(constraintsVect[,1])))  # add 4 columns at the end with all 0 (multi_ilv)
constraintsVectMatrix <- constraintsVect

# Each line of the resulting object specifies a model.
# Each element in the line corresponds to a parameter in the model. When an element is zero, that parameter is constrained
# to 0. When two elements have the same value, they are constrained to have the same value (not relevant here).
# For example:
constraintsVectMatrix[1,]
# The first four elements (1-4) are the s parameters for each network, so in this model all networks are included.
# The next four elements (5-8) are the parameters determining the effect each ILV has on asocial learning; in this model
# all ILVs are assumed to affect asocial learning.
# The next four elements (9-12) are the parameters determining the effect each ILV has on social learning; in this model
# all ILVs are assumed to affect social learning.
# The final four elements determine the effect each parameter has on both asocial and social learning (the multiplicative
# NBDA model). In our analysis we estimate the effects each ILV has on asocial and social learning independently, so these
# parameters are constrained to be zero for all models fitted.
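## Worked example (illustrative only): the constraints vector for a model containing only the vertical
## social network and an effect of sex on social learning would be written as below (parameter 1 = s for
## the vertical network, parameter 2 = the int effect of sex, everything else constrained to 0). Such a
## model can also be fitted on its own with the same functions used further below for the best model.
exampleConstraints <- c(1,0,0,0,  0,0,0,0,  2,0,0,0,  0,0,0,0)
exampleData <- constrainedNBDAdata(nbdadata=nbdaDataSPONGING.C.filter, constraintsVect=exampleConstraints)
exampleModel <- oadaFit(exampleData)
exampleModel@outputPar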
#######################################################################################################################################
#######################################################################################################################################
# run the NBDA using the NBDA data object and the constraintsVectMatrix
# this fits every model specified by the constraintsVectMatrix
tableSPONGING.C.filter <- oadaAICtable(nbdadata=nbdaDataSPONGING.C.filter,
                                       constraintsVectMatrix=constraintsVectMatrix,
                                       writeProgressFile=T)
print(tableSPONGING.C.filter)
save(tableSPONGING.C.filter, file="AIC table sponging.Rdata")
load("AIC table sponging.Rdata")
write.csv(as.data.frame(tableSPONGING.C.filter@printTable), "AIC table sponging.csv")

## create a new object with a printTable that excludes unfitted models
newTableSPONGING <- tableSPONGING.C.filter
newTableSPONGING@printTable <- tableSPONGING.C.filter@printTable[!is.nan(tableSPONGING.C.filter@printTable$aicc)&!is.na(tableSPONGING.C.filter@printTable$aicc),]
newTableSPONGING@aicc <- tableSPONGING.C.filter@aicc[!is.nan(tableSPONGING.C.filter@aicc)&!is.na(tableSPONGING.C.filter@aicc)]
newTableSPONGING@MLEs <- tableSPONGING.C.filter@MLEs[!is.nan(tableSPONGING.C.filter@aicc)&!is.na(tableSPONGING.C.filter@aicc),]
newTableSPONGING@MLEilv <- tableSPONGING.C.filter@MLEilv[!is.nan(tableSPONGING.C.filter@aicc)&!is.na(tableSPONGING.C.filter@aicc),]
newTableSPONGING@MLEint <- tableSPONGING.C.filter@MLEint[!is.nan(tableSPONGING.C.filter@aicc)&!is.na(tableSPONGING.C.filter@aicc),]
newTableSPONGING@printTable <- newTableSPONGING@printTable[order(newTableSPONGING@printTable$aicc),]
newTableSPONGING@printTable$deltaAICc <- newTableSPONGING@printTable$aicc - newTableSPONGING@printTable$aicc[1]

# calculate support for the fitted models (a small numeric illustration of the Akaike weight formula is given below)
newTableSPONGING@printTable$RelSupport <- exp(-0.5*newTableSPONGING@printTable$deltaAICc)
newTableSPONGING@printTable$AkaikeWeight <- newTableSPONGING@printTable$RelSupport/sum(newTableSPONGING@printTable$RelSupport)
newTableSPONGING@deltaAIC <- newTableSPONGING@aicc - min(newTableSPONGING@aicc)
newTableSPONGING@RelSupport <- exp(-0.5*newTableSPONGING@deltaAIC)
newTableSPONGING@AkaikeWeight <- newTableSPONGING@RelSupport/sum(newTableSPONGING@RelSupport)

dim(tableSPONGING.C.filter@printTable)[1] - dim(newTableSPONGING@printTable)[1]
dim(tableSPONGING.C.filter@printTable)[1]
## 2 out of 4096 models could not be fitted - probably too many parameters for the dataset

# save the reduced AIC table as csv
write.csv(newTableSPONGING@printTable, file="sponging.AIC_table_CORRECTED.csv")

# obtain the support for each network combination
networksSupport_sponging <- networksSupport(newTableSPONGING)
networksSupport_sponging
write.csv(networksSupport_sponging, file="networksSupport_sponging.csv")
# 83.7% support for the vertical social network only (1:0:0:0)

# extract the support for each variable
variable_support <- variableSupport(newTableSPONGING, includeAsocial=T)
variable_support
write.csv(variable_support, file="variable_support_sponging.csv")

# extract model-averaged medians
MLE_med <- modelAverageEstimates(newTableSPONGING, averageType="median")
MLE_med
write.csv(MLE_med, "MLE_sponging.csv")

#######################################################################################################################################
# Getting 95% confidence intervals using profile likelihood techniques.
# This is vital for s parameters, since CIs based on SEs would be highly misleading due to frequent asymmetry in the profile likelihood.
#######################################################################################################################################

print(newTableSPONGING)[1:10,]

# constraintsVectMatrix[4076,] corresponds to the best model (vertical social transmission + social.sex)
bestModelData <- constrainedNBDAdata(nbdadata=nbdaDataSPONGING.C.filter, constraintsVect=constraintsVectMatrix[4076,])
model.best.social <- oadaFit(bestModelData)
model.best.social@outputPar
# [1] 1.233004e+10 -4.840486e+00
model.best.social@optimisation
model.best.social@aicc
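## Numeric illustration of the Akaike weight formula used above (hypothetical deltaAICc values, not
## results from this analysis): two models with deltaAICc of 0 and 2 receive weights of roughly 0.73 and 0.27.
exp(-0.5*c(0, 2)) / sum(exp(-0.5*c(0, 2)))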
# extract the profile likelihood; which=1 extracts the first parameter (the s parameter for vertical social transmission)
plotProfLik(which=1, model=model.best.social, range=c(0, 1e30), resolution=20)
# Here we can see that we cannot set an upper limit on s (see the explanation below).

# zoom in to locate the lower limit for s
plotProfLik(which=1, model=model.best.social, range=c(0, 500), resolution=20)
plotProfLik(which=1, model=model.best.social, range=c(30, 40), resolution=20)
profLikCI(which=1, model=model.best.social, lowerRange=c(30, 40))
# Lower CI  Upper CI
# 33.08928  Inf

######################################################################################################################
# To explain why we cannot set an upper limit on s, we extract a table with a row for each acquisition event.
# For each event we get: the connection of the individual that learned;
# the total connections to informed individuals across the population (in this case equal to the number of dolphins
# with a sponging mother, since connections are binary);
# and the maximum connection strength (again, this should be 1, but just to confirm).
eventTable <- NULL
for(i in 1:9){
  eventTable <- rbind(eventTable,
                      c((bestModelData@stMetric[bestModelData@event.id==unique(bestModelData@event.id)[i]])[bestModelData@status[bestModelData@event.id==unique(bestModelData@event.id)[i]]==1],
                        sum(bestModelData@stMetric[bestModelData@event.id==unique(bestModelData@event.id)[i]]),
                        max(bestModelData@stMetric[bestModelData@event.id==unique(bestModelData@event.id)[i]])))
}
dimnames(eventTable)[[2]] <- c("Connection of Learner", "Sum of Connections", "Maximum Connection")
eventTable

# We can see that in every case the individual to learn sponging has an informed mother.
# There are a variable number of individuals with sponging mothers across events. But importantly, the dolphin to acquire
# the behaviour has the (joint) maximum connection to informed individuals.
# In any diffusion where, for every event, the individual to acquire the behaviour is always the one with the maximum
# connection to informed individuals (even if it is the joint maximum), we cannot set an upper limit on s with OADA.
# Since the next individual to learn is always the one that the network would predict as being most likely (or joint most
# likely), a value of s=Inf is plausible. (A small numeric illustration follows below.)
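## Small numeric illustration of the point above (hypothetical connection values; simplified additive
## OADA rate with no ILVs, i.e. rate proportional to 1 + s*connection). The event likelihood is the
## learner's rate divided by the summed rates of all naive individuals; if the learner holds the (joint)
## maximum connection, this ratio approaches a positive constant as s grows, so very large s is never penalised.
conn <- c(1, 1, 0, 0)       # hypothetical connections to informed individuals; individual 1 is the learner
s_values <- c(1, 10, 1e6)
sapply(s_values, function(s) (1 + s*conn[1]) / sum(1 + s*conn))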
###########################################################
# which=2 extracts the second parameter (sex)
plotProfLik(which=2, model=model.best.social, range=c(-10, -2), resolution=20)
profLikCI(which=2, model=model.best.social, lowerRange=c(-10, -6), upperRange=c(-4, -2))
# Lower CI   Upper CI
# -7.971296  -2.246465

# back-transform
exp(c(4.840486e+00, 7.971296, 2.246465))
# [1] 126.530831 2896.608938 9.454256
# Females are an estimated 126x (95% CI = 9.5-2897) faster to learn the behaviour from their mothers than males are.

# estimate what proportion of spongers learned sponging socially from their mothers
prop.solve.social.byevent <- oadaPropSolveByST.byevent(nbdadata=bestModelData, model=model.best.social)
# an output of 1 for each event means that all sponger offspring are estimated to have learned socially from their mothers
prop.solve.social <- oadaPropSolveByST(nbdadata=bestModelData, model=model.best.social)
prop.solve.social
# 100% are estimated to have learned socially

# To get the estimate at the lower bound of s, we first find the corresponding values of the other parameters
# when s1 is constrained (via an offset) to its lower confidence limit.
bestModelDataS1LowerBound <- constrainedNBDAdata(nbdadata=nbdaDataSPONGING.C.filter,
                                                 constraintsVect=constraintsVectMatrix[4076,],
                                                 offset=c(33.08928, rep(0, 15)))
bestModelS1LowerBound <- oadaFit(bestModelDataS1LowerBound, type="asocial")
bestModelS1LowerBound@outputPar

# Now plug these into the propSolve function in one of these two ways:
prop.solve.social.lower <- oadaPropSolveByST(par=c(33.08928, bestModelS1LowerBound@outputPar), model=NULL, nbdadata=bestModelData)
prop.solve.social.lower <- oadaPropSolveByST(model=bestModelS1LowerBound, nbdadata=bestModelDataS1LowerBound)
prop.solve.social.lower
# At least 98.9% are estimated to have learned by social transmission from their mothers.

# In theory we would repeat this for the upper limit, but here there is no need: the upper limit on s is Inf,
# so the upper limit on the proportion is 100%.