## Streicker et al. Phylodynamics reveals extinction-recolonization dynamics underpin apparently endemic vampire bat rabies in Costa Rica. Proceedings of the Royal Society B. DOI: 10.1098/rspb.2019.1527 ## Code for spatiotemporal analysis of extinction-recolonization rm(list=ls()) library(raster) library(RColorBrewer) library(maps) library(ggmap) library(effects) library(nnet) library(fields) library(EnvStats) library(pscl) setwd("") # outbreak data meta<-read.table(file="ESM_File4_CR_metadata.txt",sep="\t",header=T) crseqs<-subset(meta,meta$country=="CR") crseqs<-droplevels(crseqs) ## Figure 4B time between outbreaks detected v1a<-subset(crseqs,crseqs$sublineage=="1a") v1as<-v1a[order(v1a$monthyr),] v1mat<-as.matrix(dist(v1as$monthyr)) v1a_inter<-v1mat[row(v1mat) == col(v1mat) + 1] v1b<-subset(crseqs,crseqs$sublineage=="1b") v1bs<-v1b[order(v1b$monthyr),] v1bmat<-as.matrix(dist(v1bs$monthyr)) v1b_inter<-v1bmat[row(v1bmat) == col(v1bmat) + 1] v1c<-subset(crseqs,crseqs$sublineage=="1c") v1cs<-v1c[order(v1c$monthyr),] v1cmat<-as.matrix(dist(v1cs$monthyr)) v1c_inter<-v1cmat[row(v1cmat) == col(v1cmat) + 1] v1d<-subset(crseqs,crseqs$sublineage=="1d") v1ds<-v1d[order(v1d$monthyr),] v1dmat<-as.matrix(dist(v1ds$monthyr)) v1d_inter<-v1dmat[row(v1dmat) == col(v1dmat) + 1] v2<-subset(crseqs,crseqs$sublineage=="2") v2s<-v2[order(v2$monthyr),] v2mat<-as.matrix(dist(v2s$monthyr)) v2_inter<-v2mat[row(v2mat) == col(v2mat) + 1] ## full distribution of intervals obverved across all viruses all_int<-c(v1a_inter,v1b_inter,v1c_inter,v1d_inter,v2_inter) all_int_noZeros<-all_int[all_int!=0] # remove zeros to avoid bias from detections in the same month ## time between the final observation of each virus and the end of the study (lastObs) lastObs<-max(crseqs$monthyr) lastObs-max(v1a$monthyr) # 0 lastObs-max(v1b$monthyr) # 2.91 lastObs-max(v1c$monthyr) # 2 lastObs-max(v1d$monthyr) # 10.41 lastObs-max(v2$monthyr) # 2.91 lincols<-c(brewer.pal(4,name = "Oranges"),"skyblue") quartz(width=2.5,height=2) par(las=1,mgp=c(1.5,.6,0),ps=9,mar=c(2.5,2.3,.1,.8)) plot(density(all_int_noZeros),xlab="Interval between viral detections (years)",main="",xlim=c(0,11)) df <- approxfun(density(all_int_noZeros)) xnew <- c(lastObs-max(v1a$monthyr),lastObs-max(v1b$monthyr),lastObs-max(v1c$monthyr),lastObs-max(v1d$monthyr),lastObs-max(v2$monthyr)+.1) points(xnew,df(xnew),col="black",bg=lincols[c(1,2,3,3,5)],pch=21,cex=1.5) points(10.41,0,bg=lincols[4],pch=21,col="black",cex=1.5) legend('topright',legend=c("L1a","L1b","L1c","L1d","L2"),pch=21,col="black",pt.bg=lincols, y.intersp = .75,bty="n") ## empirical density x <- c(2,2.91,2.91,10.41) demp(x, all_int_noZeros) ## probabilities of not observing a circulating virus pemp(x, all_int_noZeros) ## cumulative probability of having observed this virus if it exists prob95<-qemp(.95,all_int_noZeros,discrete = F) xline(prob95,col="blue",lty=2) ## Multinomial regression # This analysis models the categorical time series of each viral lineage as a choice among N alternative outcomes, where N is the number of viral lineages # Using the L2 virus (the first virus detected) as a baseline, outcomes are therefore modelled as a set of N-1 binary choices # re-load data meta<-read.table(file="ESM_File4_CR_metadata.txt",sep="\t",header=T) crseqs<-subset(meta,meta$country=="CR") crseqs<-droplevels(crseqs) # set reference as the first virus detected (L2) levels(crseqs$sublineage) crseqs$sublineage<-relevel(crseqs$sublineage,ref="2") levels(crseqs$sublineage) # fit models and examine outputs m1<-multinom(sublineage~year, data=crseqs,maxit=10000) # model with fixed effect of year m0<-multinom(sublineage~1, data=crseqs,maxit=10000) # intercept only model anova(m1,m0,test = "Chisq") # evaluate whether adding random noise to year, so each time point is distinct, influences results # crseqs$year2<-crseqs$year+runif(length(crseqs$year),0.001,.9) # add random noise to year so there is a single observation per unit of time # m2<-multinom(sublineage~year2, data=crseqs,maxit=10000) # m1<-m2 summary(m1) coefs<-exp(coef(m1)) z <- summary(m1)$coefficients/summary(m1)$standard.errors p <- (1 - pnorm(abs(z), 0, 1)) * 2 p pR2(m1) # pseudo r2 # generate predictions for each virus/year combination dfyear<-data.frame(year=c(2004:2017)) # dfyear<-data.frame(year2=c(2004:2017)) p_strains<-cbind(predict(m1,newdata=dfyear,"probs"),dfyear) # Plot figure 4A quartz() fit.effects<-Effect("year2",m1) fit.effects$response<-"virus" plot(fit.effects,main="") # plot all lineages together # p_strains_melt<-melt(p_strains,id.vars="year",value.name="Probability") # colnames(p_strains_melt)[1]<-"Year" # colnames(p_strains_melt)[2]<-"Virus" # ggplot(p_strains_melt,aes(x=Year,y=Probability,colour=Virus))+geom_line()