##################################################### # Code from Greischar, Mideo, Read & Bjornstad 2016 # ##################################################### library(splines) # import data: allData = read.csv("SH1 dryad data file.csv", header=TRUE, sep = ',') # focus on non-drug treated infections nodrugs = allData[allData$Drugs=="nodrugs",] # select resistant clone clones = nodrugs[nodrugs$Clone=="R",] # rename columns for drug resistant asexuals and gametocytes # for ease of referencing later colnames(clones) <- c("Box","Mouse","Drugs","Clone","Day","Weight","RBC","Asex","S.asex","Gam","S.gam") # set units for RBCs # in this data set, RBC numbers are given in numbers that need to be multiplied by 10^6 rbcUnit = 10^6 getF = function(n, nparms1,sse1,nparms2,sse2){ # function to return p-value associated with F statistic # which follows an F distribution with df1 = p-q, df2 = n-p # first determine which model has more parameters/ # smaller sum squared error: if(nparms1>nparms2 & sse2>sse1){ sseOmega = sse1 p = nparms1 sseomega = sse2 q = nparms2 } # end test for model1 is more complex if(nparms2>nparms1 & sse1>sse2){ sseOmega = sse2 p = nparms2 sseomega = sse1 q = nparms1 } # end test for model2 is more complex if(nparms2>nparms1 & sse2 > sse1){ sseOmega = NA p = NA sseomega = NA q = NA print("Error: model fit is not a global optimum, refit with different starting parameters.") } # if model2 has more parameters but doesn't fit better, that's an error if(nparms1>nparms2 & sse1 > sse2){ sseOmega = NA p = NA sseomega = NA q = NA print("Error: model fit is not a global optimum, refit with different starting parameters.") } # if model2 has more parameters but doesn't fit better, that's an error Fstatistic = ((sseomega-sseOmega)/(p-q))/(sseOmega/(n-p)) pVal = pf(q=Fstatistic, df1=(p-q), df2=(n-p), lower.tail=FALSE) return(pVal) } # end getF function maxCarryover=1 i=1 mouse=clones[clones$Mouse==i,] mouseID=i tau=2 # to get the longest continuous run of data: resLog=rle(diff(mouse$Day)==1) # returns lengths of runs for which the difference in plausible indices is 1 runLength = max(resLog$lengths[which(resLog$values)]) # get the max length of # the consecutive run dummy=0 j=1 while(dummy==0 & j0 & Itplus1[p]>0){ fitA = lm(log(Itplus1[p])~offset(log(It[p])+log(St[p]))) x[p] = exp(fitA$coef[[1]])} } } # end asex loop x[x*St>20]=NA length(x) <- length(times) # so that an NA is added for the last day, # when effective propagation could not be estimated subtimes = times[is.na(x)==FALSE] subx = x[is.na(x)==FALSE] subrbc = rbc[is.na(x)==FALSE] subpara = para[is.na(x)==FALSE] subgams = gams[is.na(x)==FALSE] resPe=rle(diff(subtimes)==1) # returns lengths of runs for which the difference in plausible indices is 1 runPeLength = max(resPe$lengths[which(resPe$values)]) # get the max length of # the longest run dummyPe=0 jPe=1 while(dummyPe==0 & jPe0){sVals = bs(convTime,degree=dg,intercept=TRUE)}} if(dg>=4){sVals = bs(convTime,df=dg,intercept=TRUE)} # define objective function after creating sVals spline object gobs <- function(parms,data){ cparms = parms[1:(length(parms)-2)] cVal = exp(-exp(sVals%*%cparms)) epsilon = maxCarryover*exp(-exp(parms[(length(parms)-1)])) G3est = exp(parms[(length(parms))]) Stminus3 = data[,1] Itminus3 = data[,2] Gt = data[(tau+1):length(Stminus3),3] GtPred = rep(NA, length(Gt)) GtPred[1] = G3est for (Gindex in c(2:length(GtPred))){ j=c(1:(Gindex-1)) GtPred[Gindex]=sum((epsilon^((Gindex-1)-j))*(cVal[j])*xt[j]* Itminus3[j]*Stminus3[j])+(epsilon^(Gindex-1))*G3est } GtPred[GtPred>1e20]=1e20 sse <- sum((log(GtPred+1)-log(Gt+1))^2,na.rm=TRUE) } # end gobs fx # loop to estimate conversion rates by finding coefficients # of spline basis functions tries = 1000 # tries to fit lowest number of parameter values mugHi = (log(2)/5)*24 mugLo = (log(2)/41)*24 # confidence intervals from Reece et al. 2003 minEps = exp(-mugHi) # minimum allowable epsilon maxEps = exp(-mugLo) # maximum allowable epsilon parms0 = c(runif(ncol(sVals),min=-45,max=85),log(-log(runif(1, min=minEps, max = maxEps))), log(1+rgamma(1,shape = 1/0.0582,scale = 0.0582*gamToFit[(tau+1)]))) parmTries = matrix(NA, nrow = tries, ncol = length(parms0)+1) for (take in c(1:tries)){ dummy=1 while(dummy!=0){ convFit <- optim(parms0, gobs, data=dataToFit, control=c(maxit=5000)) dummy=convFit$convergence parms0 = c(runif(ncol(sVals),min=-45,max=85),log(-log(runif(1, min=minEps, max = maxEps))), log(1+rgamma(1,shape = 1/0.0582,scale = 0.0582*gamToFit[(tau+1)]))) sseVal = gobs(convFit$par, data=dataToFit) print(c(i,take,convFit$convergence,parms0)) } # end while loop to get convergence parmTries[take,] = c(convFit$par,sseVal) } # end loop to get multiple convergent tries with different # starting parameters # set bar for sse if(degrees[val]==min(degrees)){minSSE = min(parmTries[,(length(parms0)+1)])} if(degrees[val]>min(degrees)){ while(min(parmTries[,(length(parms0)+1)])>minSSE){ take=take+1 parms0 = c(runif(ncol(sVals),min=-45,max=85),log(-log(runif(1, min=minEps, max = maxEps))), log(1+rgamma(1,shape = 1/0.0582,scale = 0.0582*gamToFit[(tau+1)]))) convFit <- optim(parms0, gobs, data=dataToFit, control=c(maxit=5000)) print(c(i,take,convFit$convergence,parms0)) if(convFit$convergence==0){ sseVal = gobs(convFit$par, data=dataToFit) newTry = c(convFit$par,sseVal) parmTries = rbind(parmTries,newTry)} } # end while loop } bestrow = parmTries[which.min(parmTries[,(length(parms0)+1)]),1:length(parms0)] parms = bestrow bestFit[val,1:length(convFit$par)] = parms numParms = length(convFit$par) bestFit$numParms[val] = numParms cparms = parms[1:(length(parms)-2)] cVal = exp(-exp(sVals%*%cparms)) filelab = paste("ConversionMouse", i, "Degree", dg, ".txt", sep = '') write.table(cVal,filelab) epsilon = maxCarryover*exp(-exp(parms[(length(parms)-1)])) G3est = exp(parms[(length(parms))]) Stminus3 = dataToFit[,1] Itminus3 = dataToFit[,2] Gt = dataToFit[(tau+1):length(Stminus3),3] bestFit$numObs[val] = numObs GtPred = rep(NA, length(Gt)) GtPred[1] = G3est for (Gindex in c(2:length(GtPred))){ j=c(1:(Gindex-1)) GtPred[Gindex]=sum((epsilon^((Gindex-1)-j))*(cVal[j])*xt[j]* Itminus3[j]*Stminus3[j])+(epsilon^(Gindex-1))*G3est } GtPred[GtPred>1e20]=1e20 # end predicting G by gobs algorithm sse <- sum((log(GtPred+1)-log(Gt+1))^2,na.rm=TRUE) minSSE=sse bestFit$sse[val] = sse res = as.data.frame(cbind(Gt,GtPred)) write.table(res, paste("ConversionMouse", i, "Degree", dg, "Pred.txt", sep = '')) # plot fit # get index of any NAs in gam counts gamNA = which(is.na(Gt)) par(mfrow = c(1,1), bty = "n", mar = c(5,5,3,1)) plot(gamCountTime, log(Gt+1)-log(GtPred+1), pch = 16, col = "black", xlab = "t", ylab = "Residual gametocyte abundance", main = paste("degree = ", dg, sep = "")) if(any(is.na(Gt))){ points(gamCountTime[gamNA],rep(0,length(gamNA)),pch = 4, col = "red") } } # end degree loop # write results table resLab = paste("SplineFitsMouse", i, ".txt", sep = '') write.table(bestFit,resLab) } # end conditional regarding number of data points # run F-test res = read.table(paste("SplineFitsMouse",i,".txt",sep='')) m=1 k=2 pVal=1 forwardseln = 1 while(k<=length(res[,1]) & pVal>0.05){ pVal = getF(res$numObs[1],res$numParms[m],res$sse[m],res$numParms[k],res$sse[k]) print(c(m,k,pVal)) if(pVal<0.05){forwardseln=k; m=k; pVal=1} k=k+1 } bestPar = res[forwardseln,1:(res$numParms[forwardseln])] bestEpsFull = maxCarryover*exp(-exp(bestPar[(length(bestPar)-1)])) bestEps = c(bestEpsFull[[1]]) conv = read.table(paste("ConversionMouse",i,"Degree",degrees[forwardseln],".txt",sep='')) bestConv = conv[,1] gamEst = read.table(paste("ConversionMouse",i,"Degree",degrees[forwardseln],"Pred.txt",sep='')) length(convTime) <- length(gamCountTime) length(bestConv) <- length(gamCountTime) length(bestEps) <- length(gamCountTime) res.output = as.data.frame(cbind(convTime,bestConv,gamCountTime,gamEst$Gt,gamEst$GtPred,bestEps)) colnames(res.output) <- c("Days","Conversion","GamDays","Gt","GtPred","epsilon") assign(paste("estConv",i,sep=''),res.output) write.table(res.output,paste("estConv",i,".txt",sep='')) plot(estConv1$Days,estConv1$Conversion, type = "o", col = "purple3",pch = 16)