#THIS SCRIPT CARRIES OUT VALIDATION FOR PREDICTIONS UNDER THE PHYLOGENETIC REGRESSION + LAMBDA MODEL rm(list=ls()) library(phytools) library(geiger) library(phylolm) source("phyloPred.jknife.R") source("starPhyloPred.jknife.R") #GET DATA AND TREE fullTree<-read.newick("Rabosky_etal2014.timetree.nex") fullTree$tip.label[grep("Acanthopagrus_schlegelii",fullTree$tip.label)]<-"Acanthopagrus_schlegelii" fullTree$tip.label[grep("Auxis_thazard",fullTree$tip.label)]<-"Auxis_thazard" fullTree$tip.label[grep("Auxis_rochei",fullTree$tip.label)]<-"Auxis_rochei" fullTree$tip.label[grep("Carassius_auratus",fullTree$tip.label)]<-"Carassius_auratus" fullTree$tip.label[grep("Diagramma_picta",fullTree$tip.label)]<-"Diagramma_pictum" fullTree$tip.label[grep("Helicolenus_dactylopterus",fullTree$tip.label)]<-"Helicolenus_dactylopterus" fullTree$tip.label[grep("Oncorhynchus_masou",fullTree$tip.label)]<-"Oncorhynchus_masou" fullTree$tip.label[grep("Osmerus_mordax_mordax",fullTree$tip.label)]<-"Osmerus_mordax" fullTree$tip.label[grep("Salvelinus_alpinus",fullTree$tip.label)]<-"Salvelinus_alpinus" fullTree$tip.label[grep("Salvelinus_leucomaenis_leucomaenis",fullTree$tip.label)]<-"Salvelinus_leucomaenis" fullTree$tip.label[grep("Sebastes_pachycephalus_pachycephalus",fullTree$tip.label)]<-"Sebastes_pachycephalus" fullTree$tip.label[grep("Thymallus_arcticus",fullTree$tip.label)]<-"Thymallus_arcticus" fullDat<-read.csv("bionut_working.csv") rownames(fullDat)<-fullDat[,1] fullDat<-fullDat[,-1] datSp<-rownames(fullDat) species<-datSp[which(match(datSp, fullTree$tip.label, nomatch=0)!=0)] exclude<-fullTree$tip.label[which(match(fullTree$tip.label, datSp, nomatch=0)==0)] tree<-drop.tip(fullTree, exclude) nutDat<-fullDat[species,c("protein", "fat", "fac3_pol", "fac6_pol", "iron", "zinc", "ret_eq", "vitb12", "vitd")] nutDat<-data.frame(nutDat[,"protein"], sapply(c("fat", "fac3_pol", "fac6_pol", "iron", "zinc", "ret_eq", "vitb12", "vitd"), function(x) log(nutDat[,x]+1))) colnames(nutDat)[1]<-"protein" lifeHistDat<-fullDat[species, c("maxlen", "troph", "depmax")] nutList<-lapply(1:ncol(nutDat), function(x) data.frame(lifeHistDat[which(is.na(nutDat[,x])==FALSE),], nutDat[which(is.na(nutDat[,x])==FALSE),x])) names(nutList)<-colnames(nutDat) for (i in 1:length(nutList)) {colnames(nutList[[i]])[ncol(nutList[[i]])]<-names(nutList)[i]} for (i in 1:length(nutList)) { for (j in 1:ncol(nutList[[i]])){ nutList[[i]]<-nutList[[i]][which(is.na(nutList[[i]][,j])==FALSE),] } } treeList<-lapply(1:length(nutList), function(x) drop.tip(tree, tree$tip.label[which(match(tree$tip.label, rownames(nutList[[x]]), nomatch=0)==0)])) #pgls.table<-matrix(, length(nutList), 18, dimnames=list(names(nutList), c("n", "lambda", "b0", "b1_maxlen", "b1_troph", "b1_depmax", "se_b0", "se_b1_maxlen", "se_b1_troph", "se_b1_depmax", "tstat_b0", "tstat_maxlen", "tstat_troph", "tstat_depmax", "P_b0", "P_maxlen", "P_troph", "P_depmax"))) errorMat<-matrix(,length(nutList),3, dimnames=list(names(nutList), c("median_diff_percMean", "median_diff_nSD", "errorRate"))) for (i in 1:length(nutList)){ pred<-matrix(, nrow(nutList[[i]]), 6, dimnames=list(rownames(nutList[[i]]), c("measuredVal", "predVal", "lcl_predInt", "ucl_predInt", "diff_percMean", "diff_nSampleSD"))) withinCIMat<-matrix(, nrow(nutList[[i]]), 1, dimnames=list(rownames(nutList[[i]]), "withinCI")) maxlen<-nutList[[i]][,"maxlen"] names(maxlen)<-rownames(nutList[[i]]) troph<-nutList[[i]][,"troph"] names(troph)<-rownames(nutList[[i]]) depmax<-nutList[[i]][,"depmax"] names(depmax)<-rownames(nutList[[i]]) nut<-nutList[[i]][,4] names(nut)<-rownames(nutList[[i]]) for (j in 1:nrow(nutList[[i]])){ maxlen.rm<-maxlen[-j] troph.rm<-troph[-j] depmax.rm<-depmax[-j] nut.rm<-nut[-j] tree.rm<-drop.tip(treeList[[i]], rownames(nutList[[i]])[j]) result<-phylolm(nut.rm ~ maxlen.rm + troph.rm + depmax.rm, phy=tree.rm, model="lambda") coeff<-c(result[[3]], result$coeff) names(coeff)<-c("lambda", "b0", "b1_maxlen", "b1_troph", "b1_depmax") Yhat<-(coeff["b1_maxlen"] * maxlen) + (coeff["b1_troph"] * troph) + (coeff["b1_depmax"] * depmax) + coeff["b0"] res<- nut - Yhat pred.res<-phyloPred.jknife(treeList[[i]], res, rownames(nutList[[i]])[j]) predVal<-Yhat[j] + as.numeric(pred.res$PredVal) measVal<-as.numeric(nut[j]) diff<-abs(measVal - predVal) predSE<-as.numeric(pred.res$PredSE) diff_nPredSE<-diff / predSE if (diff_nPredSE < 2) withinCIMat[j]<-TRUE else withinCIMat[j]<-FALSE lclPredInt<-predVal - (1.96 * predSE) uclPredInt<-predVal + (1.96 * predSE) if (i > 1) predVal<-exp(predVal) - 1 if (i > 1) measVal<-exp(nut[j]) - 1 if (i > 1) lclPredInt<-exp(lclPredInt) - 1 if (i > 1) uclPredInt<-exp(uclPredInt) - 1 if (i > 1) nutSD<-sd(exp(nut) -1) else nutSD<-sd(nut) if (measVal == 0) diff1<-NA else diff1<-as.numeric((abs(measVal - predVal) / measVal) * 100) if (measVal == 0) diff2<-NA else diff2<-as.numeric((abs(measVal - predVal) / nutSD)) pred[j,]<-round(c(measVal, predVal, lclPredInt, uclPredInt, diff1, diff2), 3) } if (i == 1) write.csv(pred, file="phyloPredValid.PGLS.protein.csv") if (i == 2) write.csv(pred, file="phyloPredValid.PGLS.lipid.csv") if (i == 3) write.csv(pred, file="phyloPredValid.PGLS.omega3.csv") if (i == 4) write.csv(pred, file="phyloPredValid.PGLS.omega6.csv") if (i == 5) write.csv(pred, file="phyloPredValid.PGLS.iron.csv") if (i == 6) write.csv(pred, file="phyloPredValid.PGLS.zinc.csv") if (i == 7) write.csv(pred, file="phyloPredValid.PGLS.vitA.csv") if (i == 8) write.csv(pred, file="phyloPredValid.PGLS.vitB12.csv") if (i == 9) write.csv(pred, file="phyloPredValid.PGLS.vitD.csv") errorMat[i,1]<-median(pred[,"diff_percMean"], na.rm=TRUE) errorMat[i,2]<-median(pred[,"diff_nSampleSD"], na.rm=TRUE) errorMat[i,3]<-length(which(withinCIMat == FALSE))/nrow(pred) write.csv(errorMat, file="phyloPredValidPGLS.error.csv") }