library(ape) library(geiger) library(caper) source("./BayesModelS_v22.r") #ensure this matches the version of BayesModelS in your working directory testTip = function(trees, branch, nsim, rate, VCV = matrix(c(1,0,0,1), nrow = 2, ncol = 2), file = NULL) { #testTip runs BayesModelS analyses on multiple data sets simulated using a single tree. It can be used to identify rates of Type I and Type II errors when identifying #evolutionary singularities on a specefic branch. #BayesModelS will write multiple files to the current directory. Each simulation will overwrite the files of the last. Be careful not to overwrite BayesModelS data from other analyses. #output: A data frame with two columns. Each row represents one run of BayesModelS and includes the rate at which evolution was multiplied on the target branch #during trait simulation and the proportion of BayesModelS predictions that were more extreme that then simulated value for that branch. #input: #trees: an object of either class "phylo" or "multiPhylo". If "multiPhylo", data will be simulated on trees selected randomly from trees #branch: the name of the tip which is being analyzed. Should be of class character and must match a tip on all trees used as input #nsim: the number of simulations to run. Numeric #rate: a vector of two numeric values. The evolutionary rate multiplyer on the target tip will vary continuously between these two values, with equal intervals based on nsim #VCV: a variance-covariance matrix for trait simulation. Default is two traits with variation of one and no covariance. BayesModels requires one predictor #to run, even if the is no covariation. #file: a string indicating location of save output (should not include ".csv"). Default is NULL, which will not write the output to a file. if (!length(rate) == 2) stop("the rate variable should be a vector with two numeric values, a min and a max rate of evolution") if (!is.numeric(rate)) stop("the rate variable should be a vector with two numeric values, a min and a max rate of evolution") if (!is.numeric(nsim)) stop("nsim should be numeric") if (class(trees) == "multiPhylo" ) { allTrees = trees[round(runif(nsim, 1, length(trees)))] } else { if(! class(trees) == "phylo") stop("trees variable should either be of class 'phylo' or 'multiPhylo'") allTrees = trees } if (! nrow(VCV) == ncol(VCV)) stop("VCV should be a matrix with equal number of columns and rows") rates = seq(from = rate[1], to = rate[2], length.out = nsim) branchNum = which(tree$edge[,2] == which(tree$tip.label == branch)) if (class(trees) == "multiPhylo") { allTrees = mapply(adjustTree, rate = rates, tree = allTrees, MoreArgs = list(branch = branchNum), SIMPLIFY = FALSE) #multiply length of target tip to emulate faster evolution during trait simulation } else allTrees = mapply(adjustTree, rate = rates, MoreArgs = list(tree = allTrees, branch = branchNum), SIMPLIFY = FALSE) data = lapply(allTrees, sim.char, par = VCV, model = "BM") #simulate all data sets missingList = branch formula = "value ~ predictor1" if (nrow(VCV) >= 3) { #if more than one predictor . . . for (i in 2:(nrow(VCV) - 1)) { #nrow(VCV) - 1 is number of predictors formula = paste(formula, " + predictor", i, sep = "") #add predictor to formula } } pathO = "./" factorName = c() posteriorProb = data.frame(rate = rates, prob = NA) for (i in 1:length(data)) { runData = data.frame(species = rownames(data[[i]]), value = data[[i]][,1,1], predictor1 = data[[i]][,2,1]) #transform simulated data from array to named data frame if (nrow(VCV) >= 3) { for (j in 3:(nrow(VCV))) { runData = cbind(runData, data[[i]][,j,1]) #add data for additional predictor colnames(runData)[j + 1] = paste("predictor", j - 1, sep = "") #name column for additional predictor } } rownames(runData) = NULL #rownames duplicates in species column if (class(trees) == "phylo") trees = c(trees, trees) #blm function can only use multyphylo objects. This simply creates a multiPhylo with two identical trees bmselection = blm(formula, runData, trees, factorName = factorName, missingList = missingList, currentValue = 0, nposterior = 201000, burnin = 1000, thin = 100, varSelection = "lambda", lambdaValue = NA, kappaValue = NA, path = pathO) #run BayesModelS MCMC. May need to adjust chain lengths and thin rate predict(bmselection, missingList, path = pathO) postPredict = read.csv("./predictions.csv") meanPrediction = mean(postPredict[,2]) distPrediction = abs(runData$value[which(runData$species == branch)] - meanPrediction) #gets distance of simualted value on target tip from mean of all predictions postPredict$dist = abs(postPredict[,2] - meanPrediction) #gets distance of each prediction from mean of all predictions posteriorProb$prob[i] = sum(postPredict$dist > distPrediction) / length(postPredict$dist) #calculates proportion of predictions farther from mean of predictions than simulated value on target tip if (! is.null(file)) { if(! is.character(file)) stop("the file variable should either be NULL or a character string") write.csv(posteriorProb, paste(file, ".csv", sep ="")) } } posteriorProb } adjustTree = function(rate, tree, branch) { tree$edge.length[branch] = tree$edge.length[branch] * rate return(tree) }