library(ape)
library(geiger)
library(caper)
source("./BayesModelS_v22.r") #ensure this matches the version of BayesModelS in your working directory

testTip = function(trees, branch, nsim, rate, VCV = matrix(c(1,0,0,1), nrow = 2, ncol = 2), file = NULL) {
#testTip runs BayesModelS analyses on multiple data sets simulated using a single tree. It can be used to identify rates of Type I and Type II errors when identifying
  #evolutionary singularities on a specefic branch.
  #BayesModelS will write multiple files to the current directory. Each simulation will overwrite the files of the last. Be careful not to overwrite BayesModelS data from other analyses.
#output: A data frame with two columns. Each row represents one run of BayesModelS and includes the rate at which evolution was multiplied on the target branch
   #during trait simulation and the proportion of BayesModelS predictions that were more extreme that then simulated value for that branch.
#input:
  #trees: an object of either class "phylo" or "multiPhylo". If "multiPhylo", data will be simulated on trees selected randomly from trees
  #branch: the name of the tip which is being analyzed. Should be of class character and must match a tip on all trees used as input
  #nsim: the number of simulations to run. Numeric
  #rate: a vector of two numeric values. The evolutionary rate multiplyer on the target tip will vary continuously between these two values, with equal intervals based on nsim
  #VCV: a variance-covariance matrix for trait simulation. Default is two traits with variation of one and no covariance. BayesModels requires one predictor
    #to run, even if the is no covariation. 
  #file: a string indicating location of save output (should not include ".csv"). Default is NULL, which will not write the output to a file.
  
  if (!length(rate) == 2) stop("the rate variable should be a vector with two numeric values, a min and a max rate of evolution")
  if (!is.numeric(rate)) stop("the rate variable should be a vector with two numeric values, a min and a max rate of evolution")
  if (!is.numeric(nsim)) stop("nsim should be numeric")
  if (class(trees) == "multiPhylo" ) { allTrees = trees[round(runif(nsim, 1, length(trees)))]  
    } else { if(! class(trees) == "phylo") stop("trees variable should either be of class 'phylo' or 'multiPhylo'")
      allTrees = trees
  }
  if (! nrow(VCV) == ncol(VCV)) stop("VCV should be a matrix with equal number of columns and rows")
  rates = seq(from = rate[1], to = rate[2], length.out = nsim)
  branchNum = which(tree$edge[,2] == which(tree$tip.label == branch))
  if (class(trees) == "multiPhylo") { allTrees = mapply(adjustTree, rate = rates, tree = allTrees, MoreArgs = list(branch = branchNum), SIMPLIFY = FALSE) #multiply length of target tip to emulate faster evolution during trait simulation
  } else allTrees = mapply(adjustTree, rate = rates, MoreArgs = list(tree = allTrees, branch = branchNum), SIMPLIFY = FALSE)
  data = lapply(allTrees, sim.char, par = VCV, model = "BM") #simulate all data sets
  missingList = branch
  formula = "value ~ predictor1"
  if (nrow(VCV) >= 3) { #if more than one predictor . . .
    for (i in 2:(nrow(VCV) - 1)) { #nrow(VCV) - 1 is number of predictors
      formula = paste(formula, " + predictor", i, sep = "") #add predictor to formula
    }
  }
  pathO = "./"
  factorName = c()
  posteriorProb = data.frame(rate = rates, prob = NA)
  for (i in 1:length(data)) {
    runData = data.frame(species = rownames(data[[i]]), value = data[[i]][,1,1], predictor1 = data[[i]][,2,1]) #transform simulated data from array to named data frame
    if (nrow(VCV) >= 3) {
      for (j in 3:(nrow(VCV))) {
        runData = cbind(runData, data[[i]][,j,1]) #add data for additional predictor
        colnames(runData)[j + 1] = paste("predictor", j - 1, sep = "") #name column for additional predictor
      }
    }
    rownames(runData) = NULL #rownames duplicates in species column
    if (class(trees) == "phylo") trees = c(trees, trees) #blm function can only use multyphylo objects. This simply creates a multiPhylo with two identical trees
    bmselection = blm(formula, runData, trees, factorName = factorName, missingList = missingList, currentValue = 0, nposterior = 201000, burnin = 1000, thin = 100, varSelection = "lambda", lambdaValue = NA, kappaValue = NA, path = pathO) #run BayesModelS MCMC. May need to adjust chain lengths and thin rate
    predict(bmselection, missingList, path = pathO)
    postPredict = read.csv("./predictions.csv")
    meanPrediction = mean(postPredict[,2])
    distPrediction = abs(runData$value[which(runData$species == branch)] - meanPrediction) #gets distance of simualted value on target tip from mean of all predictions
    postPredict$dist = abs(postPredict[,2] - meanPrediction) #gets distance of each prediction from mean of all predictions
    posteriorProb$prob[i] = sum(postPredict$dist > distPrediction) / length(postPredict$dist) #calculates proportion of predictions farther from mean of predictions than simulated value on target tip
    if (! is.null(file)) {
      if(! is.character(file)) stop("the file variable should either be NULL or a character string")
      write.csv(posteriorProb, paste(file, ".csv", sep =""))
    }
  }
  posteriorProb
}

adjustTree = function(rate, tree, branch) {
  tree$edge.length[branch] = tree$edge.length[branch] * rate
  return(tree)
}