#FUNCTIONS.r - Should run all these functions in order to work on the examples #Packages that are necessary. library(ape) library(vegan) library(picante) library(phangorn) library(PHYLOGR) #################################### Function phylo.signal #################################### # This function performs the randomization test described in Blomberg et al. (2003). # It differs from phylosignal (Steve Kembel) in that it employs phylogenetic generalized regressions (PGLS) # to perform the test, and consequently it can handle phylogenies with soft politomies. 'phylo.signal'<- function(trait,phy,rep = 999) { K <- Kcalc(trait, phy) cov.matrix <- (vcv.phylo(phy)) cov.matrix <- cov.matrix/det(cov.matrix)^(1/dim(cov.matrix)[1]) eigx <- eigen(cov.matrix) T <- eigx$vectors %*% diag(sqrt(eigx$values)) %*% t(eigx$vectors) mD <- solve(T) y <- mD %*% trait x <- as.data.frame(rep(0,length(y))) x1 <- mD %*% cbind(rep(1, length(y)), as.matrix(x)) fit1 <- lm(y ~ - 1) MSE.OBS <- mean(resid(fit1)^2) MSE.NULL <- matrix(0,rep,1) for (i in 1:rep){ y <- mD %*% sample(trait) fit.null <- lm(y ~ - 1) MSE.NULL[i,] <- mean(resid(fit.null)^2)} P.value = sum(MSE.OBS > MSE.NULL)/(rep + 1) data.frame(N.species = length(trait),K.value = K ,MSE.OBS,MSE.NULL = mean(MSE.NULL),P.value) } #################################### Function phylo.OU #################################### # This function estimates the parameter d for OU-transformation (Blomberg et al. 2003). # Results are identical to those obtained from the Matlab module RegressionV2 (Lavin et al. 2008) # when no independent variables are included in the model. # To obtain the optimal branch lengths after OU-transformation, see phylo.OU.matrix. # Plot command shows the REML curve as a funtion of d (default plot=T). # IMPORTANT! In some instances d does not converge because some internal branches may colapse before the optimal transformation is found (this is very common with trees obtained with the rcoal command). When this occurs, other branch lengths should be employed. 'phylo.OU' <- function(trait,phy,plot=T) { V1 <- vcv.phylo(phy) nspp1 <- dim(V1)[1] V1 <- V1/det(V1)^(1/nspp1) initV1 <- V1 U <- cbind(rep(1, nspp1)) tau1 <- matrix(diag(initV1), nspp1, nspp1) + t(matrix(diag(initV1),nspp1, nspp1)) - 2 * initV1 pgls.ML <- function(d1) { V1 <- (d1^tau1) * (1 - d1^(2 * initV1))/(1 - d1^2) V1 <- V1/det(V1)^(1/nspp1) invV <- solve(V1,diag(nspp1)) a <- solve((t(U) %*% invV %*% U), ((t(U) %*% invV %*% trait))) E <- (trait - U %*% a) s2 <- (t(E) %*% invV %*% E)/(nspp1 - 1) ML<- (-0.5*nspp1*log(2*pi)) - (0.5*(nspp1*log((nspp1-1)*s2/nspp1) + log(det(V1)) + nspp1)) -ML } est <- optimize(pgls.ML,lower=0,upper=3) d.ML <- est$minimum ML <- -(est$objective) pgls.REML <- function(d1) { V1 <- (d1^tau1) * (1 - d1^(2 * initV1))/(1 - d1^2) V1 <- V1/det(V1)^(1/nspp1) invV <- solve(V1,diag(nspp1)) a <- solve((t(U) %*% invV %*% U), ((t(U) %*% invV %*% trait))) E <- (trait - U %*% a) s2 <- (t(E) %*% invV %*% E)/(nspp1 - 1) REML <- (-0.5*(nspp1-1)*log(2*pi)) + (0.5*log(det(t(U)%*%U))) - ((0.5*((nspp1 - 1)*log(s2) + log(det(V1))+log(det((t(U) %*% invV %*% U)))+(nspp1 - 1)))) -REML } est1 <- optimize(pgls.REML,lower=0,upper=3) d.REML <- est1$minimum REML <- -(est1$objective) if (plot){ ## Plotting all the data for REML XX <- matrix(1:200/100+0.001,200,2) for (i in 1:200){ d1 <- XX[i,1] V1 <- V1/det(V1)^(1/nspp1) V1 <- (d1^tau1) * (1 - d1^(2 * initV1))/(1 - d1^2) invV <- solve(V1,diag(nspp1)) a <- solve((t(U) %*% invV %*% U), ((t(U) %*% invV %*% trait))) E <- (trait - U %*% a) s2 <- (t(E) %*% invV %*% E)/(nspp1 - 1) REML.plot <- (-0.5*(nspp1-1)*log(2*pi)) + (0.5*log(det(t(U)%*%U))) - ((0.5*((nspp1 - 1)*log(s2) + log(det(V1))+log(det((t(U) %*% invV %*% U)))+(nspp1 - 1)))) XX[i,2]<- REML.plot} plot(XX[,1],XX[,2],xlab='OU-transformation parameter (d)',ylab='REML')} else{ } vcv <- (d.REML^tau1) * (1 - d.REML^(2 * initV1))/(1 - d.REML^2) data.frame(d.REML,REML, d.ML,ML) } ################################## Function phylo.OU.matrix #################################### # This function returns the transformed branch lengths (variance-covariance matrix) after OU-transformation (Blomberg et al. 2003). # When paramater d is not specified, this function returns the optimal branch lengths according to REML (the value of d can be obtained with the function phylo.OU). 'phylo.OU.matrix' <- function(trait,phy,d=NULL) { V1 <- vcv.phylo(phy) nspp1 <- dim(V1)[1] V1 <- V1/det(V1)^(1/nspp1) initV1 <- V1 U <- cbind(rep(1, nspp1)) tau1 <- matrix(diag(initV1), nspp1, nspp1) + t(matrix(diag(initV1),nspp1, nspp1)) - 2 * initV1 if (is.null(d)) { pgls.REML <- function(d1) { V1 <- (d1^tau1) * (1 - d1^(2 * initV1))/(1 - d1^2) V1 <- V1/det(V1)^(1/nspp1) invV <- solve(V1,diag(nspp1)) a <- solve((t(U) %*% invV %*% U), ((t(U) %*% invV %*% trait))) E <- (trait - U %*% a) s2 <- (t(E) %*% invV %*% E)/(nspp1 - 1) REML <- (-0.5*(nspp1-1)*log(2*pi)) + (0.5*log(det(t(U)%*%U))) - ((0.5*((nspp1 - 1)*log(s2) + log(det(V1))+log(det((t(U) %*% invV %*% U)))+(nspp1 - 1)))) -REML } est1 <- optimize(pgls.REML,lower=0,upper=3) d.REML <- est1$minimum REML <- -(est1$objective)} else { d.REML <- ifelse(d == 1,1.000000001,d) } vcv.OU <- (d.REML^tau1) * (1 - d.REML^(2 * initV1))/(1 - d.REML^2) return(vcv.OU) } ################################## Function vcv2phylo ######################################## # This function converts a variance covariance matrix back into a rooted phylo object (file employed by packages of phylogenetic analyses such as APE and Picante) # Note that the order of the internal nodes are assigned arbitrarily and may change when compared to the original phylo file. As a result, the plots of the original and the back-transformed file may not always be identical. Nonetheless, the order of the tips remains unchanged, which is crucial for most analyses whether the phenotypic data must be in the same order as the tip data. # IMPORTANT!! This function works well ONLY with ultrametric phylogenies!! 'vcv2phylo'<- function(vcv) { dist <- diag(vcv)-vcv dist1 <- cbind(dist,rep(max(dist),ncol(dist))) dist1 <- rbind(dist1,rep(max(dist),ncol(dist1))) dist1[ncol(dist)+1,ncol(dist)+1] <- 0 dist1 <- dist1*2 rooted.tree <- root(nj(dist1),ncol(dist1)) rooted.tree <- drop.tip(rooted.tree,ncol(dist1)) if(is.ultrametric(rooted.tree)){ } else{ stop('Sorry, this function only works well with ultrametric trees. Please try rescaling the branch lengths.')} rooted.tree } ################################## Function phylo.signal.disc ######################################## # This function tests for phylogenetic signal with categorical traits. # It works similarly to phylo.signal, by randomizing the tip data and comparing the minimum number of # character state changes with a null model. # Minimum character state change is obtained with parsimony, and the syntax allows for different evolutionary # models. # To build a matrix of costs of character state transition, see Maddison & Maddison 2000. # MacClade 4 Manual pp.69-72 (unordered parsimony is the default, cost=NULL). # Note that this function corresponds to the "Fixed Tree, Character Radomly Reshuffled" model proposed in # Maddison & Slatkin (1991) Evolution 45:1184. # ------------------------------------------------------------------------- # Correction suggested by Anna Kostikova to David Bapst # Hi David, # I believe if you replace these 2 lines in the code, the function won't # give error message: # OBS <- parsimony(phy,obs,cost=cost1) # by # OBS <- parsimony(phy,obs,method="sankoff",cost=cost1) # NULL.MODEL[i,]<-parsimony(phy,null, cost=cost1) # by # NULL.MODEL[i,]<-parsimony(phy,null,method="sankoff",cost=cost1) # Because it seems like only sankoff can handle cost matrix parameter. # Anna (anna.kostikova@gmail.com) # ------------------------------------------------------------------------- 'phylo.signal.disc' <- function(trait,phy,rep = 999,cost=NULL) { lev <- attributes(factor(trait))$levels if (length(lev) == length(trait)) stop("Are you sure this variable is categorical?") if(is.null(cost)){ cost1 <- 1-diag(length(lev)) } else { if (length(lev) != dim(cost)[1]) stop("Dimensions of the character state transition matrix do not agree with the number of levels") cost1<- t(cost) } dimnames(cost1) <- list(lev,lev) trait <- as.numeric(trait) attributes(trait)$names <- phy$tip.label NULL.MODEL <- matrix(NA,rep,1) obs <- t(data.frame(trait)) obs <- phyDat(t(obs),type="USER",levels=attributes(factor(obs))$levels) OBS <- parsimony(phy,obs,method="sankoff",cost=cost1) for (i in 1:rep){ null <- sample(as.numeric(trait)) attributes(null)$names <- attributes(trait)$names null <- t(data.frame(null)) null <- phyDat(t(null),type="USER",levels=attributes(factor(null))$levels) NULL.MODEL[i,]<-parsimony(phy,null,method="sankoff",cost=cost1) P.value <- sum(OBS >= NULL.MODEL)/(rep + 1) } hist(NULL.MODEL,xlab="Transitions in Randomizations",xlim=c(min(c(min(NULL.MODEL,OBS-1))),max(NULL.MODEL)+1), main="", ylab="Frequency") mtext("(b)", side=3, font=2, line=1, adj=0,cex=1.5) arrows(OBS,rep/10,OBS,0,angle=20,col="red",lwd=4) OUTPUT1 <- t(data.frame(Number.of.Levels = length(attributes(factor(trait))$levels), Evolutionary.Transitions.Observed=OBS,Evolutionary.Transitions.Randomization.Median=median(NULL.MODEL),Evolutionary.Transitions.Randomization.Min=min(NULL.MODEL),Evolutionary.Transitions.Randomization.Max=max(NULL.MODEL),P.value)) if(is.null(cost)){ list(.Randomization.Results=OUTPUT1,.Levels= lev,.Costs.of.character.state.transition.UNORDERED.PARSIMONY = t(cost1)) } else { list(.Randomization.Results=OUTPUT1,.Levels= lev,.Costs.of.character.state.transition.FROM.ROW.TO.COL = t(cost1)) } } ################################## Function cor.brown ######################################## ## This function simulates the evolution of two correlated variables under a Brownian model of evolution. 'cor.brown' <- function(cor,phy,plot=TRUE){ T<-vcv.phylo(phy) R<-matrix(c(1,cor,cor,1),nrow=2,ncol=2) U<-matrix(,nrow=dim(T)[1],ncol=2) U[,1]<-rnorm(dim(T)[1]) U[,2]<-rnorm(dim(T)[1]) V<-U%*%chol(R) X<-t(V)%*%chol(T) IC_Trait1 <- pic(X[1,],phy) IC_Trait2 <- pic(X[2,],phy) if (plot){ layout(matrix(c(1,1,2,3), 2, 2, byrow = FALSE)) plot(ladderize(phy)) plot(X[1,],X[2,],xlab="Trait1",ylab="Trait2") plot(IC_Trait1,IC_Trait2) par(mfrow=c(1,1)) } else{ } data.frame(cor.Trait=as.numeric(cor.test(X[1,],X[2,])[4]),cor.IC=cor.origin(IC_Trait1,IC_Trait2)) } ################################################################################################## ################################################################################################## ##CORRECTED VERSION OF DROP.TIP BY EMMANUEL PARADIS ## drop.tip.R (2010-11-24) ## Remove Tips in a Phylogenetic Tree ## Copyright 2003-2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. extract.clade <- function(phy, node, root.edge = 0, interactive = FALSE) { Ntip <- length(phy$tip.label) ROOT <- Ntip + 1 Nedge <- dim(phy$edge)[1] wbl <- !is.null(phy$edge.length) if (interactive) node <- identify(phy)$nodes else { if (length(node) > 1) { node <- node[1] warning("only the first value of 'node' has been considered") } if (is.character(node)) { if (is.null(phy$node.label)) stop("the tree has no node labels") node <- which(phy$node.label %in% node) + Ntip } if (node <= Ntip) stop("node number must be greater than the number of tips") } if (node == ROOT) return(phy) phy <- reorder(phy) # insure it is in cladewise order root.node <- which(phy$edge[, 2] == node) start <- root.node + 1 # start of the clade looked for anc <- phy$edge[root.node, 1] # the ancestor of 'node' next.anc <- which(phy$edge[-(1:start), 1] <= anc) # find the next occurence of 'anc' or an 'older' node keep <- if (length(next.anc)) start + 0:(next.anc[1] - 1) else start:Nedge if (root.edge) { NewRootEdge <- phy$edge.length[root.node] root.edge <- root.edge - 1 while (root.edge) { if (anc == ROOT) break i <- which(phy$edge[, 2] == anc) NewRootEdge <- NewRootEdge + phy$edge.length[i] root.edge <- root.edge - 1 anc <- phy$edge[i, 1] } if (root.edge && !is.null(phy$root.edge)) NewRootEdge <- NewRootEdge + phy$root.edge phy$root.edge <- NewRootEdge } phy$edge <- phy$edge[keep, ] if (wbl) phy$edge.length <- phy$edge.length[keep] TIPS <- phy$edge[, 2] <= Ntip tip <- phy$edge[TIPS, 2] phy$tip.label <- phy$tip.label[sort(tip)] # <- added sort to avoid shuffling of tip labels (2010-07-21) ## keep the ordering so no need to reorder tip.label: phy$edge[TIPS, 2] <- order(tip) if (!is.null(phy$node.label)) phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip] Ntip <- length(phy$tip.label) phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L ## The block below renumbers the nodes so that they conform ## to the "phylo" format -- same as in root() newNb <- integer(Ntip + phy$Nnode) newNb[node] <- Ntip + 1L sndcol <- phy$edge[, 2] > Ntip ## executed from right to left, so newNb is modified before phy$edge: phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- (Ntip + 2):(Ntip + phy$Nnode) phy$edge[, 1] <- newNb[phy$edge[, 1]] phy } drop.tip <- function(phy, tip, trim.internal = TRUE, subtree = FALSE, root.edge = 0, rooted = is.rooted(phy), interactive = FALSE) { if (!inherits(phy, "phylo")) stop('object "phy" is not of class "phylo"') Ntip <- length(phy$tip.label) ## find the tips to drop: if (interactive) { cat("Left-click close to the tips you want to drop; right-click when finished...\n") xy <- locator() nToDrop <- length(xy$x) tip <- integer(nToDrop) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) for (i in 1:nToDrop) { d <- sqrt((xy$x[i] - lastPP$xx)^2 + (xy$y[i] - lastPP$yy)^2) tip[i] <- which.min(d) } } else { if (is.character(tip)) tip <- which(phy$tip.label %in% tip) } if (any(tip > Ntip)) warning("some tip numbers were higher than the number of tips") if (!rooted && subtree) { phy <- root(phy, (1:Ntip)[-tip][1]) root.edge <- 0 } phy <- reorder(phy) NEWROOT <- ROOT <- Ntip + 1 Nnode <- phy$Nnode Nedge <- dim(phy$edge)[1] if (subtree) { trim.internal <- TRUE tr <- reorder(phy, "pruningwise") N <- .C("node_depth", as.integer(Ntip), as.integer(Nnode), as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]), as.integer(Nedge), double(Ntip + Nnode), DUP = FALSE, PACKAGE = "ape")[[6]] } wbl <- !is.null(phy$edge.length) edge1 <- phy$edge[, 1] # local copies edge2 <- phy$edge[, 2] # keep <- !logical(Nedge) ## delete the terminal edges given by `tip': keep[match(tip, edge2)] <- FALSE if (trim.internal) { ints <- edge2 > Ntip ## delete the internal edges that do not have anymore ## descendants (ie, they are in the 2nd col of `edge' but ## not in the 1st one) repeat { sel <- !(edge2 %in% edge1[keep]) & ints & keep if (!sum(sel)) break keep[sel] <- FALSE } if (subtree) { ## keep the subtending edge(s): subt <- edge1 %in% edge1[keep] & edge1 %in% edge1[!keep] keep[subt] <- TRUE } if (root.edge && wbl) { degree <- tabulate(edge1[keep]) if (degree[ROOT] == 1) { j <- integer(0) # will store the indices of the edges below the new root repeat { i <- which(edge1 == NEWROOT & keep) j <- c(i, j) NEWROOT <- edge2[i] degree <- tabulate(edge1[keep]) if (degree[NEWROOT] > 1) break } keep[j] <- FALSE if (length(j) > root.edge) j <- 1:root.edge NewRootEdge <- sum(phy$edge.length[j]) if (length(j) < root.edge && !is.null(phy$root.edge)) NewRootEdge <- NewRootEdge + phy$root.edge phy$root.edge <- NewRootEdge } } } if (!root.edge) phy$root.edge <- NULL ## drop the edges phy$edge <- phy$edge[keep, ] if (wbl) phy$edge.length <- phy$edge.length[keep] ## find the new terminal edges (works whatever 'subtree' and 'trim.internal'): TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1]) ## get the old No. of the nodes and tips that become tips: oldNo.ofNewTips <- phy$edge[TERMS, 2] ## in case some tips are dropped but kept because of 'subtree = TRUE': if (subtree) { i <- which(tip %in% oldNo.ofNewTips) if (length(i)) { phy$tip.label[tip[i]] <- "[1_tip]" tip <- tip[-i] } } n <- length(oldNo.ofNewTips) # the new number of tips in the tree ## the tips may not be sorted in increasing order in the ## 2nd col of edge, so no need to reorder $tip.label phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2]) phy$tip.label <- phy$tip.label[-tip] ## make new tip labels if necessary: if (subtree || !trim.internal) { ## get the numbers of the nodes that become tips: node2tip <- oldNo.ofNewTips[oldNo.ofNewTips > Ntip] new.tip.label <- if (subtree) { paste("[", N[node2tip], "_tips]", sep = "") } else { if (is.null(phy$node.label)) rep("NA", length(node2tip)) else phy$node.label[node2tip - Ntip] } if (!is.null(phy$node.label)) phy$node.label <- phy$node.label[-(node2tip - Ntip)] phy$tip.label <- c(phy$tip.label, new.tip.label) } ## update node.label if needed: if (!is.null(phy$node.label)) phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip] phy$Nnode <- dim(phy$edge)[1] - n + 1L # update phy$Nnode ## The block below renumbers the nodes so that they conform ## to the "phylo" format -- same as in root() newNb <- integer(n + phy$Nnode) newNb[NEWROOT] <- n + 1L sndcol <- phy$edge[, 2] > n ## executed from right to left, so newNb is modified before phy$edge: phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- (n + 2):(n + phy$Nnode) phy$edge[, 1] <- newNb[phy$edge[, 1]] storage.mode(phy$edge) <- "integer" collapse.singles(phy) }