# Parasitic taxa are key to the vertical stratification and community variation of pelagic ciliates from the surface to the abyssopelagic zone

## Wan et al. "Parasitic taxa are key to the vertical stratification and community variation of pelagic ciliates from the surface to the abyssopelagic zone"

This document contains the statistical analyses conducted in R (v.4.1.3) for the manuscript. Please note that raw sequencing data processing, ASV table generation, alpha diversity, beta diversity, and taxonomic annotation were conducted with the EasyAmplicon pipeline (https://github.com/YongxinLiu/EasyAmplicon).

## Bray-Curtis (dis)similarity

```r
# load package
library(vegan)

# import data and transpose it; vegdist() computes dissimilarities between rows
spe <- read.delim("otutab.txt", sep = "\t", row.names = 1, check.names = FALSE)
spe <- data.frame(t(spe))

# get Bray-Curtis dissimilarity matrix
comm_dis <- as.matrix(vegdist(spe, method = "bray"))

# get Bray-Curtis similarity matrix
# (1 - dissimilarity; reuses comm_dis instead of computing the distances twice)
comm_sim <- 1 - comm_dis
```

## Clustering

```r
# load packages
pacman::p_load(vegan, phangorn, factoextra, eclust)

# import data
data <- read.table("otutab.txt", header = TRUE, row.names = 1)
data <- t(data)

# upgma
up <- upgma(vegdist(data, method = "bray"))
plot(up)

# hclust
hc <- hclust(dist(data))
plot(hc)
plot(hc, hang = -1)
# add clustering boxes to make the dendrogram easier to read
rect.hclust(hc, k = 2) # rect.hclust(hc, k): k is the number of clusters to box

# k-means based on silhouette coefficient
fviz_nbclust(data, kmeans, method = "silhouette")
km.res <- kmeans(data, centers = 2) # kmeans(data, k): k is the optimal number of clusters shown in the previous step
fviz_cluster(km.res, data = data)
# if an error occurs such as "cannot rescale a constant/zero column to unit variance",
# run the line below instead
# fviz_cluster(km.res, data[ , which(apply(data, 2, var) != 0)])
```

## Distance-decay

```r
# load packages
pacman::p_load(vegan, geosphere)

# import site data
site <- read.delim("site.txt", sep = "\t", row.names = 1, check.names = FALSE)
# pairwise geographic distance, divided by 1000 (stored below as "site_dis/km")
site_dis <- geosphere::distm(site[c("Longitude", "Latitude")]) / 1000
rownames(site_dis) <- rownames(site)
colnames(site_dis) <- rownames(site)
site_dis <- reshape2::melt(site_dis)
site_dis <- subset(site_dis, value != 0)
head(site_dis)

# import community data
spe <- read.delim("otutab.txt", sep = "\t", row.names = 1, check.names = FALSE)
spe <- data.frame(t(spe))
comm_sim <- 1 - as.matrix(vegan::vegdist(spe, method = "bray"))
# keep every sample pair once (strict lower triangle). The unwanted cells are
# masked with NA instead of 0, so pairs whose similarity is genuinely 0
# (no shared taxa) are kept rather than being filtered out together with
# the masked cells.
comm_sim[!lower.tri(comm_sim)] <- NA
comm_sim <- reshape2::melt(comm_sim, na.rm = TRUE)
head(comm_sim)

# merge distance and similarity results
comm_dis <- merge(comm_sim, site_dis, by = c("Var1", "Var2"))
names(comm_dis) <- c("site1", "site2", "comm_sim", "site_dis/km")
head(comm_dis)

# write output file
write.table(comm_dis, "comm_dis.txt", sep = "\t", row.names = FALSE, quote = FALSE)

# construct distance-decay linear model
# check.names = FALSE keeps the "site_dis/km" header exactly as written, so
# the column is addressed by its full name instead of relying on `$` partial
# matching of a mangled name
comm_dis <- read.delim("comm_dis.txt", sep = "\t", check.names = FALSE)
# NOTE(review): "site_dis/km" was already divided by 1000 above, so after this
# second division the predictor is in units of 1000 km despite the `_km`
# suffix - confirm this rescaling is intended when reporting the slope
comm_dis$site_dis_km <- comm_dis[["site_dis/km"]] / 1000
fit <- lm(comm_sim ~ site_dis_km, data = comm_dis)
summary(fit)
```

## Mantel test

```r
# load packages
pacman::p_load(ecodist, vegan, geosphere)

# import data
# NOTE(review): spe, env and group are combined row by row below, which
# assumes the three files list the same samples in the same order - confirm
spe <- read.table("otutab.txt", header = TRUE, row.names = 1) # otu/asv table
spe <- t(spe)
group <- read.table("group.txt", header = TRUE, row.names = 1) # group info
env <- read.table("env.txt", header = TRUE, row.names = 1) # environmental table

# calculate matrices
spe.dist <- vegdist(spe, method = "bray") # BC matrix
env.dist <- vegdist(env, method = "euclidean") # environmental matrix
ocean.dist <- vegdist(env$Tur, method = "euclidean") # oceanic matrix

## Mantel Test: Dissimilarity vs. Environmental Distance
env.mantel <- vegan::mantel(spe.dist, env.dist, method = "pearson", permutations = 999)
env.mantel

# Single Mantel Test: one Mantel test per environmental variable
#
# Arguments:
#   spe          community table passed to vegan::vegdist()
#   env          data frame of environmental variables; each column is tested
#                separately using its Euclidean distance matrix
#   sim.method   dissimilarity index for the community table (vegdist `method`)
#   correlation  correlation method passed to vegan::mantel()
#   p.adjust.m   multiple-testing correction passed to p.adjust()
#
# Returns a data frame with one row per column of `env` and the columns
# factor (variable name), r (Mantel statistic), p.value and p.adjusted.
single.mantel <- function(spe, env, sim.method, correlation, p.adjust.m) {
  co <- colnames(env)
  spe.dist <- vegan::vegdist(spe, method = sim.method)
  # preallocate the results instead of growing vectors inside the loop;
  # seq_along() also handles an `env` without columns, where 1:length(co)
  # would iterate over c(1, 0)
  r <- numeric(length(co))
  p.value <- numeric(length(co))
  for (elem in seq_along(co)) {
    env.dist <- vegan::vegdist(env[elem], method = "euclidean")
    ad <- vegan::mantel(spe.dist, env.dist, permutations = 999, method = correlation)
    r[elem] <- ad$statistic
    p.value[elem] <- ad$signif
  }
  p.adjusted <- p.adjust(p.value, method = p.adjust.m)
  data.frame(factor = co, r = r, p.value = p.value, p.adjusted = p.adjusted)
}

sin.mantel <- single.mantel(spe, env, sim.method = "bray", correlation = "pearson", p.adjust.m = "fdr")
sin.mantel

## Mantel Test: Dissimilarity vs. Oceanic Distance
geo <- data.frame(group$Longitude, group$Latitude)
d.geo <- distm(geo, fun = distHaversine)
dist.geo <- as.dist(d.geo)
ocean.mantel <- vegan::mantel(spe.dist, dist.geo, method = "pearson", permutations = 999)
ocean.mantel

## Partial Mantel
# Dissimilarity vs. environmental distance (control oceanic distance)
mantel.partial(spe.dist, env.dist, dist.geo, method = "pearson", permutations = 999)
# Dissimilarity vs.
# oceanic distance (control environment)
mantel.partial(spe.dist, dist.geo, env.dist, method = "pearson", permutations = 999)

## MRM
# the result gets its own name so that the MRM() function is not shadowed by
# its own output
mrm.res <- ecodist::MRM(spe.dist ~ env.dist + dist.geo, nperm = 1000, method = "linear")
mrm.res
```

## Spearman correlation

```r
# load packages
pacman::p_load(psych, reshape2)

# import data
env <- read.delim("env.txt", sep = "\t", row.names = 1)
spe <- read.delim("otutab.txt", sep = "\t", row.names = 1)
# NOTE(review): the rows of spe are selected by the row names of env, which
# assumes this table has samples as rows - transpose it first if the file is
# taxa x samples; confirm against the input file
spe <- spe[rownames(env), ]

# calculate spearman correlation
spearman <- corr.test(env, spe, method = "spearman", adjust = "fdr")
r <- data.frame(spearman$r)
p <- data.frame(spearman$p)
r$env <- rownames(r)
p$env <- rownames(p)
r <- melt(r, id = "env")
p <- melt(p, id = "env")
spearman <- cbind(r, p$value)
colnames(spearman) <- c("env", "spe", "spearman_correlation", "p.value")
spearman$spe <- factor(spearman$spe, levels = colnames(spe))
head(spearman)

# visualization
library(ggplot2)
p1 <- ggplot() +
  geom_tile(data = spearman, aes(x = spe, y = env, fill = spearman_correlation)) +
  scale_fill_gradientn(colors = c("#2D6DB1", "white", "#DC1623"), limits = c(-1, 1)) +
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(color = "black"),
    legend.key = element_blank(),
    axis.text.x = element_text(color = "black", angle = 45, hjust = 1, vjust = 1),
    axis.text.y = element_text(color = "black"),
    axis.ticks = element_line(color = "black")
  ) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(y = "", x = "", fill = "Correlation")
p1

# significance labels. The thresholds are applied from weakest to strongest,
# each overwriting the previous one, so every p-value below 0.05 gets exactly
# one label - including values equal to 0.01 or 0.001, which open intervals
# such as (0.001, 0.01) would leave unlabelled.
spearman$sig <- NA_character_
spearman$sig[which(spearman$p.value < 0.05)] <- "*"
spearman$sig[which(spearman$p.value < 0.01)] <- "**"
spearman$sig[which(spearman$p.value < 0.001)] <- "***"

p2 <- p1 +
  geom_text(data = spearman, aes(x = spe, y = env, label = sig), size = 3)
p2
```

## Normalized Stochasticity Ratios (NST)

```r
# load packages
pacman::p_load(NST, ape, iCAMP, picante)

# import data
otu <- t(read.table("oli.txt", row.names = 1, header = TRUE)) # otu/asv table
group <- read.table("group.txt", row.names = 1, header = TRUE) # group info

# check whether the otu table corresponds to the metadata
samp.ck <- NST::match.name(rn.list = list(otu = otu, group = group))
otu <- samp.ck$otu
otu <- otu[, colSums(otu) > 0, drop = FALSE]
group <- samp.ck$group

# grouping and metacommunity setting
groupi <- group[, 1, drop = FALSE]
prefix <- "oli"
prefixi <- paste0(prefix, ".Group")
# if treatment and control are from different metacommunities, you may set
# meta.groupi = groupi; default = NULL
# meta.groupi <- NULL
meta.groupi <- groupi

# calculate NST
# record running time
t1 <- Sys.time()
tnst <- tNST(
  comm = otu, group = groupi, meta.group = meta.groupi, meta.com = NULL,
  dist.method = "bray", abundance.weighted = TRUE, rand = 999,
  output.rand = TRUE, nworker = 8, LB = FALSE, null.model = "PF",
  between.group = TRUE, SES = TRUE, RC = TRUE
)

# save and write the results
save(tnst, file = paste0(prefixi, ".tNST.rda"))
write.table(tnst$index.grp, file = paste0(prefixi, ".tNST.summary.txt"), quote = FALSE, sep = "\t")
write.table(tnst$index.pair.grp, file = paste0(prefixi, ".tNST.pairwise.txt"), quote = FALSE, sep = "\t")
write.table(tnst$index.pair, file = paste0(prefixi, ".tNST.pairwise.index.txt"), quote = FALSE, sep = "\t")
write.table(tnst$index.between, file = paste0(prefixi, ".tNST.between.summary.txt"), quote = FALSE, sep = "\t")
write.table(tnst$index.pair.between, file = paste0(prefixi, ".tNST.pairwise.between.txt"), quote = FALSE, sep = "\t")
format(Sys.time() - t1)

# bootstrapping test
tnstbt <- nst.boot(
  nst.result = tnst, group = groupi, rand = 999, trace = TRUE,
  two.tail = FALSE, out.detail = TRUE, between.group = FALSE, nworker = 8
)
save(tnstbt, file = paste0(prefixi, ".tNST.boot.rda"))
write.table(tnstbt$summary, file = paste0(prefixi, ".tNST.boot.summary.txt"), quote = FALSE, sep = "\t")
write.table(tnstbt$compare, file = paste0(prefixi, ".tNST.boot.compare.txt"), quote = FALSE, sep = "\t")
# total elapsed time (stored as `elapsed` so that base::t() is not masked)
(elapsed <- format(Sys.time() - t1))
```

## Neutral Community Model (NCM)

```r
pacman::p_load(Hmisc, minpack.lm, stats4)
# using Non-linear least squares (NLS) to calculate R2:
# spp: A community table with taxa as rows and samples as columns
spp <- read.table("otu.txt", header = TRUE, row.names = 1)
spp <- t(spp)

# N: mean number of reads per sample
N <- mean(rowSums(spp))
# p: mean abundance of each taxon across samples, relative to N
p.m <- colMeans(spp)
p.m <- p.m[p.m != 0]
p <- p.m / N
# freq: fraction of samples in which each taxon occurs
spp.bi <- 1 * (spp > 0)
freq <- colMeans(spp.bi)
freq <- freq[freq != 0]

# combine both by taxon name (merge by row names) and sort by abundance
C <- merge(p, freq, by = 0)
C <- C[order(C[, 2]), ]
C <- as.data.frame(C)
# drop taxa with a zero in either numeric column. The columns are tested
# directly: apply() over the whole data frame would coerce each row to
# character because of the taxon-name column, turning `y == 0` into a string
# comparison.
C.0 <- C[C[, 2] != 0 & C[, 3] != 0, ]
p <- C.0[, 2]
freq <- C.0[, 3]
names(p) <- C.0[, 1]
names(freq) <- C.0[, 1]
d <- 1 / N

## Fit model parameter m (or Nm) using Non-linear least squares (NLS)
m.fit <- nlsLM(freq ~ pbeta(d, N * m * p, N * m * (1 - p), lower.tail = FALSE), start = list(m = 0.1))
m.fit # get the m value
m.ci <- confint(m.fit, "m", level = 0.95)
freq.pred <- pbeta(d, N * coef(m.fit) * p, N * coef(m.fit) * (1 - p), lower.tail = FALSE)
pred.ci <- binconf(freq.pred * nrow(spp), nrow(spp), alpha = 0.05, method = "wilson", return.df = TRUE)
Rsqr <- 1 - (sum((freq - freq.pred)^2)) / (sum((freq - mean(freq))^2))
Rsqr # get the R2 value

# Optional: write 3 files: p.csv, freq.csv and freq.pred.csv
# write.csv(p, file = "d:/Work/WPO/result/NCM/D5_p.csv")
# write.csv(freq, file = "d:/Work/WPO/result/NCM/D5_freq.csv")
# write.csv(freq.pred, file = "d:/Work/WPO/result/NCM/D5_freq.pred.csv")
```

## Dispersal ability

```r
# load packages
pacman::p_load(tidyverse, geosphere, vegan)

# Obtain distance matrix
# site <- read.delim('site.txt', sep = '\t', row.names = 1, check.names = FALSE)
# site_dis <- geosphere::distm(site[c("Longitude", "Latitude")])/1000
# rownames(site_dis) <- rownames(site)
# colnames(site_dis) <- rownames(site)
# site_dis <- reshape2::melt(site_dis)
# site_dis <- subset(site_dis, value != 0)
# head(site_dis)
#
# dis <- spread(site_dis, key = Var2, value = value)
# dis[is.na(dis)] <- 0
# write.table(dis, "dis.txt", sep = "\t", row.names = F)

# Import data
dis <- read.table("dis.txt", row.names = 1, header = TRUE)
dis <- dis / 1000
data <- read.csv("otutab_rare_D5.csv", header = TRUE, row.names = 1)

# Obtain subsets
# (despite their names, last_row and cut_row are column indices: the last six
# columns of the table are excluded from the selection below)
last_row <- ncol(data)
last_row <- as.numeric(last_row)
cut_row <- last_row - 5
prefix <- "Colpodea" # change taxa group
prefixi <- paste0(prefix, ".D5") # change group name
spe <- subset(data, Class == prefix, select = -c(cut_row:last_row))

# Transform the OTU table to 1-0 format
spe[spe > 0] <- 1

# Calculate connectivity for each patch
patch <- names(spe)
n <- ncol(spe)
con <- numeric(length(patch)) # preallocated, one value per patch
for (idx in seq_along(patch)) {
  i <- patch[idx]
  ijk <- 0
  for (j in setdiff(patch, i)) {
    # exp_dij <- exp(-dis[i, j]) # active
    exp_dij <- 1 / exp(-dis[i, j]) # passive
    # m: number of taxa present in patch i, in patch j, or in both
    m <- sum(spe[, i] + spe[, j] > 0)
    # occurrences in patch j weighted by the distance term (vectorised sum
    # over taxa; also well defined when the subset contains no taxa)
    jk <- sum(spe[, j]) * exp_dij
    ijk <- 1 / m * 1 / (n - 1) * jk + ijk
  }
  con[idx] <- ijk
}
con <- data.frame(patch, con)
con
con <- na.omit(con)

# Calculate average connectivity for metacommunity
ave.con <- mean(con[, 2])
ave.con
# append the average as the last row; binding a one-row data frame (instead of
# a character vector) keeps the con column numeric
output <- rbind(con, data.frame(patch = "ave.con", con = ave.con))

# Write the output
write.csv(output, paste0(prefixi, ".con.csv"), quote = FALSE)
```

## Correlations between subcommunity (Spirotrichea as example) and whole community

```r
# load packages
pacman::p_load(ggplot2, spaa, EcolUtils, vegan, permute, lattice, tidyverse, dplyr, ggpmisc, patchwork)

# calculating BC result
otu <- read.delim("spi.txt", row.names = 1)
otu <- t(otu)
dis <- vegan::vegdist(otu, method = "bray")
dis <- as.matrix(dis)
write.table(dis, "Bray-curtis.spi.txt", sep = "\t", col.names = NA, quote = FALSE)

dis <- read.delim("Bray-curtis.spi.txt", row.names = 1)
group <- read.delim("group.txt", stringsAsFactors = FALSE)

# depth layers, from the surface to the bottom
layers <- c("D5", "D25", "DCM", "D200", "D300", "D500", "D750", "D1000", "D2000", "D3000", "Bottom")

# pairwise dissimilarities among the samples of each layer (one vector per layer)
dis_by_layer <- lapply(layers, function(layer) {
  samples <- group$samples[which(group$group == layer)]
  as.vector(as.dist(dis[samples, samples]))
})
names(dis_by_layer) <- layers

dat <- data.frame(
  dis = unlist(dis_by_layer, use.names = FALSE),
  group = factor(rep(layers, lengths(dis_by_layer)), levels = layers)
)
dat <- na.omit(dat)
write.table(dat, "Bray-curtis_pair.spi.txt", sep = "\t", col.names = NA, quote = FALSE)

all <- read.table("Bray-curtis_pair.all.txt", header = TRUE, row.names = 1)
spi <- read.table("Bray-curtis_pair.spi.txt", header = TRUE, row.names = 1)

# making import data for linear models: one "<layer>.lm.txt" file per layer,
# so that every file read in the next step exists. "Bottom" is processed last,
# so all.sub, spi.sub and dat end up holding the "Bottom" values.
for (layer in layers) {
  all.sub <- subset(all, group == layer)
  dis.all.sub <- all.sub$dis
  spi.sub <- subset(spi, group == layer)
  dis.spi.sub <- spi.sub$dis
  # both vectors are paired by position; stop instead of letting data.frame()
  # recycle the shorter one when the two tables lost different rows to na.omit()
  if (length(dis.all.sub) != length(dis.spi.sub)) {
    stop("Different numbers of sample pairs for layer ", layer,
         " in the whole-community and subcommunity tables", call. = FALSE)
  }
  dat <- data.frame(dis.all.sub, dis.spi.sub) # change the number to 'dis.oli.sub' num
  colnames(dat) <- c("all", "dis")
  write.table(dat, paste0(layer, ".lm.txt"), sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
}

# lm
lm_d5 <- read.table("D5.lm.txt", header = TRUE)
lm_d25 <- read.table("D25.lm.txt", header = TRUE)
lm_dcm <- read.table("DCM.lm.txt", header = TRUE)
lm_d200 <- read.table("D200.lm.txt", header = TRUE)
lm_d300 <- read.table("D300.lm.txt", header = TRUE)
lm_d500 <- read.table("D500.lm.txt", header = TRUE)
lm_d750 <- read.table("D750.lm.txt", header = TRUE)
lm_d1000 <- read.table("D1000.lm.txt", header = TRUE)
lm_d2000 <- read.table("D2000.lm.txt", header = TRUE)
lm_d3000 <- read.table("D3000.lm.txt", header = TRUE)
lm_bot <- read.table("Bottom.lm.txt", header = TRUE)

# fit <- lm(all~dis, data = lm)
# summary(fit) # to get R-squared value
```