---
title: "MicrobiomeProject1"
output: pdf_document
---

Loading packages

```{r setup, include=FALSE}
# knitr::opts_chunk$set(echo = FALSE)

# This chunk only attaches packages; it never installs them. Installing while
# knitting needs network access and makes the report non-reproducible, and the
# biocLite.R bootstrap script has been superseded by the BiocManager package.
# Install missing Bioconductor packages once, interactively, e.g.
#   BiocManager::install(c("BiocStyle", "DECIPHER", "phangorn", "DESeq2"))
# The attach order below is unchanged because it determines function masking.
library(dada2)
library("knitr")
library(phyloseq)
library(ggplot2)
library(tidyverse)
# source("https://bioconductor.org/biocLite.R")
# biocLite("BiocStyle")
# biocLite("DECIPHER")
# biocLite("phangorn")
library("BiocStyle")
library("DECIPHER")
library("phangorn")
library(devtools)
# biocLite("adespatial")
library("vegan")
# biocLite("DESeq2")
library("DESeq2")
library("adespatial")
library("genefilter")
library("dbplyr")
library("microbiomeSeq")
library("nlme")
library("lme4")
library("lmerTest")
library("pairwiseAdonis")
library("RVAideMemoire")
library("gridExtra")
library("ggpubr")
```

## Filtering reads and constructing OTUs

### Set up Path and filter reads

```{r}
# pathF <- "/Volumes/Micksexternal/jonas/data/demultiplexed/dbcAmplicons/R1"
# pathR <- "/Volumes/Micksexternal/jonas/data/demultiplexed/dbcAmplicons/R2"
# filtpathF <- file.path(pathF, "filtered")
# filtpathR <- file.path(pathR, "filtered")
# fastqFs <- sort(list.files(pathF, pattern="fastq"))
# fastqRs <- sort(list.files(pathR, pattern="fastq"))
# if(length(fastqFs) != length(fastqRs)) stop("Forward and reverse files do not match.")
#
# out3 <- filterAndTrim(fwd=file.path(pathF, fastqFs),
#                       filt=file.path(filtpathF, fastqFs), rev=file.path(pathR, fastqRs),
#                       filt.rev=file.path(filtpathR, fastqRs), truncLen=c(250,240),
#                       maxN=0, maxEE=c(6,6), truncQ=2,
#                       rm.phix=TRUE, compress=TRUE, verbose=TRUE, multithread=TRUE)
```

### Learn error rates and merge pair-ends

```{r}
# filtpathF <- "/Volumes/Micksexternal/jonas/data/demultiplexed/dbcAmplicons/R1/filtered"
# filtpathR <- "/Volumes/Micksexternal/jonas/data/demultiplexed/dbcAmplicons/R2/filtered"
# filtFs <- list.files(filtpathF, pattern="fastq", full.names = TRUE)
# filtRs <- list.files(filtpathR, pattern="fastq", full.names = TRUE)
# sample.names <- sapply(strsplit(basename(filtFs), "_"), `[`, 1)
# sample.namesR <- sapply(strsplit(basename(filtRs), "_"), `[`, 1)
# if(!identical(sample.names, sample.namesR)) stop("Forward and reverse files do not match.")
# names(filtFs) <- sample.names
# names(filtRs) <- sample.names
# set.seed(100)
# errF <- learnErrors(filtFs, nbases=1e8, multithread=TRUE)
# errR <- learnErrors(filtRs, nbases=1e8, multithread=TRUE)
# mergers <- vector("list", length(sample.names))
# names(mergers) <- sample.names
#
# for(sam in sample.names) {
#   cat("Processing:", sam, "\n")
#   derepF <- derepFastq(filtFs[[sam]])
#   ddF <- dada(derepF, err=errF, multithread=TRUE)
#   derepR <- derepFastq(filtRs[[sam]])
#   ddR <- dada(derepR, err=errR, multithread=TRUE)
#   merger <- mergePairs(ddF, derepF, ddR, derepR)
#   mergers[[sam]] <- merger
# }
```

### Save reads

```{r}
# seqtab3 <- makeSequenceTable(mergers)
# saveRDS(seqtab3, "/Volumes/Micksexternal/jonas/data/demultiplexed/dbcAmplicons/output/seqtabnew3.rds")
# st3 <- readRDS("/Volumes/Micksexternal/jonas/data/demultiplexed/dbcAmplicons/output/seqtabnew3.rds")
# seqtab3 <- removeBimeraDenovo(st3, method="consensus", multithread=TRUE)
```

### Check filters

Let's see how many of the initial reads were retained after the filtering step and the removal of the chimeras.

```{r}
# getN <- function(x) sum(getUniques(x))
# track <- cbind(out3,rowSums(seqtab3))
# colnames(track) <- c("input", "filtered","nonchim")
# rownames(track) <- sample.names
# head(track)
# write.csv(track, "~/Desktop/track.csv")
# track <- read.csv("track.csv")
# head(track)
```

### Assign OTUs

We are using the Silva database v132 to assign OTUs.
```{r}
# tax <- assignTaxonomy(seqtab3, "/Users/michaelsong/Downloads/silva_nr_v132_train_set1.fasta", multithread=TRUE)
# saveRDS(seqtab3, "/Users/michaelsong/Downloads/newseqtab4_final.rds")
# saveRDS(tax, "/Users/michaelsong/Downloads/newtax4_final.rds")
#tax <- readRDS("/Volumes/Micksexternal/jonas/data/demultiplexed/dbcAmplicons/output/newtax3_final.rds")
```

## Build Tree

We're building an ML tree for phylogenetic distance.

```{r NJ and ML tree, message = FALSE, warning = FALSE, error = FALSE}
# seqs <- getSequences(seqtab3)
# names(seqs) <- seqs
# alignment <- AlignSeqs(DNAStringSet(seqs), anchor=NA)
# phang.align <- phyDat(as(alignment, "matrix"), type="DNA")
# dm <- dist.ml(phang.align)
# treeNJ <- NJ(dm)
#
# fit = pml(treeNJ, data=phang.align)
# fitGTR <- update(fit, k=4, inv=0.2)
# fitGTR <- optim.pml(fitGTR, model="GTR", optInv=TRUE, optGamma=TRUE,
#                     rearrangement = "stochastic", control = pml.control(trace = 0))
# saveRDS(fitGTR, "/Volumes/Micksexternal/jonas/data/demultiplexed/dbcAmplicons/output/fitGTR.rds")
# fitGTR1 <- readRDS("/Volumes/Micksexternal/jonas/data/demultiplexed/dbcAmplicons/output/fitGTR.rds")
# detach("package:phangorn", unload=TRUE)

#load table and make Phyloseq object
#samdf <- read.csv("/Volumes/Micksexternal/Microbiome4.csv", row.names = 1)

# Every input of the phyloseq object is produced by code that is currently
# commented out above, so fail early with a message that names what is missing
# instead of an opaque "object not found" in the middle of the constructor.
required_inputs <- c("seqtab3", "samdf", "tax", "fitGTR1")
missing_inputs <- required_inputs[
  !vapply(required_inputs, function(nm) exists(nm, inherits = TRUE), logical(1))
]
if (length(missing_inputs) > 0) {
  stop(
    "Cannot build `pstree`; missing object(s): ",
    paste(missing_inputs, collapse = ", "),
    ". Un-comment the code that creates them or load them with readRDS().",
    call. = FALSE
  )
}

# Combine the sequence table (samples in rows), the sample metadata, the
# taxonomy assignments and the tree stored in the fitted model.
pstree <- phyloseq(
  otu_table(seqtab3, taxa_are_rows = FALSE, errorIfNULL = TRUE),
  sample_data(samdf),
  tax_table(tax),
  phy_tree(fitGTR1$tree)
)
head(samdf)
saveRDS(pstree, "~/Desktop/pstree.rds")
```

Here is a preview of how the data was coded up.
```{r, message = FALSE, warning = FALSE, error = FALSE}
# Separate experimental from control data
pstreeExp <- phyloseq::subset_samples(pstree, Experiment == "exp")
pstreeExp <- phyloseq::subset_samples(pstreeExp, Population != "Control")
pstreePre <- phyloseq::subset_samples(pstree, Experiment == "pre")
pstreePost <- phyloseq::subset_samples(pstree, Experiment == "post")
pstreePrePost <- phyloseq::subset_samples(pstree, Experiment != "exp")

# Drop seven extraction batches one at a time; the intermediate objects are
# kept because later chunks refer to pstreeNoLow7.
pstreeNoLow1 <- phyloseq::subset_samples(pstreeExp, EXTRACT != "1282016A")
pstreeNoLow2 <- phyloseq::subset_samples(pstreeNoLow1, EXTRACT != "1262016A")
pstreeNoLow3 <- phyloseq::subset_samples(pstreeNoLow2, EXTRACT != "1252016A")
pstreeNoLow4 <- phyloseq::subset_samples(pstreeNoLow3, EXTRACT != "12302016A")
pstreeNoLow5 <- phyloseq::subset_samples(pstreeNoLow4, EXTRACT != "12172016A")
pstreeNoLow6 <- phyloseq::subset_samples(pstreeNoLow5, EXTRACT != "1212016A")
pstreeNoLow7 <- phyloseq::subset_samples(pstreeNoLow6, EXTRACT != "12122016A")

# One object per temperature treatment
pstreeLOW <- phyloseq::subset_samples(pstreeExp, Temperature == "aLow")
pstreeMED <- phyloseq::subset_samples(pstreeExp, Temperature == "bMedium")
pstreeHIGH <- phyloseq::subset_samples(pstreeExp, Temperature == "cHigh")
pstreeControl <- phyloseq::subset_samples(pstree, Population == "Control")

# If Finland is bad it can be removed
pstreeNoFin <- phyloseq::subset_samples(pstreeExp, Population != "Finland")
pstreeNoFinNoIs <- phyloseq::subset_samples(pstreeNoFin, Population != "Israel")
pstreeNoFinNoGer <- phyloseq::subset_samples(pstreeNoFin, Population != "Germany")
# `==`, not `=`: a single `=` passes a named argument instead of a filter
# condition, so no Finland-only subset was actually being taken.
pstreeFin <- phyloseq::subset_samples(pstreeExp, Population == "Finland")

# Reduce experiment samples to the 20 most abundant OTUs.
# head() instead of [1:20] so fewer than 20 taxa does not introduce NA names.
top20 <- head(names(sort(taxa_sums(pstreeNoFin), decreasing = TRUE)), 20)
ps.top20 <- transform_sample_counts(pstreeNoFin, function(OTU) OTU / sum(OTU))
ps.top20 <- prune_taxa(top20, ps.top20)

# Plot the tree of the top-20 OTUs (the original referenced an undefined
# `ps.top10`; the object built just above is `ps.top20`).
p4 <- plot_tree(ps.top20, color = "Temperature", label.tips = "Genus") +
  facet_wrap(~Population, shrink = TRUE) +
  theme(legend.position = "bottom")
p4
plotyolo <- plot_bar(pstreeExp, "Order")
```

## Adonis

```{r}
set.seed(28132)
# Rarefy the experimental samples to an even depth of 15000 reads, then
# compute the three between-sample distances used throughout this section.
psExpR <- rarefy_even_depth(pstreeExp, sample.size = 15000)
df <- as(sample_data(psExpR), "data.frame")
d2 <- phyloseq::distance(psExpR, "unifrac")
dw <- phyloseq::distance(psExpR, "wunifrac")
d3 <- phyloseq::distance(psExpR, "bray")

# Unweighted UniFrac distance
psQadonisJan6uni <- adonis(
  d2 ~ Population/Genotype/SourcePop2 +
    Population/Genotype/SourcePop2*Temperature + Temperature + EXTRACT,
  df
)
psQadonisJan6uni

# Betadisper Temperature
unifrac_betadisp_temp <- betadisper(d2, df$Temperature)
permutest(unifrac_betadisp_temp, permutations = 999)

# NMDS Plot (Figure 3B)
# The base plot `uniplot_nmds_temp` was never created anywhere in this file,
# so it is rebuilt here from an NMDS ordination of the same distance matrix.
# NOTE(review): colour = Temperature is inferred from the three-colour scale
# and the per-temperature ellipses below; confirm against the published figure.
# The ordination draws random numbers, so it shifts the RNG stream seen by the
# permutation tests that follow.
uni_nmds_exp <- ordinate(psExpR, method = "NMDS", distance = d2)
uniplot_nmds_temp <- plot_ordination(
  psExpR, uni_nmds_exp, type = "samples", color = "Temperature"
)
uniplot_nmds_poptemp_new <- uniplot_nmds_temp +
  aes(shape = Population) +
  geom_point(size = 4) +
  stat_ellipse(aes(group = Temperature)) +
  scale_color_manual(values = c("#0033FF", "#9933FF", "#FF0000"))

# Weighted UniFrac distance
psQadonisJan6uniw <- adonis(
  dw ~ Population/Genotype/SourcePop2 +
    Population/Genotype/SourcePop2*Temperature + Temperature + EXTRACT,
  df
)
psQadonisJan6uniw

# Betadisper Population
wunifrac_betadisp_pop <- betadisper(dw, df$Population)
permutest(wunifrac_betadisp_pop, permutations = 999)

# Betadisper Temperature
wunifrac_betadisp_temp <- betadisper(dw, df$Temperature)
permutest(wunifrac_betadisp_temp, permutations = 999)

# Betadisper Genotype
wunifrac_betadisp_gen <- betadisper(dw, df$Genotype)
permutest(wunifrac_betadisp_gen, permutations = 999)

# NMDS Plot (Figure 3C)
# `wuniplot_nmds_poptemp` was likewise undefined; rebuilt the same way.
# NOTE(review): shape = Population is a guess from the object name and is
# overridden by the Genotype shape mapping added below.
wuni_nmds_exp <- ordinate(psExpR, method = "NMDS", distance = dw)
wuniplot_nmds_poptemp <- plot_ordination(
  psExpR, wuni_nmds_exp, type = "samples",
  color = "Temperature", shape = "Population"
)
wuniplot_nmds_poptemp_new <- wuniplot_nmds_poptemp +
  aes(shape = Genotype) +
  geom_point(size = 4) +
  scale_shape_manual(values = c(1:9)) +
  scale_color_manual(values = c("#0033FF", "#9933FF", "#FF0000")) +
  stat_ellipse(aes(group = Temperature))

# Bray-Curtis dissimilarity
psQadonisJan6bray <- adonis(
  d3 ~ Population/Genotype/SourcePop2 +
    Population/Genotype/SourcePop2*Temperature + Temperature + EXTRACT,
  df
)
psQadonisJan6bray

# Betadisper Population
bray_betadisp_pop <- betadisper(d3, df$Population)
permutest(bray_betadisp_pop, permutations = 999)
# Betadisper Temperature
bray_betadisp_temp <- betadisper(d3, df$Temperature)
permutest(bray_betadisp_temp, permutations = 999)

# NMDS Plot (Figure 3A)
# The base plot `bray_nmds_poptemp` was never created in this file, so it is
# rebuilt from an NMDS ordination of the Bray-Curtis matrix.
# NOTE(review): colour = Temperature / shape = Population is inferred from the
# object name and the three-colour scale; confirm against the published figure.
bray_nmds_exp <- ordinate(psExpR, method = "NMDS", distance = d3)
bray_nmds_poptemp <- plot_ordination(
  psExpR, bray_nmds_exp, type = "samples",
  color = "Temperature", shape = "Population"
)
bray_nmds_poptemp_new <- bray_nmds_poptemp +
  stat_ellipse(aes(group = Temperature)) +
  geom_point(size = 4) +
  scale_color_manual(values = c("#0033FF", "#9933FF", "#FF0000"))

### Pairwise adonis
# pairwise.adonis() takes (distance, factor); the formula/data form belongs to
# pairwise.adonis2(), so this call now matches the ones further down.
pairwise.adonis(d2, df$Population)

# Same unweighted-UniFrac model after dropping seven extraction batches
pstreeNoLow7
psExpRpstreeNoLow <- rarefy_even_depth(pstreeNoLow7, sample.size = 15000)
dfff <- as(sample_data(psExpRpstreeNoLow), "data.frame")
d2ff <- phyloseq::distance(psExpRpstreeNoLow, "unifrac")
psQadonisJan6uniNoLow <- adonis(
  d2ff ~ Population/Genotype/SourcePop2 +
    Population/Genotype/SourcePop2*Temperature + Temperature + EXTRACT,
  dfff
)
psQadonisJan6uniNoLow

pair_uni <- pairwise.adonis(d2, df$Temperature)
pair_bray <- pairwise.adonis(d3, df$Temperature)
d4 <- phyloseq::distance(psExpR, "wunifrac")
pair_wuni <- pairwise.adonis(d4, df$Temperature)
pair_wuni
```

## Pre-Post adonis

```{r}
knitr::opts_chunk$set(comment = NA)
set.seed(28132)
psPrePost <- rarefy_even_depth(pstreePrePost, sample.size = 15000)

# Anova table
# The original used an undefined `PrePost`; the rarefied object created just
# above is `psPrePost`.
df <- as(sample_data(psPrePost), "data.frame")
dw <- phyloseq::distance(psPrePost, "wunifrac")
d2 <- phyloseq::distance(psPrePost, "unifrac")
d3 <- phyloseq::distance(psPrePost, "bray")

# NMDS ordinations used by the three figures below; these objects were
# referenced but never created in the original chunk.
wuni_nmds <- ordinate(psPrePost, method = "NMDS", distance = dw)
uni_nmds <- ordinate(psPrePost, method = "NMDS", distance = d2)
bray_nmds <- ordinate(psPrePost, method = "NMDS", distance = d3)

# Weighted UniFrac distance
# (`method =` is dropped: the left-hand side is already a distance matrix.)
prepost_adonis_wuni <- adonis(
  dw ~ Experiment/Population/Genotype/SourcePop2 +
    Population/Genotype/SourcePop2*Temperature + Temperature + EXTRACT,
  df
)
prepost_adonis_wuni

# Betadisper Experiment
wunifrac_betadisp_exp <- betadisper(dw, df$Experiment)
permutest(wunifrac_betadisp_exp, permutations = 999)

# NMDS Plot (Figure S2C)
wuniplot_nmds_exp <- plot_ordination(
  psPrePost, wuni_nmds, type = "samples", color = "Experiment"
) +
  stat_ellipse() +
  geom_point(size = 4)

# Unweighted UniFrac distance
prepost_adonis_uni <- adonis(
  d2 ~ Experiment/Population/Genotype/SourcePop2 +
    Population/Genotype/SourcePop2*Temperature + Temperature + EXTRACT,
  df
)
prepost_adonis_uni

# Experiment betadisper
unifrac_betadisp_exp <- betadisper(d2, df$Experiment)
permutest(unifrac_betadisp_exp, permutations = 999)

# NMDS Plot (Figure S2B)
uniplot_nmds_exp <- plot_ordination(
  psPrePost, uni_nmds, type = "samples", color = "Experiment"
) +
  stat_ellipse() +
  geom_point(size = 4)

# Bray-Curtis Dissimilarity
prepost_adonis_bray <- adonis(
  d3 ~ Experiment/Population/Genotype/SourcePop2 +
    Population/Genotype/SourcePop2*Temperature + Temperature + EXTRACT,
  df
)
prepost_adonis_bray

# Experiment betadisper
bray_betadisp_exp <- betadisper(d3, df$Experiment)
permutest(bray_betadisp_exp, permutations = 999)

# NMDS Plot (Figure S2A)
brayplot_nmds_exp <- plot_ordination(
  psPrePost, bray_nmds, type = "samples", color = "Experiment"
) +
  stat_ellipse() +
  geom_point(size = 4)
```

## Identification of contaminant OTUs from extraction kits

```{r}
pstree <- readRDS("Data/pstree.rds")
pstree@sam_data$NAME <- rownames(pstree@sam_data)
controls <- subset_samples(pstree, Population == "Control")
meta_control <- controls@sam_data

### Subset data for extraction kits
kits <- subset_samples(controls, EXTRACT == "8112016A" | EXTRACT == "9272016A")

### Rarefy to 15000 reads
set.seed(28132)
controls_final <- rarefy_even_depth(kits, sample.size = 15000)

### Identify top 20 OTUs (head() avoids NA names when fewer than 20 exist)
controls_20 <- head(names(sort(taxa_sums(controls_final), decreasing = TRUE)), 20)
controls_20

### Prune top 20 OTUs and report the share of reads they account for
controls_prune20 <- prune_taxa(controls_20, controls_final)
sum(sample_sums(controls_prune20)) / sum(sample_sums(controls_final))

### Make table
prune20_table <- cbind(tax_table(controls_prune20))
write.csv(prune20_table, "prune20_table.csv")
prune20 <- as.data.frame(prune20_table)
# The OTU identifiers are the row names of the taxonomy table.
prune20$OTU <- rownames(prune20_table)

### Import Supplemental Table S6
experiment_OTUs <- read.csv("NegOTU.csv")
contaminant_names <- prune20$OTU

### Compare top 20 OTUs found in kits to OTUs in experimental samples
### from Supplemental Table S6 (first column of the CSV)
iscontaminant <- contaminant_names %in% experiment_OTUs[, 1]
iscontaminant
prune20$Contaminant <- iscontaminant
write.csv(prune20, "contaminant_final_test.csv")
```

```{r}
# Agglomerate `ps` at taxonomic `rank`, convert counts to per-sample relative
# abundance, melt to long format, keep rows above `min_abundance`, and sort by
# the rank column. dplyr verbs are namespaced because several packages
# attached in the setup chunk can mask them.
melt_abundant_taxa <- function(ps, rank, min_abundance = 0.01) {
  ps %>%
    tax_glom(taxrank = rank) %>%
    transform_sample_counts(function(x) x / sum(x)) %>%
    psmelt() %>%
    dplyr::filter(Abundance > min_abundance) %>%
    dplyr::arrange(.data[[rank]])
}

pstreeControl_FamilyN <- melt_abundant_taxa(pstreeControl, "Family")
pstreeControl_GenusN <- melt_abundant_taxa(pstreeControl, "Genus")
pstreePrePost_FamilyN <- melt_abundant_taxa(pstreePrePost, "Family")
pstreePrePost_GenusN <- melt_abundant_taxa(pstreePrePost, "Genus")
```

## Family anovas

```{r}
family_dir <- "~/Desktop/ReDoSep29"
family_names <- c(
  "Burkholderiaceae", "Flavobacteriaceae", "Moraxellaceae",
  "Pseudomonadaceae", "Rhizobiaceae"
)
# One data frame per family, read from <family_dir>/<Family>.csv
family_otus <- lapply(
  setNames(nm = family_names),
  function(family) read.csv(file.path(family_dir, paste0(family, ".csv")))
)

# NOTE(review): Moraxellaceae is loaded but was never analysed in the original
# code; that is preserved here. Add it to this vector if that was an oversight.
analysed_families <- setdiff(family_names, "Moraxellaceae")

# Per family: nested ANOVA on log abundance, then a Kruskal-Wallis test of
# abundance across temperatures.
for (family in analysed_families) {
  otu <- family_otus[[family]]
  cat("\n====", family, "====\n")
  family_anova <- aov(
    log(Abundance) ~ Temperature*Population/Genotype,
    data = otu
  )
  print(summary(family_anova))
  print(kruskal.test(Abundance ~ Temperature, data = otu))
}

# Base-graphics plots of the same model terms. plot() returns NULL, so the
# results are not assigned.
for (family in analysed_families) {
  plot(
    log(Abundance) ~ Temperature*Population/Genotype,
    data = family_otus[[family]], main = family
  )
}
```

## Alpha Diversity

Different measures of alpha diversity plotted

```{r, message = FALSE, warning = FALSE, error = FALSE}
theme_set(theme_bw())
# Sample names come from the phyloseq object; `seqtab3` is only created in
# commented-out code and is not available when knitting.
samples.out <- sample_names(pstree)

richness_measures <- c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson")

# NOTE(review): originally labelled "With Controls", but pstreeExp has the
# Control population removed; confirm which object was intended.
prich_expAndControl <- plot_richness(
  pstreeExp, x = "Experiment", measures = richness_measures,
  color = "Temperature"
)

# Without Controls
prich_expNoControl <- plot_richness(
  pstreeExp, x = "Temperature", color = "Population",
  measures = richness_measures
)
prich_expNoControl

anova_methods <- c("richness", "simpson", "shannon")
pt <- plot_anova_diversity(pstreeExp, method = anova_methods,
                           grouping_column = "Temperature",
                           pValueCutoff = 0.05, filename = FALSE)
pp <- plot_anova_diversity(pstreeExp, method = anova_methods,
                           grouping_column = "Population", pValueCutoff = 0.05)
pg <- plot_anova_diversity(pstreeExp, method = anova_methods,
                           grouping_column = "Genotype", pValueCutoff = 0.05)
pe <- plot_anova_diversity(pstreeExp, method = anova_methods,
                           grouping_column = "EXTRACT", pValueCutoff = 0.05)

# phyloseq has no measure called "richness"; observed richness is "Observed".
plot_richness(pstreeExp, measures = c("Observed", "Simpson", "Shannon"),
              x = "Temperature")
grid.arrange(pt, pp, pg, nrow = 1)
print(pe)
alpha <- estimate_richness(pstree, split = TRUE,
                           measures = c("Observed", "Simpson", "Shannon"))
```

## Bar plots

```{r, message = FALSE, warning = FALSE, error = FALSE}
# Agglomerate `ps` to family level, convert to per-sample relative abundance,
# keep the `n_top` most abundant families and print the share of the total
# relative abundance they retain.
top_family_rel_abundance <- function(ps, n_top = 15) {
  glommed <- tax_glom(ps, taxrank = "Family")
  rel <- transform_sample_counts(glommed, function(x) x / sum(x))
  top_taxa <- head(names(sort(taxa_sums(rel), decreasing = TRUE)), n_top)
  pruned <- prune_taxa(top_taxa, rel)
  cat(sprintf("Share of reads in top %d families: %.7f\n",
              n_top, sum(taxa_sums(pruned)) / sum(taxa_sums(rel))))
  # Consolidate table formatting: store the Family column as plain character
  tax_mat <- cbind(tax_table(pruned))
  tax_mat[top_taxa, "Family"] <- as(tax_table(pruned)[top_taxa, "Family"], "character")
  tax_table(pruned) <- tax_mat
  pruned
}

# Merge the samples of `ps` by Temperature and express each merged sample in
# percent. merge_samples() leaves the metadata unusable for labelling, so the
# facet column `facet_var` is set to `facet_label` and Temperature is restored
# from the merged sample names. This also works when the metadata columns are
# character rather than factor, where levels() would return NULL.
merge_by_temperature <- function(ps, facet_var, facet_label) {
  merged <- merge_samples(ps, "Temperature")
  meta <- as(sample_data(merged), "data.frame")
  meta[[facet_var]] <- facet_label
  meta$Temperature <- rownames(meta)
  sample_data(merged) <- sample_data(meta)
  transform_sample_counts(merged, function(x) 100 * x / sum(x))
}

# Stacked family bar plot for one facet; `show_legend = FALSE` hides the fill
# legend so that the left-hand panel of a pair does not repeat it.
family_barplot <- function(ps_percent, facet_var, palette, y_label, show_legend) {
  p <- plot_bar(ps_percent, "Temperature", fill = "Family") +
    ylab(y_label) +
    facet_wrap(facet_var) +
    scale_fill_manual(values = palette) +
    theme_bw()
  if (!show_legend) {
    p <- p + guides(fill = "none")
  }
  p
}

# Figure 2
# `pstreenofin_rare` was never defined in this file.
# NOTE(review): reconstructed as the Finland-free experimental samples
# rarefied with the seed and depth used everywhere else; confirm.
set.seed(28132)
pstreenofin_rare <- rarefy_even_depth(pstreeNoFin, sample.size = 15000)

# "#660066" was missing its leading "#" in the original palette.
palette_fig2 <- c("#FFCC00", "#0099FF", "#FF9900", "#FF6600", "#CC0033",
                  "#FF0033", "#FF0066", "#660066", "#CC00CC", "#9966FF",
                  "#9900FF", "#6600FF", "#FF3366", "#FF6699", "#999999")

# Top 15 shared families (share recorded by the author: 0.9871074)
family15_E_prune <- top_family_rel_abundance(pstreenofin_rare)

# Subset by population, merge by temperature, convert to percentages
popI_E <- subset_samples(family15_E_prune, Population == "Israel")
popG_E <- subset_samples(family15_E_prune, Population == "Germany")
popI_E_percent <- merge_by_temperature(popI_E, "Population", "Israel")
popG_E_percent <- merge_by_temperature(popG_E, "Population", "Germany")

# Plot separately
popI_E_barplot <- family_barplot(popI_E_percent, "Population", palette_fig2,
                                 "Relative Abundance", show_legend = FALSE)
popI_E_barplot
popG_E_barplot <- family_barplot(popG_E_percent, "Population", palette_fig2,
                                 "", show_legend = TRUE)
popG_E_barplot

# Combine plots. plot_grid() belongs to cowplot, which this file never
# attaches; ggpubr (already loaded) provides the equivalent ggarrange().
combined_E_barplot <- ggpubr::ggarrange(popI_E_barplot, popG_E_barplot,
                                        nrow = 1, widths = c(21, 30))
combined_E_barplot

# Figure S1: Israel pre/post
palette_figS1A <- c("#CCCCCC", "#0099FF", "#FF9900", "#FF0066", "#FF0099",
                    "#660066", "#6633FF", "#9900FF", "#6600FF", "#3300FF",
                    "#0000FF", "#3399FF", "#0066CC", "#FF6699", "#999999")

pstreePrePost_C <- subset_samples(pstreePrePost, Population == "Israel")
# Top 15 shared families (share recorded by the author: 0.9655597)
family15_C_prune <- top_family_rel_abundance(pstreePrePost_C)

# Subset by Experiment, merge by temperature, convert to percentages
popI_Cpre <- subset_samples(family15_C_prune, Experiment == "pre")
popI_Cpost <- subset_samples(family15_C_prune, Experiment == "post")
popI_Cpre_percent <- merge_by_temperature(popI_Cpre, "Experiment", "pre")
popI_Cpost_percent <- merge_by_temperature(popI_Cpost, "Experiment", "post")

# Plot separately
popI_Cpre_barplot <- family_barplot(popI_Cpre_percent, "Experiment", palette_figS1A,
                                    "Relative Abundance", show_legend = FALSE)
popI_Cpre_barplot
popI_Cpost_barplot <- family_barplot(popI_Cpost_percent, "Experiment", palette_figS1A,
                                     "", show_legend = TRUE)
popI_Cpost_barplot

# Combine plots
combined_C_barplot <- ggpubr::ggarrange(popI_Cpre_barplot, popI_Cpost_barplot,
                                        nrow = 1, widths = c(19, 30))
combined_C_barplot

### Germany pre/post (Figure S1B)
palette_figS1B <- c("#CCCCCC", "#33FF00", "#0099FF", "#FF9900", "#CC0033",
                    "#FF0066", "#FF0099", "#660066", "#6633FF", "#9900FF",
                    "#3300FF", "#0000FF", "#3399FF", "#0066CC", "#FF6699")

pstreePrePost_D <- subset_samples(pstreePrePost, Population == "Germany")
# Top 15 shared families (share recorded by the author: 0.9585275)
family15_D_prune <- top_family_rel_abundance(pstreePrePost_D)

# Subset by Experiment, merge by temperature, convert to percentages
popG_Dpre <- subset_samples(family15_D_prune, Experiment == "pre")
popG_Dpost <- subset_samples(family15_D_prune, Experiment == "post")
popG_Dpre_percent <- merge_by_temperature(popG_Dpre, "Experiment", "pre")
popG_Dpost_percent <- merge_by_temperature(popG_Dpost, "Experiment", "post")

# Plot separately
popG_Dpre_barplot <- family_barplot(popG_Dpre_percent, "Experiment", palette_figS1B,
                                    "Relative Abundance", show_legend = FALSE)
popG_Dpre_barplot
popG_Dpost_barplot <- family_barplot(popG_Dpost_percent, "Experiment", palette_figS1B,
                                     "", show_legend = TRUE)
popG_Dpost_barplot

# Combine plots
combined_D_barplot <- ggpubr::ggarrange(popG_Dpre_barplot, popG_Dpost_barplot,
                                        nrow = 1, widths = c(19, 30))
combined_D_barplot
```

## DESeq2

```{r, fig.height = 6, fig.width = 3, fig.align = "center", message = FALSE, warning = FALSE, error = FALSE}
# Family-level differential abundance by temperature and by population
physeq <- taxa_level(pstreeExp, which_level = "Family")
deseq_sig <- differential_abundance(physeq, grouping_column = "Temperature",
                                    output_norm = "log-relative",
                                    pvalue.threshold = 0.05, lfc.threshold = 1,
                                    filename = TRUE)
deseq_sig_pop <- differential_abundance(physeq, grouping_column = "Population",
                                        output_norm = "log-relative",
                                        pvalue.threshold = 0.05, lfc.threshold = 1,
                                        filename = TRUE)

plot69 <- plot_signif(deseq_sig$plotdata, top.taxa = 20)
print(plot69)
plot70 <- plot_MDA(deseq_sig$importance, top.taxa = 20) + ggtitle("A")
print(plot70)
plot77 <- plot_MDA(deseq_sig_pop$importance, top.taxa = 20) + ggtitle("B")
print(plot77)
grid.arrange(plot70, plot77, nrow = 1)

moreplot <- plot_MA(deseq_sig$SignFeaturesTable)
print(moreplot$maplot)
print(moreplot$lfcplot)
moreplotgg <- ggmaplot(deseq_sig$SignFeaturesTable, fdr = 0.05, fc = 1.5,
                       top = 15, select.top.method = c("padj", "fc"),
                       main = NULL, xlab = "Log2 mean expression",
                       ylab = "Log2 fold change", ggtheme = theme_classic())
#include_graphics("/Users/michaelsong/Rplots11.pdf")

# Genera
physeq1 <- taxa_level(pstreeExp, which_level = "Genus")
deseq_sig1 <- differential_abundance(physeq1, grouping_column = "Temperature",
                                     output_norm = "log-relative",
                                     pvalue.threshold = 0.05, lfc.threshold = 0,
                                     filename = TRUE)
plot19 <- plot_signif(deseq_sig1$plotdata, top.taxa = 20)
print(plot19)
plot10 <- plot_MDA(deseq_sig1$importance, top.taxa = 20) + ggtitle("B")
print(plot10)
moreplot33 <- plot_MA(deseq_sig1$SignFeaturesTable)
print(moreplot33$maplot)
print(moreplot33$lfcplot)

# Likelihood-ratio test of the Temperature effect. test = "LRT" requires a
# reduced design; the intercept-only model tests Temperature as a whole.
diagdds <- phyloseq_to_deseq2(pstreeExp, ~ Temperature)
diagdds <- DESeq(diagdds, test = "LRT", reduced = ~ 1, fitType = "parametric")
```

```{r diff exp effect depending on Temp}
set.seed(28132)
pstreeLOWr <- rarefy_even_depth(pstreeLOW, sample.size = 15000)
pstreeMEDr <- rarefy_even_depth(pstreeMED, sample.size = 15000)
pstreeHIGHr <- rarefy_even_depth(pstreeHIGH, sample.size = 15000)

# PERMANOVA on unweighted UniFrac within a single temperature treatment.
# The original formula listed the nested Population term twice, which R
# collapses to a single term, so it is written once here.
adonis_within_temperature <- function(ps) {
  meta <- as(sample_data(ps), "data.frame")
  unifrac_dist <- phyloseq::distance(ps, "unifrac")
  adonis(unifrac_dist ~ Population/Genotype/SourcePop2 + EXTRACT, meta)
}

dfpstreeLOWadonis <- adonis_within_temperature(pstreeLOWr)
dfpstreeLOWadonis
dfpstreeMEDadonis <- adonis_within_temperature(pstreeMEDr)
dfpstreeMEDadonis
dfpstreeHIGHadonis <- adonis_within_temperature(pstreeHIGHr)
dfpstreeHIGHadonis
```

```{r prune}
# Keep only the four negative-control samples and the taxa observed in them
phylo2 <- prune_samples(
  c("NegControl121", "NegControl1212", "NegControl811", "NegControl927"),
  pstree
)
phylo3 <- prune_taxa(taxa_sums(phylo2) > 0, phylo2)
```

```{r new figure 6, fig.height = 6, fig.width = 3, fig.align = "center", message = FALSE, warning = FALSE, error = FALSE}
# Pairwise temperature contrasts: drop one treatment at a time
pstreeLOWHIGH <- phyloseq::subset_samples(pstreeExp, Temperature != "bMedium")
pstreeLOWMED <- phyloseq::subset_samples(pstreeExp, Temperature != "cHigh")
pstreeMEDHIGH <- phyloseq::subset_samples(pstreeExp, Temperature != "aLow")

physeqLOWHIGH <- taxa_level(pstreeLOWHIGH, which_level = "Family")
deseq_sigLOWHIGH <- differential_abundance(physeqLOWHIGH, grouping_column = "Temperature",
                                           output_norm = "log-relative",
                                           pvalue.threshold = 0.05, lfc.threshold = 1,
                                           filename = TRUE)
# deseq_sig_popLOWHIGH <- differential_abundance(physeqLOWHIGH, grouping_column = "Population", output_norm = "log-relative",
#                                                pvalue.threshold = 0.05, lfc.threshold = 1, filename = T)

physeqLOWMED <- taxa_level(pstreeLOWMED, which_level = "Family")
deseq_sigLOWMED <- differential_abundance(physeqLOWMED, grouping_column = "Temperature",
                                          output_norm = "log-relative",
                                          pvalue.threshold = 0.05, lfc.threshold = 1,
                                          filename = TRUE)
# deseq_sig_popLOWMED <- differential_abundance(physeqLOWMED, grouping_column = "Population", output_norm = "log-relative",
#                                               pvalue.threshold = 0.05, lfc.threshold = 1)

physeqMEDHIGH <- taxa_level(pstreeMEDHIGH, which_level = "Family")
deseq_sigMEDHIGH <- differential_abundance(physeqMEDHIGH, grouping_column = "Temperature",
                                           output_norm = "log-relative",
                                           pvalue.threshold = 0.05, lfc.threshold = 1,
                                           filename = TRUE)
# deseq_sig_popMEDHIGH <- differential_abundance(physeqMEDHIGH, grouping_column = "Population", output_norm = "log-relative",
#                                                pvalue.threshold = 0.05, lfc.threshold = 1)

# The original pasted the LOWHIGH plotting stanza three times, so the LOWMED
# and MEDHIGH results were computed but never shown. Plot each contrast once.
temperature_contrasts <- list(
  LOWHIGH = deseq_sigLOWHIGH,
  LOWMED = deseq_sigLOWMED,
  MEDHIGH = deseq_sigMEDHIGH
)
for (contrast in names(temperature_contrasts)) {
  contrast_result <- temperature_contrasts[[contrast]]
  print(plot_signif(contrast_result$plotdata, top.taxa = 20))
  print(plot_MDA(contrast_result$importance, top.taxa = 20) + ggtitle(""))
  contrast_ma <- plot_MA(contrast_result$SignFeaturesTable)
  print(contrast_ma$maplot)
  print(contrast_ma$lfcplot)
}

# `diagdds` (the LRT fit on pstreeExp) is already computed at the end of the
# DESeq2 chunk above; the identical refit that used to sit here was removed.
```