---
title: "Pais_et_al_complete_statistics"
author: "Luis Teixeira and Ines Pais"
date: "25/06/2018"
output:
  html_document: default
  pdf_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r libraries for the whole script}
# R version 3.5.0
# load libraries (the version noted is the one used for the original analysis)
library(tidyverse)     # 1.2.1
library(reshape)       # 0.8.7
library(reshape2)      # 1.4.3
library(lme4)          # 1.1-17
library(lmerTest)      # 3.0-1
library(multcomp)      # 1.4-8
library(lsmeans)       # 2.27-62
library(multcompView)  # 0.1-7
```

```{r options}
# change this value to "yes" to save some output files throughout the script
write_files <- "no"
```

## Figure 1

### Iso vials not flipped

```{r iso 10 days non flipping}
# stringsAsFactors = TRUE reproduces the read.csv default of R < 4.0, for
# which this script was written.
iso_vials_nf <- read.csv(
  "S1_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
iso_vials_nf$replicate <- as.factor(iso_vials_nf$replicate)
iso_vials_nf$day <- as.factor(iso_vials_nf$day)

# create column with the cfu from the medium that presents the highest number
# (row-wise maximum over columns 4 to 8)
iso_vials_nf$maxi <- apply(iso_vials_nf[, 4:8], 1, max)
# create log from the maximum cfu; the +1 keeps log10 defined for zero counts
iso_vials_nf$log <- log10(iso_vials_nf$maxi + 1)
head(iso_vials_nf)

# plot data
ggplot(data = iso_vials_nf, aes(day, log)) +
  geom_jitter(height = 0, width = .2) +
  expand_limits(y = c(0, 7)) +
  facet_grid(replicate ~ ., margins = FALSE)

# statistical analyses
# lmer with replicate as random factor
modeliso_vials_nf <- lmer(log ~ day + (1 | replicate), data = iso_vials_nf)
summary(modeliso_vials_nf)

# estimates from model
# titres
lsmeansLT(modeliso_vials_nf)
values_iso_vials_nf <- data.frame(lsmeansLT(modeliso_vials_nf))
# NOTE(review): columns 3, 7 and 8 are presumably the estimate and its
# confidence bounds - confirm against the installed lmerTest version.
levels_iso_vials_nf <- values_iso_vials_nf[, c(3, 7, 8)]
# back-transform from log10 to cfu
apply(levels_iso_vials_nf, 1, function(i) 10^i)
# fold change (second day level relative to the first)
10^(levels_iso_vials_nf[2, 1] - levels_iso_vials_nf[1, 1])
```

### Iso vials flipped

```{r iso 10 days flipping}
iso_vials_f <- read.csv(
  "S2_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
iso_vials_f$replicate <- as.factor(iso_vials_f$replicate)
iso_vials_f$day <- factor(iso_vials_f$day)

# create column with the cfu from the medium that presents the highest number
iso_vials_f$maxi <- apply(iso_vials_f[, 4:8], 1, max)
# create log from the maximum cfu
iso_vials_f$log <- log10(iso_vials_f$maxi + 1)
head(iso_vials_f)

# plot data
ggplot(data = iso_vials_f, aes(day, log)) +
  geom_jitter(height = 0, width = .2) +
  expand_limits(y = c(0, 7)) +
  facet_grid(replicate ~ ., margins = FALSE)

# statistical analyses
# lmer with replicate as random factor
model_iso_vials_f <- lmer(log ~ day + (1 | replicate), data = iso_vials_f)
summary(model_iso_vials_f)

# estimates from model
# titres
lsmeansLT(model_iso_vials_f)
values_iso_vials_f <- data.frame(lsmeansLT(model_iso_vials_f))
levels_iso_vials_f <- values_iso_vials_f[, c(3, 7, 8)]
apply(levels_iso_vials_f, 1, function(i) 10^i)
# fold change (first day level relative to the second)
10^(levels_iso_vials_f[1, 1] - levels_iso_vials_f[2, 1])
```

### Iso vials flipped qPCR

```{r iso 10 days flipping qPCR}
iso_vials_f_qpcr <- read.csv(
  "S3_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
iso_vials_f_qpcr$replicate <- as.factor(iso_vials_f_qpcr$replicate)
iso_vials_f_qpcr$day <- factor(iso_vials_f_qpcr$day)

# create log
iso_vials_f_qpcr$log <- log10(iso_vials_f_qpcr$relativelevels)
head(iso_vials_f_qpcr)

# plot
ggplot(data = iso_vials_f_qpcr, aes(day, log)) +
  geom_jitter(height = 0, width = .2) +
  facet_grid(replicate ~ ., margins = FALSE)

# statistical analyses
# lmer with replicate as random factor
model_iso_vials_f_qpcr <- lmer(
  log ~ day + (1 | replicate),
  data = iso_vials_f_qpcr
)
summary(model_iso_vials_f_qpcr)

# estimates from model
lsmeansLT(model_iso_vials_f_qpcr)
values_iso_vials_f_qpcr <- data.frame(lsmeansLT(model_iso_vials_f_qpcr))
levels_iso_vials_f_qpcr <- values_iso_vials_f_qpcr[, c(3, 7, 8)]
apply(levels_iso_vials_f_qpcr, 1, function(i) 10^i)
# fold change (first day level relative to the second)
10^(levels_iso_vials_f_qpcr[1, 1] - levels_iso_vials_f_qpcr[2, 1])
```

### growth on food

```{r growth on food}
growth_vials <- read.csv(
  "S4_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
growth_vials$day <- factor(growth_vials$day)

# create column with the cfu from the medium that presents the highest number
growth_vials$maxi <- apply(growth_vials[, 4:8], 1, max)
# create log from the maximum cfu; the +1 keeps log10 defined for zero counts
growth_vials$log <- log10(growth_vials$maxi + 1)
head(growth_vials)

# plot
ggplot(data = growth_vials, aes(day, log)) +
  geom_jitter(height = 0, width = .2)

# statistical analyses
# lm
model_growth_vials <- lm(log ~ day, data = growth_vials)
summary(model_growth_vials)

# estimates from model
lsmeans::lsmeans(model_growth_vials, specs = "day")
values_growth_vials <- summary(
  lsmeans::lsmeans(model_growth_vials, specs = "day")
)
# second column of the summary, kept as a one-column table so that [row, 1]
# indexing works below
levels_growth_vials <- values_growth_vials[2]
# fold change (second day level relative to the first)
10^(levels_growth_vials[2, 1] - levels_growth_vials[1, 1])
```

### wild flies vials flipped

```{r wild flies flipped all data together}
# stringsAsFactors = TRUE reproduces the read.csv default of R < 4.0, for
# which this script was written.
wild_vials_f <- read.csv(
  "S5_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
wild_vials_f$replicate <- as.factor(wild_vials_f$replicate)

# create column with the cfu from the medium that presents the highest number
wild_vials_f$maxi <- apply(wild_vials_f[, 4:8], 1, max)
# create log from the maximum cfu
wild_vials_f$log <- log10(wild_vials_f$maxi + 1)
head(wild_vials_f)

# plot data
ggplot(data = wild_vials_f, aes(as.factor(day), log)) +
  geom_jitter(height = 0, width = .2) +
  expand_limits(y = c(0, 7)) +
  facet_grid(replicate ~ ., margins = FALSE)

# statistical analysis
# day is not converted to a factor here, unlike in the iso-vial chunks
model_wild_vials_f <- lmer(log ~ day + (1 | replicate), data = wild_vials_f)
summary(model_wild_vials_f)

# estimates at days 0, 10 and 20
values_wild_vials_f <- summary(
  lsmeans::lsmeans(
    model_wild_vials_f,
    specs = "day", at = list(day = c(0, 10, 20))
  )
)
levels_wild_vials_f <- data.frame(values_wild_vials_f[, c("lsmean")])
# fold change day 0 / day 10
10^(levels_wild_vials_f[1, 1] - levels_wild_vials_f[2, 1])
apply(levels_wild_vials_f, 1, function(i) 10^i)
```

## Figures 2 and 3

```{r import table and process}
# import table
bacteria <- read.csv(
  "S6_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)

# create column with OTU only: the text after "otu_" in the taxonomy string,
# prefixed with "OTU"
bacteria$OTU <- vapply(
  strsplit(as.character(bacteria$greengenes_tax_string), "otu_"),
  function(parts) parts[2],
  character(1)
)
bacteria$OTU <- paste0("OTU", bacteria$OTU)
# define OTUs that are contaminants
contaminats <- c("OTU2881", "OTU2886", "OTU3005")
# remove contaminants
bacteria <- filter(bacteria, !(OTU %in% contaminats))

# groups of bacteria with same colony morphology
Acetobacteraceae <- c(
  "OTU2782", "OTU2781", "OTU2784", "OTU2755", "OTU2757", "OTU2758",
  "OTU2759", "OTU2760", "OTU2761", "OTU2762", "OTU2753", "OTU2763"
)
Enterobacteriaceae <- c(
  "OTU3643", "OTU3529", "OTU3638", "OTU3592", "OTU3635", "OTU3558",
  "OTU3570", "OTU3559", "OTU3548"
)
Lactobacillus <- c("OTU1870", "OTU1905", "OTU1865")

# create column with OTU or aggregate of OTUs when morphologies of colonies
# are indistinguishable
bacteria$OTUtable <- bacteria$OTU
bacteria$OTUtable[bacteria$OTUtable %in% Lactobacillus] <- "Lacto"
bacteria$OTUtable[bacteria$OTUtable %in% Acetobacteraceae] <- "Aceto"
bacteria$OTUtable[bacteria$OTUtable %in% Enterobacteriaceae] <- "Entero"
```

### Analysis of diversity of bacteria

```{r script to get accumulation curve of OTUs}
# get number of entries (one per Source / Day / Fly combination)
n <- nrow(distinct(bacteria, Source, Day, Fly))
accumulation <- data.frame(
  source_day = rep(NA, n),
  fly = rep(NA, n),
  count = rep(NA, n)
)
bacteria$source_day <- with(bacteria, paste(Source, " ", Day, "days", sep = ""))
conditions <- distinct(bacteria, source_day)

# for each condition, count the distinct OTUs found in the first k flies
counter <- 1
for (i in seq_len(nrow(conditions))) {
  flies <- distinct(filter(bacteria, source_day == conditions[i, ]), Fly)
  for (k in seq_len(nrow(flies))) {
    flies.data <- filter(
      bacteria,
      source_day == conditions[i, ], Fly %in% flies[1:k, ]
    )
    count <- distinct(flies.data, OTU)
    accumulation$source_day[counter] <- conditions[i, ]
    accumulation$fly[counter] <- flies[k, ]
    accumulation$count[counter] <- nrow(count)
    counter <- counter + 1
  }
}
accumulation

# the two geom_text layers label the curves at fly 10, above or below the line
ggplot(accumulation, aes(x = fly, y = count, color = source_day)) +
  geom_line(size = 1) +
  geom_text(
    data = filter(
      accumulation,
      fly == "10", source_day == "Wild 0days" | source_day == "Lab 10days"
    ),
    aes(label = source_day), vjust = -.6, hjust = .8
  ) +
  geom_text(
    data = filter(
      accumulation,
      fly == "10", source_day == "Wild 10days" | source_day == "Lab 0days"
    ),
    aes(label = source_day), vjust = 1.7, hjust = .7
  ) +
  scale_colour_discrete(guide = "none") +
  theme_bw() +
  theme(axis.title.y = element_text(margin = margin(0, 8, 0, 0))) +
  labs(y = "Total OTUs", x = "Individuals") +
  scale_x_continuous(breaks = seq(0, 10, 1)) +
  ylim(0, 36)

# number of distinct OTUs per fly
fly_ID <- distinct(bacteria, fly_ID)
# nrow(), not length(): fly_ID is a one-column data frame, so length() is 1
Diversity <- data.frame(fly_ID, OTU_number = rep(NA, nrow(fly_ID)))
for (j in seq_len(nrow(Diversity))) {
  flies.data <- filter(bacteria, fly_ID == Diversity$fly_ID[j])
  Diversity$OTU_number[j] <- nrow(distinct(flies.data, OTU))
}
# manual addition of Lab_Day10_FlyB, which had 0 bacteria
Diversity <- rbind(
  Diversity,
  data.frame(fly_ID = "Lab_Day10_FlyB", OTU_number = 0)
)
Diversity
```

```{r Max of each OTU per fly}
# Make table with max of each OTU per fly

# Create table with number of OTUtable per morphotype
Combinations <- distinct(bacteria, OTUtable, Unique_morpho, fly_ID)
OTUs_per_morpho <- summarise(
  group_by(Combinations, Unique_morpho, fly_ID),
  count = n()
)
# select unique morphos that have 2 or more entries in this table
OTUs_per_morpho <- filter(OTUs_per_morpho, count > 1)

# identify unique morphotypes with 2 different OTUs assigned
# identify for which of these there is more information on the OTUs in the
# fly analysis
# remove these unique morphotypes from analysis
list_p <- c()
table_p <- data.frame(Unique_morpho = c(), OTUtable = c())
for (i in seq_len(nrow(OTUs_per_morpho))) {
  pOTUs <- filter(
    Combinations,
    Unique_morpho == OTUs_per_morpho$Unique_morpho[i]
  )$OTUtable
  fly <- filter(
    bacteria,
    fly_ID == OTUs_per_morpho$fly_ID[i], OTUtable %in% pOTUs
  )
  morpho_per_OTU <- summarise(group_by(fly, OTUtable), count = n())
  # only the first two OTUs of the fly are inspected
  for (j in c(1, 2)) {
    if (morpho_per_OTU[j, 2] > 1) {
      table_p <- rbind(
        table_p,
        data.frame(
          Unique_morpho = c(as.character(OTUs_per_morpho$Unique_morpho[i])),
          OTUtable = c(morpho_per_OTU[j, 1])
        )
      )
    }
  }
}
table_p$morpho_OTU <- paste(table_p$Unique_morpho, table_p$OTUtable)
bacteria_for_max <- filter(
  bacteria,
  !(paste(Unique_morpho, OTUtable) %in% table_p$morpho_OTU)
)

# Make column of OTUtable and fly ID together
bacteria_for_max$flyOTU <- with(
  bacteria_for_max,
  paste(OTUtable, fly_ID, sep = "_")
)
# get maximum for each OTUtable in each fly
tableOTU_fly <- summarise(
  group_by(bacteria_for_max, flyOTU),
  MaxOTU = max(Cfu_gut, na.rm = TRUE)
)
tableOTU_fly <- tableOTU_fly %>%
  separate(flyOTU, c("OTU", "source", "day", "fly"), "_")

# write table
if (write_files == "yes") {
  write.table(
    tableOTU_fly,
    file = "tableOTU_fly.csv", row.names = FALSE, sep = ","
  )
}
```

```{r define functions to extract different frequencies and to plot}
# define functions to analyse frequencies of OTUs in different groups of
# bacteria

# Frequencies(): colony counts, frequencies and fly counts per OTU for one
# group of OTUs.
#   bacgroup  character vector of OTU names, matched against bacteria$OTU
# Reads the globals `bacteria` and `write_files`, and `filename` when
# write_files is "yes" (the table is then written to
# "<filename>_frequencies.csv").
# Returns a data frame with one row per OTU x source_day combination and the
# columns OTU, source_day, source, day, count, total_per_source_day, freq
# (percent) and nflies.
Frequencies <- function(bacgroup) {
  # get subset of bacteria
  bacteria_group <- filter(bacteria, OTU %in% bacgroup)
  bacteria_group$source_day <- with(
    bacteria_group,
    paste(Source, Day, sep = "_")
  )

  # create table with all possible combinations of OTU, source, and day
  OTUs <- distinct(bacteria_group, OTU)
  source_days <- distinct(bacteria_group, source_day)
  freq_group <- expand.grid(
    OTU = OTUs[, 1],
    source_day = source_days[, 1],
    stringsAsFactors = FALSE
  )
  freq_group$divide <- freq_group$source_day
  freq_group <- freq_group %>% separate(divide, c("source", "day"), "_")

  # get number of colonies sequenced of each OTU per source_day
  freq_OTU <- summarise(group_by(bacteria_group, source_day, OTU), count = n())
  freq_group$count <- freq_OTU$count[match(
    paste(freq_group$source_day, freq_group$OTU),
    paste(freq_OTU$source_day, freq_OTU$OTU)
  )]
  # combinations never observed have no match; count them as 0
  freq_group$count[is.na(freq_group$count)] <- 0

  # get number of total colonies per source_day
  group_per_source_day <- summarise(
    group_by(bacteria_group, source_day),
    count = n()
  )
  freq_group$total_per_source_day <- group_per_source_day$count[match(
    freq_group$source_day,
    group_per_source_day$source_day
  )]

  # get frequencies of OTU per source_day
  freq_group$freq <- with(freq_group, count / total_per_source_day * 100)

  # calculate in how many flies at different days in different populations
  # each OTU is found
  fly_and_OTU <- distinct(group_by(bacteria_group, OTU), fly_ID)
  fly_and_OTU$divide <- fly_and_OTU$fly_ID
  fly_and_OTU <- fly_and_OTU %>%
    separate(divide, c("source", "day", "fly"), "_")
  # drop the first three characters of the day field so that it matches the
  # day part of source_day
  fly_and_OTU$day <- substring(fly_and_OTU$day, 4)
  fly_per_OTU <- summarise(group_by(fly_and_OTU, OTU, source, day), count = n())
  freq_group$nflies <- fly_per_OTU$count[match(
    paste(freq_group$source_day, freq_group$OTU, sep = "_"),
    paste(fly_per_OTU$source, fly_per_OTU$day, fly_per_OTU$OTU, sep = "_")
  )]
  freq_group$nflies[is.na(freq_group$nflies)] <- 0

  # reorder rows
  freq_group <- arrange(freq_group, source, day, OTU)

  # write table
  if (write_files == "yes") {
    write.table(
      freq_group,
      file = paste(filename, "_frequencies.csv", sep = ""),
      row.names = FALSE, sep = ","
    )
  }
  return(freq_group)
}

# total_OTU(): cfu of one OTU (or OTU aggregate) in every fly.
#   group  value matched against tableOTU_fly$OTU, e.g. "Aceto" or "OTU1934"
# Reads the globals `tableOTU_fly` and `write_files`, and `filename` when
# write_files is "yes" (the table is then written to "<filename>_cfu_fly.csv").
# Returns one row per fly with the columns source, day, fly, fly_ID and cfu;
# flies in which the group was not found get cfu 0.
total_OTU <- function(group) {
  allflies <- distinct(tableOTU_fly, source, day, fly)
  # add entry for a fly with no bacteria
  allflies <- rbind(allflies, c("Lab", "Day10", "FlyB"))
  allflies$fly_ID <- with(allflies, paste(source, day, fly, sep = "_"))

  bacteria_group <- filter(tableOTU_fly, OTU == group)
  bacteria_group$fly_ID <- with(
    bacteria_group,
    paste(source, day, fly, sep = "_")
  )

  allflies$cfu <- bacteria_group$MaxOTU[match(
    allflies$fly_ID,
    bacteria_group$fly_ID
  )]
  allflies$cfu[is.na(allflies$cfu)] <- 0

  if (write_files == "yes") {
    write.table(
      allflies,
      file = paste(filename, "_cfu_fly.csv", sep = ""),
      row.names = FALSE, sep = ","
    )
  }
  return(allflies)
}

# Plotfreq(): dodged bar plot of OTU frequencies for one population.
#   freq_bact  table returned by Frequencies()
#   pop        value of the `source` column to plot, e.g. "Lab" or "Wild"
# Each bar is labelled with nflies. When write_files is "yes" the plot is
# saved as "<filename>_<pop>_freq.tiff" (`filename` is a global).
# Returns the ggplot object.
Plotfreq <- function(freq_bact, pop) {
  # keep the rows of the requested population
  comb <- filter(freq_bact, source == pop)
  # y axis upper limit: highest frequency plus some head-room for the labels
  max_freq <- max(comb$freq) + 5

  # plot
  freqplot <- ggplot(comb, aes(x = OTU, y = freq, fill = day)) +
    geom_col(position = "dodge") +
    geom_text(
      aes(label = nflies),
      position = position_dodge(width = 0.9), vjust = -0.25, size = 6
    ) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
      axis.title.y = element_text(margin = margin(0, 8, 0, 0))
    ) +
    labs(y = "Frequency", x = "OTU") +
    scale_fill_manual(values = c("#CCCCCC", "#000000")) +
    scale_y_continuous(breaks = seq(0, max_freq, 10), limits = c(0, max_freq))

  if (write_files == "yes") {
    # pass the plot explicitly instead of relying on last_plot()
    ggsave(
      paste(filename, pop, "freq.tiff", sep = "_"),
      plot = freqplot,
      width = 20, height = 12.5, units = "cm", dpi = 300
    )
  }
  return(freqplot)
}

# statistics

# Chisq_freq(): chi-square test on the colony counts of each OTU at the two
# days in the "Wild" population.
#   file  table returned by Frequencies() (a data frame, not a file path)
# The p-value is simulated with 1e6 replicates, so it varies slightly between
# runs unless a seed is set beforehand.
# Returns the chisq.test() result.
Chisq_freq <- function(file) {
  statsfreq <- filter(file, source == "Wild")
  statsfreq <- dplyr::select(statsfreq, OTU, count, day)
  # one row per OTU, one count column per day
  statsfreq <- spread(statsfreq, key = day, value = count)
  return(chisq.test(statsfreq[, 2:3], simulate.p.value = TRUE, B = 1000000))
}
```

### Analysis of Acetobacteraceae

```{r analysis of Acetobacteraceae}
filename <- "Aceto"
freq_aceto <- Frequencies(Acetobacteraceae)
total_aceto <- total_OTU("Aceto")
Plotfreq(freq_aceto, "Lab")
Plotfreq(freq_aceto, "Wild")
Chisq_freq(freq_aceto)

ggplot(total_aceto, aes(x = day, y = log10(cfu + 1))) +
  geom_jitter(width = .1, size = 2, shape = 21, fill = "white", stroke = .5) +
  facet_grid(. ~ source) +
  theme_bw()

model1_aceto <- lm(log10(cfu + 1) ~ source * day, data = total_aceto)
summary(model1_aceto)
model2_aceto <- lm(
  log10(cfu + 1) ~ day,
  data = filter(total_aceto, source == "Wild")
)
summary(model2_aceto)

# estimates
values_acetobacteraceae <- summary(
  lsmeans::lsmeans(model1_aceto, specs = "day", by = "source")
)
levels_acetobacteraceae <- data.frame(values_acetobacteraceae[, c("lsmean")])
10^levels_acetobacteraceae
```

### Analysis of Lactobacillus

```{r analysis of Lactobacillus}
filename <- "Lacto"
freq_lacto <- Frequencies(Lactobacillus)
total_lacto <- total_OTU("Lacto")
Plotfreq(freq_lacto, "Lab")
Plotfreq(freq_lacto, "Wild")
Chisq_freq(freq_lacto)

ggplot(total_lacto, aes(x = day, y = log10(cfu + 1))) +
  geom_jitter(width = .1, size = 2, shape = 21, fill = "white", stroke = .5) +
  facet_grid(. ~ source) +
  theme_bw()

model1_lacto <- lm(log10(cfu + 1) ~ source * day, data = total_lacto)
summary(model1_lacto)
model2_lacto <- lm(
  log10(cfu + 1) ~ day,
  data = filter(total_lacto, source == "Wild")
)
summary(model2_lacto)
```

### Analysis of Enterobacteriaceae

```{r analysis of Enterobacteriaceae}
filename <- "Entero"
freq_entero <- Frequencies(Enterobacteriaceae)
total_entero <- total_OTU("Entero")
total_entero <- filter(total_entero, source == "Wild")
Plotfreq(freq_entero, "Wild")
Chisq_freq(freq_entero)

ggplot(total_entero, aes(x = day, y = log10(cfu + 1))) +
  geom_jitter(width = .1, size = 2, shape = 21, fill = "white", stroke = .5) +
  theme_bw()

model1_entero <- lm(log10(cfu + 1) ~ day, data = total_entero)
summary(model1_entero)
```

## Analysis of Leuconostoc

```{r Leuconostoc analysis}
filename <- "Leuconostoc"
total_leuco <- total_OTU("OTU1934")
total_leuco <- filter(total_leuco, source == "Wild")

ggplot(total_leuco, aes(x = day, y = log10(cfu + 1))) +
  geom_jitter(width = .2, size = 2, shape = 21, fill = "white", stroke = .5) +
  theme_bw()

model1_leuco <- lm(
  log10(cfu + 1) ~ day,
  data = filter(total_leuco, source == "Wild")
)
summary(model1_leuco)
```

```{r test independence of frequencies of leuconostoco and lactobacillus}
# create matrix with data
# get information per fly: presence (1) / absence (0) of Lacto and OTU1934
wild_otus <- filter(tableOTU_fly, source == "Wild")
Lactos <- filter(wild_otus, OTU == "Lacto" | OTU == "OTU1934")
Lactos$MaxOTU <- 1
LactosS <- spread(Lactos, key = OTU, value = MaxOTU, fill = 0, drop = FALSE)

# Lactos only holds flies carrying at least one of the two OTUs, so flies
# carrying neither have to be added explicitly; otherwise the "none" cell of
# the contingency table is always 0. Only wild flies present in tableOTU_fly
# can be recovered this way.
wild_flies <- distinct(wild_otus, source, day, fly)
neither <- !(paste(wild_flies$day, wild_flies$fly) %in%
  paste(LactosS$day, LactosS$fly))
if (any(neither)) {
  absent <- wild_flies[neither, ]
  absent$Lacto <- 0
  absent$OTU1934 <- 0
  LactosS <- rbind(LactosS, absent)
}

# create columns indicating that fly has 0, lacto, leuco, or both
LactosS$Lacto_only <- ifelse(LactosS$Lacto == 1 & LactosS$OTU1934 == 0, 1, 0)
LactosS$Leuco_only <- ifelse(LactosS$Lacto == 0 & LactosS$OTU1934 == 1, 1, 0)
LactosS$both <- ifelse(LactosS$Lacto == 1 & LactosS$OTU1934 == 1, 1, 0)
LactosS$none <- ifelse(LactosS$Lacto == 0 & LactosS$OTU1934 == 0, 1, 0)

# create matrix for chisquare
Lacto_table <- data.frame(c(0, 0), c(0, 0))
colnames(Lacto_table) <- c("Lacto_0", "Lacto_1")
rownames(Lacto_table) <- c("Leuco_0", "Leuco_1")
Lacto_table[1, 1] <- sum(LactosS$none)
Lacto_table[1, 2] <- sum(LactosS$Lacto_only)
Lacto_table[2, 1] <- sum(LactosS$Leuco_only)
Lacto_table[2, 2] <- sum(LactosS$both)
Lacto_table

# chi-square test
chisq.test(Lacto_table, simulate.p.value = TRUE, B = 1000000)
```

## Figure 4

### load data stability in vials and cages

```{r load data and plot stability}
# load data
# stringsAsFactors = TRUE reproduces the read.csv default of R < 4.0, for
# which this script was written; the nls fits below index parameters by the
# `bacteria` factor.
stab <- read.csv(
  "S7_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
stab$replicate <- as.factor(stab$replicate)
stab$logcfu <- log10(stab$cfu + 1)
head(stab)

# filter stability in cages and stability in vials
stabvials <- filter(stab, cagevial == "vial")
stabcages <- filter(stab, cagevial == "cage")

# PLOTS
## Stability in vials
ggplot(data = stabvials, aes(day, logcfu)) +
  geom_jitter(width = .5) +
  facet_grid(bacteria ~ replicate, margins = FALSE)
## Stability in cages
ggplot(data = stabcages, aes(day, logcfu)) +
  geom_jitter(width = .5) +
  facet_grid(bacteria ~ replicate, margins = FALSE)
```

### Analysis of stability in vials

```{r analysis of stability in vials}
# all data together
stabvials$day <- as.factor(stabvials$day)
stabvials$replicate <- as.factor(stabvials$replicate)
model_vials1 <- lmer(logcfu ~ day * bacteria + (1 | replicate), data = stabvials)
model_vials2 <- lmer(logcfu ~ day + bacteria + (1 | replicate), data = stabvials)
anova(model_vials1, model_vials2)
summary(model_vials1)

# estimates from the model
lsmeansLT(model_vials1)
values_vials1 <- data.frame(lsmeansLT(model_vials1))
apply(values_vials1[c(3, 7, 8)], 1, function(i) 10^i)

# compare slopes: day effect for each bacterium (day coefficient plus the
# matching day:bacteria interaction)
coef(model_vials1)
contrast_matrix_vials_slope <- rbind(
  "Ac. cibinongensis"      = c(0, 1, 0, 0, 0, 0, 0, 0),
  "Ac. OTU2753"            = c(0, 1, 0, 0, 0, 1, 0, 0),
  "Ac. thailandicus"       = c(0, 1, 0, 0, 0, 0, 1, 0),
  "L. pseudomesenteroides" = c(0, 1, 0, 0, 0, 0, 0, 1)
)
summary(
  glht(model_vials1, contrast_matrix_vials_slope),
  test = adjusted("holm")
)
```

### Analysis of stability during time-course analysis in cages

### nls fit timecourse analysis

```{r nls timecourse}
# get only replicate 1 data
sbcg1 <- filter(stabcages, replicate == "1")

# models: exponential decay a * exp(-b * day) + c, with parameters either
# shared or specific to each bacterium
model_all_1 <- nls(
  logcfu ~ a[bacteria] * exp(-b[bacteria] * day) + c[bacteria],
  data = sbcg1,
  start = list(a = c(4, 4, 4, 4), b = c(1, 1, 1, 1), c = c(0, 0, 0, 0))
)
summary(model_all_1)
model_all_2 <- nls(
  logcfu ~ a[bacteria] * exp(-b * day) + c[bacteria],
  data = sbcg1,
  start = list(a = c(4, 4, 4, 4), b = 1, c = c(0, 0, 0, 0))
)
anova(model_all_1, model_all_2)
AIC(model_all_1)
AIC(model_all_2)
model_all_3 <- nls(
  logcfu ~ a * exp(-b * day) + c[bacteria],
  data = sbcg1,
  start = list(a = 4, b = 1, c = c(0, 0, 0, 0))
)
anova(model_all_3, model_all_2)
AIC(model_all_3)
model_all_4 <- nls(
  logcfu ~ a[bacteria] * exp(-b * day) + c,
  data = sbcg1,
  start = list(a = c(4, 4, 4, 4), b = 1, c = 0)
)
anova(model_all_4, model_all_2)
AIC(model_all_4)

# model_all_2 is simplest model that explains the data
summary(model_all_2)
levels(sbcg1$bacteria)
coef_all_2 <- coef(model_all_2)
# half-life in hours (from b estimate, the 5th coefficient)
log(2) * 1 / coef_all_2[5] * 24

# compare asymptotes (coefficients 6 to 9 are the c parameters)
contrast_matrix_asympt <- rbind(
  "cAc. cibinongensis vs. cAc. OTU2753" =
    c(0, 0, 0, 0, 0, 1, -1, 0, 0),
  "cAc. cibinongensis vs. cAc. wild1153" =
    c(0, 0, 0, 0, 0, 1, 0, -1, 0),
  "cAc. cibinongensis vs. cL. pseudomesenteroides" =
    c(0, 0, 0, 0, 0, 1, 0, 0, -1),
  "cAc. OTU2753 vs. cAc. wild1153" =
    c(0, 0, 0, 0, 0, 0, 1, -1, 0),
  "cAc. OTU2753 vs. cL. pseudomesenteroides" =
    c(0, 0, 0, 0, 0, 0, 1, 0, -1),
  "cAc. wild1153 vs. cL. pseudomesenteroides" =
    c(0, 0, 0, 0, 0, 0, 0, 1, -1)
)
summary(glht(model_all_2, contrast_matrix_asympt), test = adjusted("holm"))

# estimates of asymptotes, with approximate 95% intervals (estimate +/- 1.96
# standard errors), on the log10 scale and back-transformed
values_all_2 <- data.frame(summary(model_all_2)$parameters)
table_estimates <- values_all_2[6:9, 1:2]
table_estimates$bacteria <- levels(sbcg1$bacteria)
table_estimates$low_CI_log <-
  table_estimates$Estimate - 1.96 * table_estimates$Std..Error
table_estimates$high_CI_log <-
  table_estimates$Estimate + 1.96 * table_estimates$Std..Error
table_estimates$estimate <- 10^table_estimates$Estimate
table_estimates$low_CI <- 10^table_estimates$low_CI_log
table_estimates$high_CI <- 10^table_estimates$high_CI_log
table_estimates
```

### Analysis of stability in cages replicate 2

```{r analysis of stability in cages replicate 2}
# get replicate 2 only
sbcg2 <- filter(stabcages, replicate == "2")
sbcg2$day <- as.factor(sbcg2$day)
model_cg2 <- lm(logcfu ~ day * bacteria, data = sbcg2)
model_cg2b <- lm(logcfu ~ day + bacteria, data = sbcg2)
anova(model_cg2, model_cg2b)
summary(model_cg2b)

# test day5 logcfu different from 0
contrast_matrix_cfu_day5 <- rbind(
  "Ac. cibinongensis"      = c(1, 1, 0, 0, 0),
  "Ac. OTU2753"            = c(1, 1, 1, 0, 0),
  "Ac. wild1153"           = c(1, 1, 0, 1, 0),
  "L. pseudomesenteroides" = c(1, 1, 0, 0, 1)
)
summary(glht(model_cg2b, contrast_matrix_cfu_day5), test = adjusted("holm"))
```

### localization of Ac. thailandicus

```{r analysis of localization of Ac. thailandicus}
cp <- read.csv(
  "S8_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
cp$compartment <- factor(
  cp$compartment,
  levels = c(
    "Crop", "Anterior midgut", "Middle midgut", "Posterior midgut", "Hindgut"
  )
)
cp$replicate <- as.factor(cp$replicate)
cp$day <- as.factor(cp$day)
cp$logcfu <- log10(cp$cfu + 1)

## plot
ggplot(data = cp, aes(x = compartment, y = logcfu, fill = fly, colour = fly)) +
  geom_jitter(width = 0.1) +
  facet_grid(day ~ replicate, margins = FALSE) +
  theme_bw()

model1_local <- lmer(logcfu ~ compartment * day + (1 | replicate), data = cp)
model2_local <- lmer(logcfu ~ compartment + day + (1 | replicate), data = cp)
anova(model1_local, model2_local)
summary(model1_local)
anova(model1_local)
lsmeansLT(model1_local)

# pairwise comparisons between gut compartments at day 5
contrast_matrix_day5_cp <- rbind(
  "Crop_day5 - AM_day5 = 0" = c(0, -1, 0, 0, 0, 0, -1, 0, 0, 0),
  "Crop_day5 - MM_day5 = 0" = c(0, 0, -1, 0, 0, 0, 0, -1, 0, 0),
  "Crop_day5 - PM_day5 = 0" = c(0, 0, 0, -1, 0, 0, 0, 0, -1, 0),
  "Crop_day5 - H_day5 = 0"  = c(0, 0, 0, 0, -1, 0, 0, 0, 0, -1),
  "AM_day5 - MM_day5 = 0"   = c(0, 1, -1, 0, 0, 0, 1, -1, 0, 0),
  "AM_day5 - PM_day5 = 0"   = c(0, 1, 0, -1, 0, 0, 1, 0, -1, 0),
  "AM_day5 - H_day5 = 0"    = c(0, 1, 0, 0, -1, 0, 1, 0, 0, -1),
  "MM_day5 - PM_day5 = 0"   = c(0, 0, 1, -1, 0, 0, 0, 1, -1, 0),
  "MM_day5 - H_day5 = 0"    = c(0, 0, 1, 0, -1, 0, 0, 1, 0, -1),
  "PM_day5 - H_day5 = 0"    = c(0, 0, 0, 1, -1, 0, 0, 0, 1, -1)
)
# the Holm adjustment is an argument of summary(), not of glht()
summary(glht(model1_local, contrast_matrix_day5_cp), test = adjusted("holm"))
```

### ANALYSIS OF PROLIFERATION

```{r load data and plot proliferation}
# load data
# relevel() below needs factor columns, hence stringsAsFactors = TRUE
profull <- read.csv(
  "S9_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
profull$bacteria <- relevel(profull$bacteria, "Ac. OTU2753")
profull$replicate <- as.factor(profull$replicate)
profull$logcfu <- log10(profull$cfu + 1)
profull$hours <- relevel(profull$hours, "0h")
head(profull)

## plot
ggplot(
  data = profull,
  aes(x = as.factor(log10titer), y = logcfu, fill = hours, colour = hours)
) +
  geom_jitter(width = 0.2) +
  facet_grid(bacteria ~ replicate, margins = FALSE)

# summary
stats_pro <- as.data.frame(summarise(
  group_by(profull, bacteria, hours, log10titer),
  median = median(logcfu),
  mean = mean(logcfu),
  meanCFUs = 10^mean(logcfu),
  sd = sd(logcfu)
))
stats_pro

## Linear models of individual bacteria
# Ac. OTU2753
pro2753 <- filter(profull, bacteria == "Ac. OTU2753")
modelpro2753_1 <- lmer(
  logcfu ~ log10titer * hours + (1 | replicate),
  data = pro2753
)
model2753_2 <- lmer(
  logcfu ~ log10titer + hours + (1 | replicate),
  data = pro2753
)
anova(modelpro2753_1, model2753_2)
summary(modelpro2753_1)
# effect of hours at log10titer 2 to 5 (hours coefficient plus titer times
# the interaction coefficient)
contrast_matrix_slopes_2753 <- rbind(
  "2753_2" = c(0, 0, 1, 2),
  "2753_3" = c(0, 0, 1, 3),
  "2753_4" = c(0, 0, 1, 4),
  "2753_5" = c(0, 0, 1, 5)
)
# the Holm adjustment is an argument of summary(), not of glht()
summary(
  glht(modelpro2753_1, contrast_matrix_slopes_2753),
  test = adjusted("holm")
)

# Ac. cibinongensis
pro_cibinongensis <- filter(profull, bacteria == "Ac. cibinongensis")
model_cibinongensis_1 <- lmer(
  logcfu ~ log10titer * hours + (1 | replicate),
  data = pro_cibinongensis
)
model_cibinongensis_2 <- lmer(
  logcfu ~ log10titer + hours + (1 | replicate),
  data = pro_cibinongensis
)
anova(model_cibinongensis_1, model_cibinongensis_2)
summary(model_cibinongensis_2)

# Ac. thailandicus
pro_thailandicus <- filter(
  profull,
  bacteria == "Ac. thailandicus", hours != "24h_chaser"
)
model_thailandicus_1 <- lmer(
  logcfu ~ log10titer * hours + (1 | replicate),
  data = pro_thailandicus
)
model_thailandicus_2 <- lmer(
  logcfu ~ log10titer + hours + (1 | replicate),
  data = pro_thailandicus
)
anova(model_thailandicus_1, model_thailandicus_2)
summary(model_thailandicus_2)
stats_thai_prol <- as.data.frame(summarise(
  group_by(pro_thailandicus, replicate, hours, log10titer),
  median = median(logcfu),
  mean = mean(logcfu),
  meanCFUs = 10^mean(logcfu),
  sd = sd(logcfu)
))
stats_thai_2 <- as.data.frame(summarise(
  group_by(filter(pro_thailandicus, hours == "0h"), replicate, log10titer),
  mean_0h = mean(logcfu)
))
mean(filter(pro_thailandicus, hours == "0h")$logcfu)
stats_thai_prol

# comparison between cibinongensis and thailandicus
pro_comp <- filter(
  profull,
  bacteria == "Ac. thailandicus" | bacteria == "Ac. cibinongensis",
  hours != "24h_chaser"
)
model_comp_1 <- lmer(
  logcfu ~ log10titer * hours * bacteria + (1 | replicate),
  data = pro_comp
)
model_comp_2 <- lmer(
  logcfu ~ hours * bacteria + log10titer * bacteria + log10titer * hours +
    (1 | replicate),
  data = pro_comp
)
anova(model_comp_1, model_comp_2)
model_comp_3 <- lmer(
  logcfu ~ hours * bacteria + log10titer * bacteria + log10titer + hours +
    (1 | replicate),
  data = pro_comp
)
anova(model_comp_3, model_comp_2)
model_comp_4 <- lmer(
  logcfu ~ bacteria + log10titer * bacteria + log10titer + hours +
    (1 | replicate),
  data = pro_comp
)
anova(model_comp_3, model_comp_4)
summary(model_comp_3)

# chaser flies
filter(profull, hours == "24h_chaser")

# L. pseudomesenteroides
pro_leuco <- filter(profull, bacteria == "L. pseudomesenteroides")
model_leuco_1 <- lmer(
  logcfu ~ log10titer * hours + (1 | replicate),
  data = pro_leuco
)
model_leuco_2 <- lmer(
  logcfu ~ log10titer + hours + (1 | replicate),
  data = pro_leuco
)
anova(model_leuco_1, model_leuco_2)
summary(model_leuco_2)
```

```{r load and analysis Lactobacillus colonization}
lacto <- read.csv(
  "S10_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
# lacto$dilution <- as.factor(lacto$dilution)
lacto$day <- as.factor(lacto$day)
# logcfu is read from the file here; it is not computed in this chunk
stats_lacto <- as.data.frame(summarise(
  group_by(lacto, bacteria, day, as.factor(dilution)),
  median = median(logcfu),
  mean = mean(logcfu),
  meanCFUs = 10^mean(logcfu),
  sd = sd(logcfu)
))
stats_lacto

# L. brevis
Lbrevis <- subset(lacto, lacto$bacteria == "L. brevis")
ggplot(Lbrevis, aes(day, logcfu)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  facet_grid(replicate ~ dilution, margins = FALSE, scales = "fixed") +
  theme_bw()
# lmer
lmer_Lbrevis_1 <- lmer(logcfu ~ day * dilution + (1 | replicate), data = Lbrevis)
lmer_Lbrevis_2 <- lmer(logcfu ~ day + dilution + (1 | replicate), data = Lbrevis)
anova(lmer_Lbrevis_1, lmer_Lbrevis_2)
summary(lmer_Lbrevis_2)

# L. paraplantarum
Lpara <- subset(lacto, lacto$bacteria == "L. paraplantarum")
ggplot(Lpara, aes(day, logcfu)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  facet_grid(replicate ~ dilution, margins = FALSE, scales = "fixed") +
  theme_bw()
# lmer
lmer_Lpara_1 <- lmer(logcfu ~ day * dilution + (1 | replicate), data = Lpara)
lmer_Lpara_2 <- lmer(logcfu ~ day + dilution + (1 | replicate), data = Lpara)
anova(lmer_Lpara_1, lmer_Lpara_2)
summary(lmer_Lpara_2)
```

### Bacteria growth on food

```{r analysis of proliferation of Acetobacter isolates on flyfood}
foodg <- read.csv(
  "S11_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
foodg$replicate <- as.factor(foodg$replicate)
foodg$day <- as.factor(foodg$day)
foodg$logcfu <- log10(foodg$cfu + 1)

## plot
ggplot(data = foodg, aes(x = day, y = logcfu)) +
  geom_jitter(width = 0.1) +
  facet_grid(bacteria ~ replicate, margins = FALSE)

## linear model
# Ac.
# OTU2753 (Ac. OTU2753 isolate)
foodg2753 <- filter(foodg, bacteria == "Ac. OTU2753")
model2753_1 <- lmer(logcfu ~ day + (1 | replicate), data = foodg2753)
summary(model2753_1)
tukey2753 <- glht(model2753_1, mcp(day = "Tukey"))
summary(tukey2753)
cld(tukey2753)

# Ac. thailandicus
foodgwild <- filter(foodg, bacteria == "Ac. thailandicus")
modelwild_1 <- lmer(logcfu ~ day + (1 | replicate), data = foodgwild)
summary(modelwild_1)
tukeywild <- glht(modelwild_1, mcp(day = "Tukey"))
summary(tukeywild)
cld(tukeywild)

# Ac. cibinongensis
foodgcibi <- filter(foodg, bacteria == "Ac. cibinongensis")
head(foodgcibi)
modelcibi_1 <- lmer(logcfu ~ day + (1 | replicate), data = foodgcibi)
summary(modelcibi_1)
tukeycibi <- glht(modelcibi_1, mcp(day = "Tukey"))
summary(tukeycibi)
cld(tukeycibi)
```

## Figure 5

### Analysis of proliferation in D. melanogaster and D. simulans

```{r load data and plot proliferation in different species}
# test protocol in bottles
# load data
# stringsAsFactors = TRUE reproduces the read.csv default of R < 4.0, for
# which this script was written.
bottle_test <- read.csv(
  "S12_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
bottle_test$logcfu <- log10(bottle_test$cfu + 1)
bottle_test$log10titer <- as.factor(bottle_test$log10titer)

ggplot(data = bottle_test, aes(y = logcfu, x = hours)) +
  geom_jitter(width = .3) +
  facet_grid(log10titer ~ bacteria)

# thailandicus
bottle_test_thai <- filter(
  bottle_test,
  hours != "24h_chaser", bacteria == "Ac. thailandicus"
)
lm_bottle_test_thai_1 <- lm(logcfu ~ hours * log10titer, data = bottle_test_thai)
lm_bottle_test_thai_2 <- lm(logcfu ~ hours + log10titer, data = bottle_test_thai)
anova(lm_bottle_test_thai_1, lm_bottle_test_thai_2)
summary(lm_bottle_test_thai_1)
# effect of hours at each titer (hours coefficient, plus the interaction for
# the second titer level)
contrast_matrix_slopes_bottle_test_thai <- rbind(
  "titer3" = c(0, 1, 0, 0),
  "titer4" = c(0, 1, 0, 1)
)
# the Holm adjustment is requested through summary(..., test = adjusted());
# glht() has no `adjust` argument and silently ignores it
summary(
  glht(lm_bottle_test_thai_1, contrast_matrix_slopes_bottle_test_thai),
  test = adjusted("holm")
)
# increases with dose 3

# OTU2753
bottle_test_2753 <- filter(
  bottle_test,
  hours != "24h_chaser", bacteria == "Ac. OTU2753"
)
lm_bottle_test_2753_1 <- lm(logcfu ~ hours * log10titer, data = bottle_test_2753)
lm_bottle_test_2753_2 <- lm(logcfu ~ hours + log10titer, data = bottle_test_2753)
anova(lm_bottle_test_2753_1, lm_bottle_test_2753_2)
summary(lm_bottle_test_2753_1)
contrast_matrix_slopes_bottle_test_2753 <- rbind(
  "titer4" = c(0, 1, 0, 0),
  "titer5" = c(0, 1, 0, 1)
)
summary(
  glht(lm_bottle_test_2753_1, contrast_matrix_slopes_bottle_test_2753),
  test = adjusted("holm")
)
# decreases in both doses

# load data of Dmel and Dsim
proms <- read.csv(
  "S13_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
proms$replicate <- as.factor(proms$replicate)
proms$logcfu <- log10(proms$cfu + 1)
proms$log10titer <- as.factor(proms$log10titer)
proms$hours <- as.factor(proms$hours)
proms$hours <- relevel(proms$hours, "0")
proms$stock <- factor(
  proms$stock,
  levels = c("w1118", "canton-S", "dmel O13", "dsim A07", "dsim J04", "dsim O13")
)
head(proms)

stats_proms <- as.data.frame(summarise(
  group_by(proms, stock, hours, log10titer),
  median = median(logcfu),
  mean = mean(logcfu),
  meanCFUs = 10^mean(logcfu),
  sd = sd(logcfu)
))
stats_proms

# plot
ggplot(data = filter(proms, replicate == 1), aes(y = logcfu, x = hours)) +
  geom_jitter() +
  facet_grid(log10titer ~ stock)
ggplot(data = filter(proms, replicate == 2), aes(y = logcfu, x = hours)) +
  geom_jitter() +
  facet_grid(log10titer ~ stock)

# test host interaction
modelms1 <- lmer(
  logcfu ~ host * log10titer * hours + (1 | replicate) + (1 | stock),
  data = proms
)
modelms2 <- lmer(
  logcfu ~ host * log10titer + log10titer * hours + host * hours +
    (1 | replicate) + (1 | stock),
  data = proms
)
anova(modelms1, modelms2)
drop1(modelms2, test = "Chisq")
modelms3 <- lmer(
  logcfu ~ log10titer * hours + host * hours + (1 | replicate) + (1 | stock),
  data = proms
)
drop1(modelms3, test = "Chisq")
summary(modelms3)
lsmeansLT(modelms3)
# effect of hours for each host at each titer
contrast_matrix_slopes_hosts <- rbind(
  "melano_3"   = c(0, 0, 1, 0, 0, 0),
  "melano_4"   = c(0, 0, 1, 0, 1, 0),
  "simulans_3" = c(0, 0, 1, 0, 0, 1),
  "simulans_4" = c(0, 0, 1, 0, 1, 1)
)
summary(
  glht(modelms3, contrast_matrix_slopes_hosts),
  test = adjusted("holm")
)

# simpler model with no titer*hours interactions
modelms4 <- lmer(
  logcfu ~ log10titer + host * hours + (1 | replicate) + (1 | stock),
  data = proms
)
summary(modelms4)
contrast_matrix_slopes_hosts_simple <- rbind(
  "melano"   = c(0, 0, 0, 1, 0),
  "simulans" = c(0, 0, 0, 1, 1)
)
summary(
  glht(modelms4, contrast_matrix_slopes_hosts_simple),
  test = adjusted("holm")
)

# Difference between lines of Dmel
dmel <- filter(proms, host == "dmel")
lmer_dmel1 <- lmer(
  logcfu ~ hours * log10titer * stock + (1 | replicate),
  data = dmel
)
lmer_dmel2 <- lmer(
  logcfu ~ hours * log10titer + log10titer * stock + hours * stock +
    (1 | replicate),
  data = dmel
)
anova(lmer_dmel1, lmer_dmel2)
drop1(lmer_dmel2, test = "Chisq")
lmer_dmel3 <- lmer(
  logcfu ~ log10titer * stock + hours * stock + (1 | replicate),
  data = dmel
)
drop1(lmer_dmel3, test = "Chisq")
summary(lmer_dmel3)
contrast_matrix_slopes_dmel <- rbind(
  "w1118"    = c(0, 0, 0, 0, 1, 0, 0, 0, 0),
  "canton-S" = c(0, 0, 0, 0, 1, 0, 0, 1, 0),
  "dmel O13" = c(0, 0, 0, 0, 1, 0, 0, 0, 1)
)
summary(
  glht(lmer_dmel3, contrast_matrix_slopes_dmel),
  test = adjusted("holm")
)

# Dsim
dsim <- filter(proms, host == "dsim")
lmer_dsim1 <- lmer(
  logcfu ~ hours * log10titer * stock + (1 | replicate),
  data = dsim
)
lmer_dsim2 <- lmer(
  logcfu ~ hours * log10titer + log10titer * stock + hours * stock +
    (1 | replicate),
  data = dsim
)
anova(lmer_dsim1, lmer_dsim2)
drop1(lmer_dsim2, test = "Chisq")
lmer_dsim3 <- lmer(
  logcfu ~ log10titer * stock + hours * stock + (1 | replicate),
  data = dsim
)
drop1(lmer_dsim3, test = "Chisq")
lmer_dsim4 <- lmer(
  logcfu ~ log10titer + hours * stock + (1 | replicate),
  data = dsim
)
lmer_dsim5 <- lmer(
  logcfu ~ log10titer + hours + stock + (1 | replicate),
  data = dsim
)
anova(lmer_dsim5, lmer_dsim4)
summary(lmer_dsim5)
```

## Figure 6

### Analysis of flies monoassociated with Acetobacter species

```{r load data and plot - flies associated with different Acetobacter}
# load data
# mcp(bacteria = "Tukey") below needs `bacteria` to be a factor
mono <- read.csv(
  "S14_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
mono$vials <- as.factor(paste(mono$bacteria, mono$vials, sep = "_"))

# make file with entry per day
# NOTE(review): the reshape2 method is called directly, presumably because
# reshape (also attached) provides a melt() of its own - confirm before
# replacing this with reshape2::melt().
m.fit <- reshape2:::melt.data.frame(
  mono,
  id.vars = c("condition", "bacteria", "vials"),
  variable.name = "devday", value.name = "count", na.rm = TRUE
)
# day columns carry an "X" prefix; strip it to recover the numeric day
m.fit$devday <- as.numeric(gsub("X", "", m.fit$devday))
m.fit$count <- as.numeric(m.fit$count)
head(m.fit)

## ANALYSIS OF DEVELOPMENT
# make each entry correspond to one pupa or one adult emerged - untable
dg0ad <- untable(m.fit[, c(1, 2, 3, 4)], num = m.fit$count)
dg0ad[1:20, ]

## Statistics time to pupariation
# lm
model_pupae_mono_1 <- lm(
  devday ~ bacteria,
  data = filter(dg0ad, condition == "pupae")
)
summary(model_pupae_mono_1)
tukey_pupae_mono_1 <- glht(model_pupae_mono_1, mcp(bacteria = "Tukey"))
summary(tukey_pupae_mono_1)
cld(tukey_pupae_mono_1)

## Statistics time to adulthood
# lm
model_adult_mono_l <- lm(
  devday ~ bacteria,
  data = filter(dg0ad, condition == "adult")
)
summary(model_adult_mono_l)
tukeyadult <- glht(model_adult_mono_l, mcp(bacteria = "Tukey"))
summary(tukeyadult)
cld(tukeyadult)
lsmeans::lsmeans(model_adult_mono_l, "bacteria")

## ANALYSIS OF TOTAL PROGENY
# total over the day columns (columns 4 to 16)
mono$total <- rowSums(mono[, c(4:16)], na.rm = TRUE)

# Statistical analysis
# total pupae
model_tp_l <- lm(total ~ bacteria, data = filter(mono, condition == "pupae"))
summary(model_tp_l)
tukey_tp_l <- glht(model_tp_l, mcp(bacteria = "Tukey"))
summary(tukey_tp_l)
cld(tukey_tp_l)

# total adults
model_ta_l <- lm(total ~ bacteria, data = filter(mono, condition == "adult"))
summary(model_ta_l)
tukey_ta_l <- glht(model_ta_l, mcp(bacteria = "Tukey"))
summary(tukey_ta_l)
cld(tukey_ta_l)
```

### Analysis of flies exposed to changing environment

```{r analysis of gut bacterial levels from G0}
# load and plot data
# cfu is read as a factor (it contains the text "nc"), which the level
# manipulation below relies on
g0gut <- read.csv(
  "S15_Data.csv",
  sep = ",", header = TRUE, stringsAsFactors = TRUE
)
# transform nc (not countable) to 30000 cfu per gut
levels(g0gut$cfu) <- c(levels(g0gut$cfu), 30000)
g0gut$cfu[g0gut$cfu == "nc"] <- 30000
g0gut$cfu <- as.numeric(as.character(g0gut$cfu))
g0gut$logcfu <- log10(g0gut$cfu + 1)

# Plot
ggplot(data = g0gut, aes(day, logcfu, fill = gender, colour = gender)) +
geom_jitter(width=.5) + facet_grid(bacteria ~ replicate,margins = "False") #Statistics summary_g0_gut_bact <- as.data.frame(summarise(group_by(g0gut, bacteria, day, gender), medianCFU = median(cfu), median = median(logcfu), mean = mean(logcfu))) summary_g0_gut_bact #wilcoxon test because some values are high but undetermined - use rank test wilcox.test(cfu ~ day, data = filter(g0gut, gender == "female", bacteria == "Ac. thailandicus")) wilcox.test(cfu ~ day, data = filter(g0gut, gender == "female", bacteria == "Ac. OTU2753")) wilcox.test(cfu ~ bacteria, data = filter(g0gut, gender == "female", day == 10, bacteria != "GF")) wilcox.test(cfu ~ bacteria, data = filter(g0gut, gender == "male", day == 10, bacteria != "GF")) ``` ```{r analysis of bacteria from G0 transmitted to food} #load data bottles <- read.csv("S16_Data.csv",sep=",",head=T) #reshape table to get one line per cfu reading md <- melt(bottles, id=(c("replicate","bacteria", "cage"))) names(md)[names(md)=="value"] <- "colonies" names(md)[names(md)=="variable"] <- "day" bottles$cage<-as.factor(bottles$cage) #remove X from day md$day <- as.numeric(gsub("X", "", md$day)) md <- filter(md, bacteria != "GF") md$presence[md$colonies == 0] = 0 md$presence[md$colonies > 0] = 1 md$presence[md$colonies == "nc"] = 1 md$presence[md$colonies == "No data"] <- NA md$presence <- as.numeric(md$presence) #statistics #generalized linear model with binomial distribution model1food <- glmer(presence~bacteria * day + (1|replicate) , data=md, family = binomial) model2food <- glmer(presence~bacteria + day + (1|replicate) , data=md, family = binomial) anova(model1food,model2food) model3food <- glmer(presence~bacteria + (1|replicate) , data=md, family = binomial) anova(model3food,model2food) summary(model3food) ``` ```{r analysis of number of eggs layed by G0} #load data eggs <- read.csv("S17_Data.csv",sep=",",head=T) #make file with entry per day m.eggs<-reshape2:::melt.data.frame(eggs, 
id=c("bacteria","cage","replicate"),variable.name="day",value.name = "eggs", na.rm=TRUE) m.eggs$day <- as.numeric(gsub("X", "", m.eggs$day)) m.eggs$eggs <- as.numeric(m.eggs$eggs) head(m.eggs) #get total eggs per fly eggs$total<-rowSums(eggs[,c(4:13)],na.rm = TRUE) head(eggs) boxplot(eggs$total ~eggs$bacteria) #Analyse total number of eggs model1eg<-lmer(total~bacteria + (1|replicate), data=eggs) summary(model1eg) tukey1eg<-glht(model1eg, mcp(bacteria="Tukey")) summary(tukey1eg) cld(tukey1eg) ``` ```{r analysis of development time and total adults of F1} f1 <- read.csv("S18_Data.csv",sep=",",head=T) ##DEVELOPMENT #make one entry per day m.f1<-reshape2:::melt.data.frame(f1, id=c("replicate", "day","bacteria","cage"),variable.name="devday",value.name = "adults", na.rm=TRUE) m.f1$devday <- as.numeric(as.character(gsub("X", "", m.f1$devday))) m.f1$adults <- as.numeric(m.f1$adults) m.f1$day <- as.factor(m.f1$day) m.f1$replicate <- as.factor(m.f1$replicate) #one entry per fly umf1<-untable(m.f1[,c(1,2,3,4,5)],num=m.f1$adults) umf1[1:20,] #histograms ggplot(umf1, aes(devday, fill = bacteria)) + geom_histogram(breaks=seq(7, 20, by = 1), binwidth = 1, color = "black") + scale_x_continuous(breaks=7:20)+ facet_grid(bacteria ~ replicate,margins = FALSE, scales = "fixed")+ scale_fill_manual(values=c("salmon","olivedrab2","deepskyblue"))+ theme_bw() #Statistical analysis on developmental time to adulthood lmer_dev <- lmer(devday~bacteria + (1|replicate/cage), data=umf1) summary(lmer_dev) tukey_lmer_dev <-glht(lmer_dev, mcp(bacteria="Tukey")) summary(tukey_lmer_dev) lsmeansLT(lmer_dev) ##TOTAL NUMBER OF ADULTS f1$total <- rowSums(f1[,c(5:15)],na.rm = TRUE) #remove individual days tad <- f1[,-(5:15), drop = FALSE] head(tad) #dcast the days tadcast <- reshape2::dcast(tad, replicate + bacteria + cage ~ day, value.var = "total", na.rm = TRUE) #make total of progeny for each pair summing all days tadcast$total <- rowSums(tadcast[,c(4:13)],na.rm = TRUE) #Boxplot ggplot(tadcast, 
aes(bacteria, total, fill = bacteria)) + geom_boxplot() + facet_grid(. ~ replicate,margins = FALSE, scales = "fixed")+ scale_fill_manual(values=c("salmon","olivedrab2","deepskyblue"))+ theme_bw() #Statistical analysis on the total number of adults model1ta<-lmer(total~bacteria + (1|replicate) , data=tadcast) summary(model1ta) tukey1ta<-glht(model1ta, mcp(bacteria="Tukey")) summary(tukey1ta) ``` ```{r analysis of F1 fertility} #load and plot data f1fert <- read.csv("S19_Data.csv",sep=",",head=T) #convert number of adults per day to numbers, replicate to factor for(i in (8:22)){ f1fert[,i]<-as.numeric(f1fert[,i]) } f1fert$replicate<-as.factor(f1fert$replicate) #make total adults f1fert$total <- rowSums(f1fert[,c(8:22)],na.rm = TRUE) head(f1fert) #remove individual days cagepair and daypair tadf1 <- f1fert[,-(c(4,5,8:22)), drop = FALSE] #dcast the vial days tadcastf1 <- reshape2::dcast(tadf1, replicate + pairingdate + bacteria + pairs ~ vialday, value.var = "total", na.rm = TRUE) #make total of progeny for each pair summing all vial days tadcastf1$total <- rowSums(tadcastf1[,c(5:9)],na.rm = TRUE) #plot total per pair and during the 10 days (total of total) ggplot(tadcastf1, aes(bacteria, total, fill = bacteria)) + geom_boxplot() + facet_grid(. ~ replicate,margins = FALSE, scales = "fixed")+ scale_fill_manual(values=c("salmon","olivedrab2","deepskyblue"))+ theme_bw() #Statistical analysis on the total number of adults model1_taf1<-lmer(total~bacteria + (1|replicate) , data=tadcastf1) summary(model1_taf1) tukey1_taf1<-glht(model1_taf1, mcp(bacteria="Tukey")) summary(tukey1_taf1) ``` ###Analysis of crosses between Ac. 
thailandicus monoassociated and GF flies

```{r crosses monoassociated and GF parents}
# NOTE: read.csv() calls set stringsAsFactors = TRUE explicitly. The script
# was written for R 3.5.0, where that was the default; relevel() and
# mcp() below need factor columns, which R >= 4.0.0 no longer creates.

# load data
f1_crosses <- read.csv("S20_Data.csv", header = TRUE, sep = ",",
                       stringsAsFactors = TRUE)

## DEVELOPMENT ANALYSIS
## Transform data to have one entry per developed fly
md_f1_crosses <- reshape2::melt(
  f1_crosses,
  id.vars = c("Replicate", "pair", "pupaeadult", "condition", "vialday"),
  variable.name = "devday",
  value.name = "count",
  na.rm = TRUE
)
md_f1_crosses$Replicate <- as.factor(md_f1_crosses$Replicate)

# Expanded table with number of rows correspondent to number of flies
expanded_f1_crosses <- reshape::untable(md_f1_crosses[, 1:6],
                                        num = md_f1_crosses[, 7])
expanded_f1_crosses$devday <- as.numeric(
  gsub("X", "", expanded_f1_crosses$devday)
)
expanded_f1_crosses$Replicate <- as.factor(expanded_f1_crosses$Replicate)

# SUBSET PUPAE AND ADULTS
pupae_f1_crosses <- filter(expanded_f1_crosses, pupaeadult == "pupae")
adults_f1_crosses <- filter(expanded_f1_crosses, pupaeadult == "adult")

## TOTAL PROGENY
f1_crosses$total <- rowSums(f1_crosses[, 6:23], na.rm = TRUE)
# remove individual days
tad_f1_crosses <- f1_crosses[, -(6:23), drop = FALSE]
# dcast the days
tadcast_f1_crosses <- reshape2::dcast(
  tad_f1_crosses,
  Replicate + pair + pupaeadult + condition ~ vialday,
  value.var = "total", na.rm = TRUE
)
# make total of progeny for each pair summing all days
tadcast_f1_crosses$total <- rowSums(tadcast_f1_crosses[, 5:9], na.rm = TRUE)
tadp_f1_crosses <- filter(tadcast_f1_crosses, pupaeadult == "pupae")
tada_f1_crosses <- filter(tadcast_f1_crosses, pupaeadult == "adult")

### STATISTICAL ANALYSIS
## TOTAL PROGENY
# Total pupae
model_tp_f1_crosses_l <- lmer(total ~ condition + (1 | Replicate),
                              data = tadp_f1_crosses)
summary(model_tp_f1_crosses_l)
tukey_tp_f1_crosses_l <- glht(model_tp_f1_crosses_l,
                              mcp(condition = "Tukey"))
summary(tukey_tp_f1_crosses_l)
cld(tukey_tp_f1_crosses_l)

# Total adults
model_ta_f1_crosses_l <- lmer(total ~ condition + (1 | Replicate),
                              data = tada_f1_crosses)
summary(model_ta_f1_crosses_l)
tukey_ta_f1_crosses_l <- glht(model_ta_f1_crosses_l,
                              mcp(condition = "Tukey"))
summary(tukey_ta_f1_crosses_l)
cld(tukey_ta_f1_crosses_l)

## DEVELOPMENTAL TIME
# lmer pupae
model_pupae_f1_crosses_l <- lmer(
  devday ~ condition + (1 | Replicate) + (1 | pair),
  data = pupae_f1_crosses
)
summary(model_pupae_f1_crosses_l)
tukey_pupae_f1_crosses_l <- glht(model_pupae_f1_crosses_l,
                                 mcp(condition = "Tukey"))
summary(tukey_pupae_f1_crosses_l)
cld(tukey_pupae_f1_crosses_l)

# lmer adult
model_adult_f1_crosses_l <- lmer(
  devday ~ condition + (1 | Replicate) + (1 | pair),
  data = adults_f1_crosses
)
summary(model_adult_f1_crosses_l)
tukey_adult_f1_crosses_l <- glht(model_adult_f1_crosses_l,
                                 mcp(condition = "Tukey"))
summary(tukey_adult_f1_crosses_l)
cld(tukey_adult_f1_crosses_l)
```

## Figure 7

### Analysis of fitness of flies associated with different natural bacterial isolates

```{r analysis of fitness of flies associated with different natural bacterial isolates}
### Analysis of development and number of adults from G0
# load data
g0_dev_mono_eggs <- read.csv("S21_Data.csv", header = TRUE, sep = ",",
                             stringsAsFactors = TRUE)
g0_dev_mono_eggs$bacteria <- relevel(g0_dev_mono_eggs$bacteria,
                                     ref = "GF Mannitol")

## DEVELOPMENT ANALYSIS
# transform data to have one entry per developed fly
# melt table (days in rows); empty day cells are dropped so that untable()
# never receives an NA count
m_g0_dev_mono_eggs <- reshape2::melt(
  g0_dev_mono_eggs,
  id.vars = c("bacteria", "replicate", "code", "vial"),
  variable.name = "day",
  value.name = "adults",
  na.rm = TRUE
)

# Expanded table with number of rows correspondent to number of flies
expandedm_g0_dev_mono_eggs <- reshape::untable(
  m_g0_dev_mono_eggs[, 1:5],
  m_g0_dev_mono_eggs[, 6]
)
expandedm_g0_dev_mono_eggs$day <- as.numeric(
  gsub("X", "", expandedm_g0_dev_mono_eggs$day)
)
expandedm_g0_dev_mono_eggs$replicate <- as.factor(
  expandedm_g0_dev_mono_eggs$replicate
)
expandedm_g0_dev_mono_eggs$vial <- as.factor(expandedm_g0_dev_mono_eggs$vial)
# one id per physical vial: bacteria + replicate + vial
expandedm_g0_dev_mono_eggs$unique_v <- as.factor(
  paste(expandedm_g0_dev_mono_eggs$bacteria,
        expandedm_g0_dev_mono_eggs$replicate,
        expandedm_g0_dev_mono_eggs$vial)
)
head(expandedm_g0_dev_mono_eggs)

# mixed linear model
model_g0_dev_mono_eggs <- lmer(
  day ~ bacteria + (1 | unique_v) + (1 | replicate),
  data = expandedm_g0_dev_mono_eggs
)
summary(model_g0_dev_mono_eggs)
#model_g0_dev_mono_eggs2 <- lmer(day~bacteria + (1|unique_v) + (1|replicate), data=expandedm_g0_dev_mono_eggs)
tukey_g0_dev_mono_eggs <- lsmeans::lsmeans(model_g0_dev_mono_eggs,
                                           list(pairwise ~ bacteria),
                                           adjust = "tukey")
tukey_g0_dev_mono_eggs
cld(tukey_g0_dev_mono_eggs, Letters = letters[1:10])
lsmeans_g0_dev_mono_eggs <- as.data.frame(
  summary(tukey_g0_dev_mono_eggs)$`lsmeans of bacteria`
)
lsmeans_g0_dev_mono_eggs

## TOTAL ADULTS EMERGED ANALYSIS
head(g0_dev_mono_eggs)
g0_dev_mono_eggs$total <- rowSums(g0_dev_mono_eggs[, 5:15], na.rm = TRUE)
g0_dev_mono_eggs$replicate <- as.factor(g0_dev_mono_eggs$replicate)
# lmer
lmer_adult_total_mono_eggs <- lmer(total ~ bacteria + (1 | replicate),
                                   data = g0_dev_mono_eggs)
tukey_adult_total_mono_eggs <- lsmeans::lsmeans(lmer_adult_total_mono_eggs,
                                                list(pairwise ~ bacteria),
                                                adjust = "tukey")
tukey_adult_total_mono_eggs
cld(tukey_adult_total_mono_eggs)

### FERTILITY G0 ADULTS
# load and plot data
g0_fertility_mono_eggs <- read.csv("S22_Data.csv", header = TRUE, sep = ",",
                                   stringsAsFactors = TRUE)
g0_fertility_mono_eggs$bacteria <- relevel(g0_fertility_mono_eggs$bacteria,
                                           ref = "GF Mannitol")
g0_fertility_mono_eggs$replicate <- as.factor(
  g0_fertility_mono_eggs$replicate
)

ggplot(g0_fertility_mono_eggs, aes(bacteria, adults, fill = bacteria)) +
  geom_boxplot() +
  facet_grid(. ~ replicate, margins = FALSE, scales = "fixed") +
  theme_bw()

# lmer
model_g0_fertility_mono_eggs <- lmer(adults ~ bacteria + (1 | replicate),
                                     data = g0_fertility_mono_eggs)
summary(model_g0_fertility_mono_eggs)
tukey_g0_fertility_mono_eggs <- lsmeans::lsmeans(
  model_g0_fertility_mono_eggs,
  list(pairwise ~ bacteria),
  adjust = "tukey"
)
tukey_g0_fertility_mono_eggs
cld(tukey_g0_fertility_mono_eggs, Letters = letters[1:10])
lsmeans_g0_fertility_mono_eggs <- as.data.frame(
  summary(tukey_g0_fertility_mono_eggs)$`lsmeans of bacteria`
)

### analysis of correlation between development time and fertility
# merge() suffixes the two clashing `lsmean` columns with .x (fertility
# table, first argument) and .y (development table, second argument)
correlation_data <- merge(lsmeans_g0_fertility_mono_eggs[, 1:2],
                          lsmeans_g0_dev_mono_eggs[, 1:2],
                          by = "bacteria", all = TRUE)
names(correlation_data)[names(correlation_data) == "lsmean.x"] <- "fertility"
names(correlation_data)[names(correlation_data) == "lsmean.y"] <-
  "development_time"
head(correlation_data)

ggplot(correlation_data, aes(development_time, fertility)) +
  geom_point() +
  geom_text(data = correlation_data, aes(label = bacteria),
            vjust = -1.5, size = 1.5) +
  theme_bw()
cor.test(correlation_data$fertility, correlation_data$development_time,
         method = c("pearson"))

# without GF samples statistics are very similar
correlation_data_bact_only <- filter(correlation_data,
                                     bacteria != "GF",
                                     bacteria != "GF Mannitol",
                                     bacteria != "GF MRS")
cor.test(correlation_data_bact_only$fertility,
         correlation_data_bact_only$development_time,
         method = c("pearson"))
```

### Fitness analysis of flies grown on sterile fig homogenate

```{r analysis of development and number of adults from G0 in sterile figs homogenates}
# load and plot data
g0_fig_dev_sterile <- read.csv("S23_Data.csv", header = TRUE, sep = ",",
                               stringsAsFactors = TRUE)
g0_fig_dev_sterile$bacteria <- relevel(g0_fig_dev_sterile$bacteria,
                                       ref = "GF")

## DEVELOPMENT
# empty day cells are dropped so that untable() never receives an NA count
m_g0_fig_dev_sterile <- reshape2::melt(
  g0_fig_dev_sterile,
  id.vars = c("bacteria", "repvial", "replicate"),
  na.rm = TRUE
)
names(m_g0_fig_dev_sterile)[names(m_g0_fig_dev_sterile) == "value"] <- "adults"
names(m_g0_fig_dev_sterile)[names(m_g0_fig_dev_sterile) == "variable"] <- "day"
m_g0_fig_dev_sterile$replicate <- as.factor(m_g0_fig_dev_sterile$replicate)
m_g0_fig_dev_sterile$repvial <- as.factor(m_g0_fig_dev_sterile$repvial)

# Expanded table with number of rows correspondent to number of flies
e_g0_fig_dev_sterile <- reshape::untable(m_g0_fig_dev_sterile[, 1:4],
                                         num = m_g0_fig_dev_sterile[, 5])
# Take out "X" from day column
e_g0_fig_dev_sterile$day <- as.numeric(gsub("X", "", e_g0_fig_dev_sterile$day))

## Plot
ggplot(e_g0_fig_dev_sterile, aes(bacteria, day)) +
  geom_boxplot() +
  facet_grid(. ~ replicate, margins = FALSE, scales = "fixed") +
  theme_bw()

## STATISTICS
# Developmental time
model_g0_fig_dev_sterile <- lmer(day ~ bacteria + (1 | replicate),
                                 data = e_g0_fig_dev_sterile)
summary(model_g0_fig_dev_sterile)
lsmeansLT(model_g0_fig_dev_sterile)
tukey_model_g0_fig_dev_sterile <- glht(model_g0_fig_dev_sterile,
                                       mcp(bacteria = "Tukey"))
cld(tukey_model_g0_fig_dev_sterile)
summary(tukey_model_g0_fig_dev_sterile)

# total number of adults
head(g0_fig_dev_sterile)
g0_fig_dev_sterile$total <- rowSums(g0_fig_dev_sterile[, 4:30], na.rm = TRUE)

# plot
ggplot(g0_fig_dev_sterile, aes(bacteria, total)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  facet_grid(. ~ replicate, margins = FALSE, scales = "fixed") +
  theme_bw()

# lmer
model_g0_fig_total_sterile <- lmer(total ~ bacteria + (1 | replicate),
                                   data = g0_fig_dev_sterile)
summary(model_g0_fig_total_sterile)
lsmeansLT(model_g0_fig_total_sterile)
tukey_model_g0_fig_total_sterile <- glht(model_g0_fig_total_sterile,
                                         mcp(bacteria = "Tukey"))
cld(tukey_model_g0_fig_total_sterile)
summary(tukey_model_g0_fig_total_sterile)

# G0 fertility from sterile figs homogenate
# load data
# stringsAsFactors = TRUE is explicit: relevel() and mcp() need factor
# columns, which read.csv() only creates by default before R 4.0.0.
g0_fertility_fig_sterile <- read.csv("S24_Data.csv", header = TRUE, sep = ",",
                                     stringsAsFactors = TRUE)
g0_fertility_fig_sterile$bacteria <- relevel(
  g0_fertility_fig_sterile$bacteria,
  ref = "GF"
)
g0_fertility_fig_sterile$replicate <- as.factor(
  g0_fertility_fig_sterile$replicate
)
g0_fertility_fig_sterile$vialrep <- as.factor(
  g0_fertility_fig_sterile$vialrep
)
g0_fertility_fig_sterile

# plot
ggplot(g0_fertility_fig_sterile, aes(bacteria, adults)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  facet_grid(. ~ replicate, margins = FALSE, scales = "fixed") +
  theme_bw()

# Statistics
model_g0_fertility_fig_sterile <- lmer(adults ~ bacteria + (1 | replicate),
                                       data = g0_fertility_fig_sterile)
summary(model_g0_fertility_fig_sterile)
tukey_model_g0_fertility_fig_sterile <- glht(model_g0_fertility_fig_sterile,
                                             mcp(bacteria = "Tukey"))
cld(tukey_model_g0_fertility_fig_sterile)
summary(tukey_model_g0_fertility_fig_sterile)
```

### Fitness analysis of flies grown on collected figs

```{r analysis of G0 development and total number of adults}
# Load data and process
g0_dev_fig_tree <- read.csv("S25_Data.csv", header = TRUE, sep = ",",
                            stringsAsFactors = TRUE)
g0_dev_fig_tree$bacteria <- relevel(g0_dev_fig_tree$bacteria, ref = "GF")
g0_dev_fig_tree$rep <- as.factor(g0_dev_fig_tree$rep)
# ids built by pasting rep with fig, and rep with repvial and bacteria
g0_dev_fig_tree$Unique_fig <- paste(g0_dev_fig_tree$rep,
                                    g0_dev_fig_tree$fig,
                                    sep = "_")
g0_dev_fig_tree$Unique_repvial <- paste(g0_dev_fig_tree$rep,
                                        g0_dev_fig_tree$repvial,
                                        g0_dev_fig_tree$bacteria,
                                        sep = "_")

## Process data for DEVELOPMENT analysis
# empty day cells are dropped so that untable() never receives an NA count
m_g0_dev_fig_tree <- reshape2::melt(
  g0_dev_fig_tree,
  id.vars = c("rep", "repvial", "fig", "bacteria",
              "Unique_fig", "Unique_repvial"),
  variable.name = "day",
  value.name = "adults",
  na.rm = TRUE
)
m_g0_dev_fig_tree$Unique_fig <- as.factor(m_g0_dev_fig_tree$Unique_fig)
m_g0_dev_fig_tree$Unique_repvial <- as.factor(
  m_g0_dev_fig_tree$Unique_repvial
)

# Expanded table with number of rows correspondent to number of flies
e_g0_dev_fig_tree <- reshape::untable(m_g0_dev_fig_tree[, 1:7],
                                      num = m_g0_dev_fig_tree[, 8])
# Take out "X" from day column
e_g0_dev_fig_tree$day <- as.numeric(gsub("X", "", e_g0_dev_fig_tree$day))

## Process data for TOTAL number analysis
# NOTE(review): columns 5:21 are assumed to be the per-day counts; the two
# Unique_* id columns were appended after them - confirm against the CSV.
g0_dev_fig_tree$total <- rowSums(g0_dev_fig_tree[, 5:21], na.rm = TRUE)
g0_dev_fig_tree$Unique_fig <- as.factor(g0_dev_fig_tree$Unique_fig)

# Statistics
# total number of adults
model_total_fig_tree_1 <- lmer(total ~ bacteria * rep + (1 | Unique_fig),
                               data = g0_dev_fig_tree)
model_total_fig_tree_2 <- lmer(total ~ bacteria + rep + (1 | Unique_fig),
                               data = g0_dev_fig_tree)
anova(model_total_fig_tree_1, model_total_fig_tree_2)
summary(model_total_fig_tree_2)

# development time
model_dev_fig_tree_1 <- lmer(
  day ~ bacteria * rep + (1 | Unique_repvial) + (1 | Unique_fig),
  data = e_g0_dev_fig_tree
)
model_dev_fig_tree_2 <- lmer(
  day ~ bacteria + rep + (1 | Unique_repvial) + (1 | Unique_fig),
  data = e_g0_dev_fig_tree
)
anova(model_dev_fig_tree_1, model_dev_fig_tree_2)
summary(model_dev_fig_tree_1)
lsmeansLT(model_dev_fig_tree_1)
pairs(
  lsmeans::lsmeans(model_dev_fig_tree_1, list(pairwise ~ bacteria | rep)),
  adjust = "holm"
)
```