######################################################################
# SOURCE CODE: Host dependent in planta activation of a putative mating
# response between two different Rhizophagus irregularis isolates.
# 05.11.2019
# Ivan Mateus
# V1
# Code should be used as a guideline to reproduce the data analysis.
# It is not intended to be used as a copy/paste solution.
# Modification of paths, input files, etc. is needed in order to execute
# these commands.
######################################################################

######################################################################
# Bioinformatic pipeline (shell commands)
######################################################################

####################################
# 1. Download data from bioproject PRJNA494798 on cluster
####################################
prefetch -O /scratch/wally/FAC/FBM/DEE/isanders/popgen_to_var/IM/01_Co-inoculation_raw/ SRX4991141

# 2. Change names
mv SRR7416443_1.fastq SPECIES_ISOLATE_NUCLEI_1.fastq

######################################
# 3. Extract SRA
######################################
# Iterate with a glob instead of parsing `ls` output; skip the literal
# pattern when nothing matches.
for i in *.sra; do
    [ -e "$i" ] || continue
    echo "$i"
    fastq-dump --split-files -O /scratch/wally/FAC/FBM/DEE/isanders/popgen_to_var/IM/01_Co-inoculation_raw/ "$i"
done

######################################
# 4. Trim reads with Trim Galore
######################################
# a. Change name of files based on samples name table
mv SRR7416443_1.fastq SPECIES_ISOLATE_NUCLEI_1.fastq
mv SRR7416443_2.fastq SPECIES_ISOLATE_NUCLEI_2.fastq

# b. Trim and quality filter with TrimGalore https://github.com/FelixKrueger/TrimGalore
#    Default parameters. The sample prefix is the first three "_"-separated
#    fields of the mate-1 file name.
for i in *_1.fastq; do
    [ -e "$i" ] || continue
    sample=$(echo "$i" | cut -d'_' -f1,2,3)
    echo "$sample"
    ~/Software/TrimGalore-0.6.0/trim_galore --paired --fastqc --gzip \
        --output_dir /mnt/d/XX_UNIL_DEE/06_SingleNucleiCorradi/02_TrimmedFiles/ \
        "$i" "${sample}_2.fastq"
done
# NOTE(review): --gzip writes compressed output, while the mapping loops
# below read uncompressed *_1.fq / *_2_val_2.fq files - decompress first or
# adapt the STAR calls.

######################################
# 5. Map reads to host genome.
#    STAR - 2-pass mapping with re-generated genome.
######################################
STAR=~/Software/STAR/bin/Linux_x86_64_static/STAR

# Options shared by every genome-indexing call below.
STAR_INDEX_OPTS=(--runMode genomeGenerate --runThreadN 8
    --sjdbGTFfeatureExon CDS --sjdbGTFtagExonParentTranscript Parent
    --sjdbGTFtagExonParentGene ID --sjdbOverhang 99)

# Options shared by every mapping call below.
STAR_MAP_OPTS=(--runThreadN 8 --alignIntronMin 20 --alignIntronMax 5000
    --outFilterMismatchNoverLmax 0.4 --outFilterMismatchNmax 15
    --sjdbOverhang 99 --outFilterIntronMotifs RemoveNoncanonical
    --alignEndsType EndToEnd --outSAMtype BAM SortedByCoordinate
    --limitGenomeGenerateRAM 14500000000 --limitBAMsortRAM 11500000000)

# a. Index the ref genome with STAR for 1st-pass mapping
mkdir Genome_STAR_1st-pass   # (do not create this folder in the folder "RNAseq")
cd Genome_STAR_1st-pass
MyGenomeDir1=$(pwd)
echo "$MyGenomeDir1"
cp ../mesculenta_305_v6.1.gene_exons.gff3 .
cp ../Mesculenta_305_v6.fa .
"$STAR" "${STAR_INDEX_OPTS[@]}" --genomeDir "$MyGenomeDir1" \
    --genomeFastaFiles "$MyGenomeDir1/Mesculenta_305_v6.fa" \
    --sjdbGTFfile "$MyGenomeDir1/mesculenta_305_v6.1.gene_exons.gff3"
# NOTE(review): this second call indexes the Lj3.0 files into the SAME
# genomeDir and therefore replaces the index built just above - presumably
# only one of the two commands is meant to be run per host; confirm.
"$STAR" "${STAR_INDEX_OPTS[@]}" --genomeDir "$MyGenomeDir1" \
    --genomeFastaFiles /mnt/d/XX_UNIL_DEE/10_FungalMycToolKit/03_MapLotusJaponica/Genome_STAR_1st-pass/Lj3.0_pseudomol.fna \
    --sjdbGTFfile /mnt/d/XX_UNIL_DEE/10_FungalMycToolKit/03_MapLotusJaponica/Genome_STAR_1st-pass/Lj3.0_gene_models.gff3

# b. 1st-pass mapping (to generate SJ.out.tab file for each sample)
# NOTE(review): directory names differ between steps (Genome_STAR_1st-pass
# above, Genome_file_indexed_with_STAR_1st-pass here); adjust to your layout.
cd Genome_file_indexed_with_STAR_1st-pass
MyGenomeDir1=$(pwd)
echo "$MyGenomeDir1"
for i in *_1.fq; do
    [ -e "$i" ] || continue
    echo "$i"
    sample=$(echo "$i" | cut -d'_' -f1,2,3)
    "$STAR" "${STAR_MAP_OPTS[@]}" --genomeDir "$MyGenomeDir1" \
        --readFilesIn "$i" "${sample}_2_val_2.fq"
    mv SJ.out.tab "${sample}.SJ.out.tab"
    # Only the splice-junction table is needed from the 1st pass; remove
    # STAR's own BAM rather than every *.bam in the directory.
    rm -f Aligned.sortedByCoord.out.bam
done

# c. Create the SJ.out.tab by filtering and merging SJ.out.tab files from all runs
mkdir Genome_file_indexed_with_STAR_2nd-pass_2samples   # (do not create this folder in the folder "RNAseq")
cd Genome_file_indexed_with_STAR_2nd-pass_2samples
MyGenomeDir2=$(pwd)
# Keep rows whose 5th column is > 0 and translate the numeric code in the
# 4th column (0/1/2) into "."/"+"/"-". Writing with ">" avoids the leading
# blank line produced by the former `echo "" >` + `>>` pair.
# NOTE(review): only one hard-coded SJ file is read although the step title
# says "all runs"; list every per-sample ../*.SJ.out.tab here if intended.
awk 'BEGIN {OFS="\t"; strChar[0]="."; strChar[1]="+"; strChar[2]="-";} {if($5>0){print $1,$2,$3,strChar[$4]}}' \
    ../Unmapped_ERM_rep1_DRS092924.SJ.out.tab > Global.SJ.out.tab.Pass2.sjdb

# d. Index the genome with STAR for 2nd-pass mapping
cp ../mesculenta_305_v6.1.gene_exons.gff3 .
cp ../Mesculenta_305_v6.fa .
# NOTE(review): the files copied above are the Mesculenta ones, but the
# index command reads Lotusjaponicus_MG20_v3.0 files from the same
# directory - use the pair matching your host genome.
"$STAR" "${STAR_INDEX_OPTS[@]}" --genomeDir "$MyGenomeDir2" \
    --genomeFastaFiles "$MyGenomeDir2/Lotusjaponicus_MG20_v3.0_genome.fa" \
    --sjdbGTFfile "$MyGenomeDir2/Lotusjaponicus_MG20_v3.0_annotations.gff3" \
    --sjdbFileChrStartEnd "$MyGenomeDir2/Global.SJ.out.tab.Pass2.sjdb"

# e. 2nd-pass mapping and produce unmapped reads file
# (the directory name previously contained a stray space, which made `cd`
#  receive two arguments; adjust the name to the directory created in step c)
cd Genome_file_indexed_with_STAR_2nd-pass
MyGenomeDir2=$(pwd)
echo "$MyGenomeDir2"
for i in *_1.fq; do
    [ -e "$i" ] || continue
    echo "$i"
    sample=$(echo "$i" | cut -d'_' -f1,2,3)
    "$STAR" "${STAR_MAP_OPTS[@]}" --genomeDir "$MyGenomeDir2" \
        --readFilesIn "$i" "${sample}_2_val_2.fq" \
        --outReadsUnmapped Fastx
    mv Aligned.sortedByCoord.out.bam "RNAseq-aligned_${sample}.bam"
    mv Unmapped.out.mate1 "Unmapped_${sample}.1.fq"
    mv Unmapped.out.mate2 "Unmapped_${sample}.2.fq"
done

# f. Compress unmapped reads: two bsub jobs (mate 1 / mate 2) per sample folder.
# Each folder is handled in a subshell so that a failed `cd` cannot leave the
# loop in the wrong directory.
for folder in */; do
    [ -d "$folder" ] || continue
    echo "-> $folder"
    (
        cd "$folder" && cd T5_Mapping_pass2* || exit 1
        bsub -q dee -L /bin/bash -J "${folder}R1" -u ivandario.mateusgonzalez@unil.ch -N "gzip *1.fq"
        bsub -q dee -L /bin/bash -J "${folder}R2" -u ivandario.mateusgonzalez@unil.ch -N "gzip *2.fq"
    )
done

######################################
# 6. Take unmapped reads and map them to second genome
######################################
# g. Index the second ref genome with STAR for 1st-pass mapping
mkdir Genome_file_indexed_with_STAR_1st-pass   # (do not create this folder in the folder "RNAseq")
cd Genome_file_indexed_with_STAR_1st-pass
MyGenomeDir1=$(pwd)
echo "$MyGenomeDir1"
# The annotation is referenced as DAOM197198_PacBio.gff by every later
# command, so copy it under that name (was .gff3 here only).
# NOTE(review): confirm the actual extension of the annotation file.
cp ../DAOM197198_PacBio.gff .
cp ../DAOM197198_PacBio.fa .
"$STAR" "${STAR_INDEX_OPTS[@]}" --genomeDir "$MyGenomeDir1" \
    --genomeFastaFiles "$MyGenomeDir1/DAOM197198_PacBio.fa" \
    --sjdbGTFfile "$MyGenomeDir1/DAOM197198_PacBio.gff"

# h. 1st-pass mapping on second genome (to generate SJ.out.tab file for each sample)
cd Genome_file_indexed_with_STAR_1st-pass
MyGenomeDir1=$(pwd)
echo "$MyGenomeDir1"
for i in *.1.fq; do
    [ -e "$i" ] || continue
    echo "$i"
    sample=$(echo "$i" | cut -d'.' -f1)
    "$STAR" "${STAR_MAP_OPTS[@]}" --genomeDir "$MyGenomeDir1" \
        --readFilesIn "$i" "${sample}.2.fq"
    mv SJ.out.tab "${sample}.SJ.out.tab"
    rm -f Aligned.sortedByCoord.out.bam
done

# i. Create the SJ.out.tab by filtering and merging SJ.out.tab files from all runs
mkdir Genome_file_indexed_with_STAR_2nd-pass_2samples   # (do not create this folder in the folder "RNAseq")
cd Genome_file_indexed_with_STAR_2nd-pass_2samples
MyGenomeDir2=$(pwd)
# Same filter as step c. NOTE(review): single hard-coded input file.
awk 'BEGIN {OFS="\t"; strChar[0]="."; strChar[1]="+"; strChar[2]="-";} {if($5>0){print $1,$2,$3,strChar[$4]}}' \
    ../Unmapped_ERM_rep1_DRS092924.SJ.out.tab > Global.SJ.out.tab.Pass2.sjdb

# j. Index the 2nd genome with STAR for 2nd-pass mapping
# Copy the files the index command below actually reads (the Mesculenta
# files were copied here before, a leftover from step d).
cp ../DAOM197198_PacBio.gff .
cp ../DAOM197198_PacBio.fa .
"$STAR" "${STAR_INDEX_OPTS[@]}" --genomeDir "$MyGenomeDir2" \
    --genomeFastaFiles "$MyGenomeDir2/DAOM197198_PacBio.fa" \
    --sjdbGTFfile "$MyGenomeDir2/DAOM197198_PacBio.gff" \
    --sjdbFileChrStartEnd "$MyGenomeDir2/Global.SJ.out.tab.Pass2.sjdb"

# k. 2nd-pass mapping on the second genome
# (stray space removed from the directory name, see step e)
cd Genome_file_indexed_with_STAR_2nd-pass
MyGenomeDir2=$(pwd)
echo "$MyGenomeDir2"
for i in *.1.fq; do
    [ -e "$i" ] || continue
    echo "$i"
    "$STAR" "${STAR_MAP_OPTS[@]}" --genomeDir "$MyGenomeDir2" \
        --readFilesIn "$i" "$(echo "$i" | cut -d'.' -f1).2.fq"
    mv Aligned.sortedByCoord.out.bam "MappedRiir_UnMappedLotus_$(echo "$i" | cut -d'_' -f2,3,4).bam"
done

######################################
# 7. Count reads on feature
######################################
# NOTE(review): the original command stopped after "-o" (no output file, no
# BAM inputs). The output name below is the table read by the R section and
# the inputs are the BAMs written in step k - confirm both before running.
~/Software/subread-1.6.5-source/bin/featureCounts -t CDS -g ID \
    -a Genome_file_indexed_with_STAR_1st-pass/DAOM197198_PacBio.gff \
    -o featureCounts_co-inoculation_Rirregularis_vf.txt \
    MappedRiir_UnMappedLotus_*.bam

######################################################################
######################################################################
# Differential transcription analysis on RNA counts on different CASSAVA HOST genotypes.
#author: IM
# 08.08.2019
# Version v1
########################################################################

########################################################################
# Libraries IMPORT
########################################################################
library(DESeq2)
library(tidyr)
library(ggplot2)
library(gplots)
library(RColorBrewer)
library(wesanderson)
library(gridExtra)
library(VennDiagram)
library(ggtree)
library(ape)
library(pheatmap)
library(circlize)

########################################################################
# DATA IMPORT
########################################################################
# Count table (header row present): six annotation columns followed by one
# count column per sample.
countdata <- read.table(
  file = "C:/Users/comat/Documents/Science/Sanders_lab/Co-inoculation_manuscript/V12_NatMicro_Sanders_comments/02_AMFDATA/featureCounts_co-inoculation_Rirregularis_vf.txt",
  header = TRUE
)

# change colnames
# Names are assigned by position, so the table must have exactly as many
# columns as names listed here (6 annotation + 34 samples). Note that
# CM4574_Coinoculation and COL2215_MOCK have only two replicates.
count_colnames <- c(
  "Geneid", "Chr", "Start", "End", "Strand", "Length",
  "BRA337_B1_1", "BRA337_B1_2", "BRA337_B1_3",
  "BRA337_Coinoculation_1", "BRA337_Coinoculation_2", "BRA337_Coinoculation_3",
  "BRA337_DAOM197198_1", "BRA337_DAOM197198_2", "BRA337_DAOM197198_3",
  "BRA337_MOCK_1", "BRA337_MOCK_2", "BRA337_MOCK_3",
  "CM4574_B1_1", "CM4574_B1_2", "CM4574_B1_3",
  "CM4574_Coinoculation_1", "CM4574_Coinoculation_2",
  "CM4574_DAOM197198_1", "CM4574_DAOM197198_2", "CM4574_DAOM197198_3",
  "CM4574_MOCK_1", "CM4574_MOCK_2", "CM4574_MOCK_3",
  "COL2215_B1_1", "COL2215_B1_2", "COL2215_B1_3",
  "COL2215_Coinoculation_1", "COL2215_Coinoculation_2", "COL2215_Coinoculation_3",
  "COL2215_DAOM197198_1", "COL2215_DAOM197198_2", "COL2215_DAOM197198_3",
  "COL2215_MOCK_1", "COL2215_MOCK_2"
)
# Fail loudly instead of silently producing NA / shifted column names when
# the input table does not have the expected layout.
stopifnot(ncol(countdata) == length(count_colnames))
colnames(countdata) <- count_colnames
rownames(countdata) <- gsub("cds-", "", as.character(countdata$Geneid))
head(countdata)

# Plot theme shared by the figures of this section: theme_bw() without grid,
# border or panel background, and with black axis lines.
clean_theme <- theme_bw() +
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank())

########################################################################
# ANALYSIS FOR 3 treatments (DAOM197198, B1 and Co-inoculation)
########################################################################
# Filter data: drop the BRA337 and CM4574 columns and the MOCK samples
COL2215 <- countdata[, !grepl("BRA337|CM4574|MOCK", colnames(countdata))]
# select only count data (the first six columns are annotation)
COL2215 <- COL2215[, 7:ncol(COL2215)]
# Convert to matrix
COL2215 <- as.matrix(COL2215)
# Assign treatments (same order as the remaining columns)
conditionCOL2215 <- factor(c(rep("B1", 3), rep("Coinoculation", 3), rep("DAOM197198", 3)))
# Make data frame with treatments
coldataCOL2215 <- data.frame(row.names = colnames(COL2215), conditionCOL2215)
# Run DESEQ step1
dds <- DESeqDataSetFromMatrix(countData = COL2215, colData = coldataCOL2215,
                              design = ~conditionCOL2215)
dds
# Run the DESeq pipeline step2
dds <- DESeq(dds)
# Regularized log transformation for heatmaps
rld <- rlogTransformation(dds)
# Principal components analysis
plotPCA(rld, intgroup = "conditionCOL2215")
# Get differential expression results.
# NOTE(review): no contrast is given, so results() reports a single default
# comparison although the design has three levels - confirm it is the
# intended one.
resCOL2215 <- results(dds)
## Order by adjusted p-value
resCOL2215 <- resCOL2215[order(resCOL2215$padj), ]
## Merge with normalized count data
resdataCOL2215 <- merge(as.data.frame(resCOL2215),
                        as.data.frame(counts(dds, normalized = TRUE)),
                        by = "row.names", sort = FALSE)
names(resdataCOL2215)[1] <- "Gene"

########################################################################
# ANALYSIS FOR PAIRWISE COMPARISON (DAOM197198 and Co-inoculation)
########################################################################
# Filter data: drop the B1 samples
COL2215DAOM <- COL2215[, grep("B1", colnames(COL2215), invert = TRUE)]
# Convert to matrix
COL2215DAOM <- as.matrix(COL2215DAOM)
head(COL2215DAOM)
# Assign treatments
condition <- factor(c(rep("Coinoculation", 3), rep("DAOM", 3)))
# Make data frame with treatments
coldataDAOM <- data.frame(row.names = colnames(COL2215DAOM), condition)
# Run DESEQ step1
dds <- DESeqDataSetFromMatrix(countData = COL2215DAOM, colData = coldataDAOM,
                              design = ~condition)
dds
# Run the DESeq pipeline step2
dds <- DESeq(dds)
# Regularized log transformation for heatmaps
rld <- rlogTransformation(dds)
# Principal components analysis
plotPCA(rld, intgroup = "condition")
# Get differential expression results
resDAOM <- results(dds)
## Order by adjusted p-value
resDAOM <- resDAOM[order(resDAOM$padj), ]
## Merge with normalized count data
resdataCOL2215DAOM <- merge(as.data.frame(resDAOM),
                            as.data.frame(counts(dds, normalized = TRUE)),
                            by = "row.names", sort = FALSE)
names(resdataCOL2215DAOM)[1] <- "Gene"
# Show filtered results. which() drops genes whose padj is NA; a bare
# logical index would return one all-NA row for each of them.
resdataCOL2215DAOM[which(resdataCOL2215DAOM$padj < 0.1), ]

########################################################################
# VENN DIAGRAM for DAOM 197198 vs CO-inoculation and B1 vs. Co-inoculation
########################################################################
# Areas are hard-coded gene counts: set-specific count + 79 shared.
draw.pairwise.venn((1868 + 79), (1699 + 79), 79,
                   category = c("DAOM197198 vs. Co-inoculation", "B1 vs. Co-inoculation"),
                   lty = rep("blank", 2), fill = c("light blue", "pink"),
                   alpha = rep(0.5, 2), cat.pos = c(0, 0), cat.dist = rep(0.025, 2))

########################################################################
# PLOT GENE COUNTS OF A SINGLE GENE
# All treatments gene counts on the same plot
########################################################################
gene <- "GBC39608.1"
Toplot <- resdataCOL2215[resdataCOL2215$Gene == gene, ]
# The treatment labels below are written out for exactly one gene row
# (9 samples); stop with a clear message when the gene is absent/duplicated.
if (nrow(Toplot) != 1) {
  stop("Expected exactly one row for gene ", gene, ", found ", nrow(Toplot), call. = FALSE)
}
data_long <- gather(Toplot, condition, NormCounts,
                    COL2215_B1_1:COL2215_DAOM197198_3, factor_key = TRUE)
data_long <- cbind.data.frame(
  data_long,
  Treatment = c("B1", "B1", "B1",
                "Co-inoculation", "Co-inoculation", "Co-inoculation",
                "DAOM197198", "DAOM197198", "DAOM197198")
)
ggplot(data_long, aes(x = Treatment, y = NormCounts, fill = Treatment)) +
  scale_fill_manual(values = wes_palette(n = 3, name = "Darjeeling1")) +
  geom_boxplot() +
  ggtitle(paste("Plant genotype COL2215", gene, sep = "\n")) +
  xlab("") +
  ylab("Normalized counts") +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.6) +
  clean_theme +
  scale_x_discrete(limits = c("DAOM197198", "B1", "Co-inoculation"))

########################################################################
# PLOT PHYLOGENY
########################################################################
# IMPORT PHYLOGENY
tree <- read.tree("C:/Users/comat/Documents/Science/Sanders_lab/Co-inoculation_manuscript/V12_NatMicro_Sanders_comments/02_AMFDATA/06_HMG-box_Mucorales/V1_blastHomologs_upregulated_Mucorales.nhx")
# Change tip labels: keep only the text before the first "_"
tree$tip.label <- vapply(strsplit(tree$tip.label, "_"), function(x) x[[1]], character(1))
ggtree(tree) + geom_tiplab() + geom_nodelab()

########################################################################
# Pheatmap genes in sexual reproduction in fungi
########################################################################
tabla <- read.delim(file = "C:/Users/comat/Documents/Science/Sanders_lab/Co-inoculation_manuscript/V12_NatMicro_Sanders_comments/02_AMFDATA/01_Differential_transcription/ReproductiveProteins_mundo_ComparisonThisStudy_UP.txt",
                    sep = "\t", header = TRUE)
rownames(tabla) <- tabla$Protein
pheatmap(t(tabla[, 5:8]), cluster_rows = FALSE, cluster_cols = FALSE,
         cellwidth = 12, cellheight = 12, color = c("white", "black"))

# attention gene nomenclature conserved as in mondo et al.,
# One sector per protein. Protein names are unique (they were accepted as
# row names above), so the number of rows equals the number of sectors.
# levels() must not be used here: since R 4.0 read.delim() returns character
# columns, levels() is NULL and 1:length(NULL) would be c(1, 0).
n_proteins <- nrow(tabla)
factors <- seq_len(n_proteins)
circos.initialize(factors = factors, xlim = c(0, 1))
circos.track(ylim = c(0, 1), factors = factors, bg.col = "white", track.height = 0.01)
circos.trackText(x = rep(0.5, n_proteins), y = rep(10, n_proteins),
                 labels = tabla$Protein, cex = 0.8, factors = factors,
                 col = "black", font = 2, facing = "clockwise", niceFacing = TRUE)
# One ring per data set; the Upregulated_* columns are used as sector
# background colours.
circos.track(ylim = c(0, 1), factors = factors, bg.col = tabla$Upregulated_COL2215,
             bg.border = "#EEEEEE", track.height = 0.1)
circos.track(ylim = c(0, 1), factors = factors, bg.col = tabla$Upregulated_CM4574.7,
             bg.border = "#EEEEEE", track.height = 0.1)
circos.track(ylim = c(0, 1), factors = factors, bg.col = tabla$Upregulated_BRA337,
             bg.border = "#EEEEEE", track.height = 0.1)
circos.track(ylim = c(0, 1), factors = factors, bg.col = tabla$Upregulated_Mundo2018,
             bg.border = "#EEEEEE", track.height = 0.1)

########################################################################
# SNP Analysis of RNA-seq
########################################################################
# SNP ANALYSIS COL 2215
SNP_COL2215 <- read.delim("../03_SNPcoinoc/COL2215_SNP_v1.txt", header = FALSE)
head(SNP_COL2215)
colnames(SNP_COL2215)[10:18] <- c("DAOM197198_1", "DAOM197198_2", "DAOM197198_3",
                                  "B1_1", "B1_2", "B1_3",
                                  "Co-inoculation_1", "Co-inoculation_2", "Co-inoculation_3")
SNP_COL2215[, 10:18] <- lapply(SNP_COL2215[, 10:18], as.character)

# Extract two counts from one sample column.
#   sample_field: character vector of ":"-separated sample fields whose
#                 third sub-field is a ","-separated pair of values
#                 (presumably reference,alternative read counts - confirm
#                 against the FORMAT column of the input).
# Returns a data frame with numeric columns RefAF (first value) and AltFQ
# (second value); every "." in a value is replaced by "0" before conversion.
# Entries with fewer sub-fields than expected give NA instead of stopping
# the script.
parse_allele_counts <- function(sample_field) {
  sub_fields <- strsplit(as.character(sample_field), split = "\\:")
  count_pair <- vapply(sub_fields, function(x) x[3], character(1))
  counts <- strsplit(count_pair, split = "\\,")
  ref <- vapply(counts, function(x) x[1], character(1))
  alt <- vapply(counts, function(x) x[2], character(1))
  data.frame(RefAF = as.numeric(gsub("\\.", "0", ref)),
             AltFQ = as.numeric(gsub("\\.", "0", alt)))
}

# One data frame per sample (columns 1-5 of the SNP table + counts + sample
# label), replacing nine copy-pasted stanzas. In the `treat` label the
# co-inoculation samples are spelled "Coinoculation_<n>".
snp_sample_cols <- colnames(SNP_COL2215)[10:18]
snp_treat_labels <- gsub("Co-inoculation", "Coinoculation", snp_sample_cols, fixed = TRUE)
alfq_by_sample <- Map(
  function(column, label) {
    cbind.data.frame(SNP_COL2215[1:5],
                     parse_allele_counts(SNP_COL2215[[column]]),
                     treat = rep(label, nrow(SNP_COL2215)))
  },
  snp_sample_cols, snp_treat_labels
)
# unname() keeps automatic row names in the stacked table
All_alfq <- do.call(rbind.data.frame, unname(alfq_by_sample))
# Cond = treatment without replicate number; FQ = RefAF / (RefAF + AltFQ)
All_alfq <- cbind.data.frame(
  All_alfq,
  Cond = vapply(strsplit(split = "_", as.character(All_alfq$treat)),
                function(x) x[[1]], character(1)),
  FQ = All_alfq$RefAF / (All_alfq$RefAF + All_alfq$AltFQ)
)
# Mean FQ per sample over complete rows only
with(All_alfq[complete.cases(All_alfq), ], tapply(FQ, treat, mean))

# Redo allele fq. per sample
# Histogram of FQ for the samples whose `treat` label matches `pattern`,
# one fill colour per replicate.
plot_alfq_hist <- function(pattern, title, fill_values) {
  ggplot(All_alfq[grep(pattern, All_alfq$treat), ], aes(x = FQ, fill = treat)) +
    geom_histogram(bins = 30, alpha = 1, position = "identity") +
    ggtitle(title) +
    scale_fill_manual(values = fill_values) +
    clean_theme +
    theme(legend.position = c(0.2, 0.4))
}
a1 <- plot_alfq_hist(
  "DAOM", "Plant genotype COL2215\n DAOM197198 replicates",
  c(wes_palette(n = 4, name = "FantasticFox1")[2],
    wes_palette(n = 2, name = "FantasticFox1")[1],
    wes_palette(n = 4, name = "Moonrise2")[2])
)
a2 <- plot_alfq_hist(
  "B1", "Plant genotype COL2215\n B1 replicates",
  c(wes_palette(n = 4, name = "Darjeeling1")[1],
    wes_palette(n = 4, name = "GrandBudapest1")[4],
    wes_palette(n = 5, name = "FantasticFox1")[5])
)
a3 <- plot_alfq_hist(
  "Coinoculation", "Plant genotype COL2215\n Co-inoculation replicates",
  c(wes_palette(n = 2, name = "Darjeeling1")[2],
    wes_palette(n = 2, name = "Chevalier1")[1],
    wes_palette(n = 2, name = "Cavalcanti1")[2])
)
grid.arrange(a1, a2, a3, widths = c(1.5, 2),
             layout_matrix = rbind(c(1, 3), c(2, 3)))

##################
# Individual loci
All_alfq2 <- gather(All_alfq, condition, Counts, RefAF, AltFQ)

# Stacked bar plot of the two counts per sample at one SNP.
# Scaffold (V1) and position (V2) are matched exactly: substring/regex
# matching with grep() would also select e.g. position 1175586 when asked
# for 175586, and "." in the scaffold name would match any character.
plot_locus <- function(scaffold, position) {
  locus <- All_alfq2[which(All_alfq2$V1 == scaffold & All_alfq2$V2 == position), ]
  ggplot(locus, aes(fill = condition, y = Counts, x = treat)) +
    geom_bar(position = "stack", stat = "identity") +
    ggtitle(paste0("Plant genotype COL2215\n SNP Scaffold ", scaffold,
                   " Position ", position, " ")) +
    scale_fill_manual(values = c(wes_palette(n = 3, name = "Darjeeling1")[3],
                                 wes_palette(n = 3, name = "Darjeeling1")[1])) +
    clean_theme +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
}
t1 <- plot_locus("BDIQ01000197.1", 175586L)
t2 <- plot_locus("BDIQ01000001.1", 28234L)
t3 <- plot_locus("BDIQ01000205.1", 69970L)
grid.arrange(t1, t2, t3, nrow = 1)

########################################################################
# Contaminants analysis
########################################################################
FC <- read.delim("ALL_Samples_Screen.out", header = FALSE)
FC$V6 <- as.character(FC$V6)
FC$V7 <- as.character(FC$V7)
FC$V2 <- as.character(FC$V2)
# Species label = 3rd to 5th space-separated words of V6
Species <- vapply(strsplit(split = " ", FC$V6),
                  function(x) paste(x[[3]], x[[4]], x[[5]]), character(1))
# Sample identity = 2nd to 4th "_"-separated parts of V7, ".out" removed
name_parts <- strsplit(split = "\\_", FC$V7)
Sample <- as.data.frame(matrix(
  unlist(lapply(name_parts, function(x) gsub(".out", "", c(x[[2]], x[[3]], x[[4]])))),
  ncol = 3, byrow = TRUE
))
treat <- vapply(name_parts,
                function(x) gsub(".out", "", paste(x[[2]], x[[3]], x[[4]])),
                character(1))
# Numerator of the "a/b" value stored in V2
SharedHashes <- as.numeric(vapply(strsplit(split = "/", FC$V2),
                                  function(x) x[[1]], character(1)))
FC <- cbind.data.frame(Species, Sample, SharedHashes, treat)
# One column per sample, 0 where a species was not reported for a sample
data_wide <- spread(FC, treat, SharedHashes)
data_wide[is.na(data_wide)] <- 0
# Sum the per-sample columns (5 to 38) over all rows of the same species.
# The result already carries the species as row names; the former
# `rownames(data_wide) <- data_wide$Species` was dropped because it fails on
# duplicated species and its result was never used.
data_wide2 <- data.frame(do.call("rbind",
                                 by(data_wide[, c(5:38)], data_wide$Species, FUN = colSums)))
pheatmap(data_wide2[, 4:34])

########################################################
# Colonization analysis.
########################################################
Colonization <- read.table("colonization_vf.txt", header = TRUE)

# Order in which treatments appear on the plot axes and in the models.
treatment_levels <- c("MOCK", "DAOM197198", "B1", "Co-inoculation")

# Keep the rows of `data` whose `var` equals `genotype` and rename their
# treatment codes.
#   data     data frame with columns `var` and `treat`
#   genotype value of `var` to keep
#   renames  named character vector, names = gsub() patterns, values =
#            replacements; applied in the order given. Order matters because
#            some codes are contained in others ("CAN" in "CANB1", "A" in
#            "A+B"), so the longest code has to be replaced first.
# Returns the subset with `treat` as a factor with levels `treatment_levels`;
# values that do not end up as one of those levels become NA.
subset_genotype <- function(data, genotype, renames) {
  rows <- data[data$var == genotype, ]
  for (i in seq_along(renames)) {
    rows$treat <- gsub(names(renames)[i], renames[[i]], rows$treat)
  }
  rows$treat <- factor(rows$treat, levels = treatment_levels)
  rows
}

# Treatment codes used in the colonization table.
colonization_codes <- c(
  "CANB1" = "Co-inoculation",
  "CAN"   = "DAOM197198",
  "A$"    = "MOCK"
)

Col_COL2215 <- subset_genotype(Colonization, "V4_COL2215", colonization_codes)
Col_BRA337 <- subset_genotype(Colonization, "V5_BRA337", colonization_codes)
Col_CM4574 <- subset_genotype(Colonization, "V6_CM4574-7", colonization_codes)

AllCol <- rbind.data.frame(Col_COL2215, Col_BRA337, Col_CM4574)

ggplot(Col_COL2215, aes(x = treat, y = perc, fill = treat)) +
  geom_boxplot() +
  scale_fill_manual(values = wes_palette(n = 4, name = "Darjeeling1")) +
  ggtitle("Plant genotype COL2215") +
  xlab("") +
  ylab("Fungal colonization") +
  theme_bw() +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )

# Stat analysis
model <- lm(perc ~ treat, data = Col_COL2215)
summary(model)
ANOVA <- aov(model)
# Tukey test to study each pair of treatment :
TukeyHSD(x = ANOVA, "treat", conf.level = 0.95)

########################################################
# plant phenotypic response analysis. example of host genotype COL2215
########################################################
Harvest <- read.table("dry_weights.txt", header = TRUE)

# Treatment codes used in the dry-weight table.
harvest_codes <- c(
  "A\\+B" = "Co-inoculation",
  "A"     = "DAOM197198",
  "B"     = "B1",
  "C$"    = "MOCK"
)

Har_COL2215 <- subset_genotype(Harvest, "4", harvest_codes)
Har_BRA337 <- subset_genotype(Harvest, "5", harvest_codes)
Har_CM4574 <- subset_genotype(Harvest, "6", harvest_codes)

AllHar <- rbind.data.frame(Har_COL2215, Har_BRA337, Har_CM4574)

# Translate the numeric genotype codes with an exact lookup. Replacing them one
# after another with gsub() corrupts the first label: once "4" has become
# "COL2215", the following "5" -> "BRA337" substitution rewrites the "5" inside
# it and yields "COL221BRA337".
genotype_names <- c("4" = "COL2215", "5" = "BRA337", "6" = "CM4574")
AllHar$var <- unname(genotype_names[as.character(AllHar$var)])

###########
#COL2215
ggplot(Har_COL2215, aes(x = treat, y = total, fill = treat)) +
  geom_boxplot() +
  scale_fill_manual(values = wes_palette(n = 4, name = "Darjeeling1")) +
  ggtitle("Plant genotype COL2215") +
  xlab("") +
  ylab("Total dry weight") +
  theme_bw() +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )

# Stat analysis
model <- lm(total ~ treat, data = Har_COL2215)
summary(model)
ANOVA <- aov(model)
# Tukey test to study each pair of treatment :
TukeyHSD(x = ANOVA, "treat", conf.level = 0.95)