# Chavan R. et al. 2015
# Simplified version of the script used to estimate differentially expressed
# genes in the RNA-Seq experiment.
# The analysis was performed in RStudio version 0.98.1083
# R version 3.1.1 (2014-07-10)
# Platform: x86_64-apple-darwin13.1.0 (64-bit)

########################################################
#
# Packages and Version information
#
########################################################
library("DESeq2")        # Version 1.6.2
library("vsn")           # Version 3.34.0
library("RColorBrewer")  # Version 1.1-2
library("gplots")        # Version 2.16.0
library("genefilter")    # Version 1.48.1
library("Biobase")       # Version 2.26.0
library("limma")         # Version 3.22.4
library("car")           # Version 2.0-24

########################################################
#
# Read files from HTseq-count output
#
########################################################
directory <- "../Samples"
sampleFiles <- list.files(path = directory)

# Three C1 samples followed by three C2 samples; C2 is the first factor level.
sampleCondition <- factor(c(rep("C1", 3), rep("C2", 3)),
                          levels = c("C2", "C1"))

# data.frame() silently recycles the shorter vector when the lengths divide
# evenly, so make sure there is exactly one count file per condition label.
stopifnot(length(sampleFiles) == length(sampleCondition))

sampleTable <- data.frame(sampleName = sampleFiles,
                          fileName = sampleFiles,
                          condition = sampleCondition)
des <- formula(~ condition)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
                                       directory = directory,
                                       design = des)

# Normalization of the samples
ddsHTSeq <- estimateSizeFactors(ddsHTSeq)

# Estimate biological variance
ddsHTSeq <- estimateDispersions(ddsHTSeq)

# First preview of the dispersion
plotDispEsts(ddsHTSeq, ylim = c(1e-6, 1e1))

# Estimate differential expression between different conditions
ddsHTSeq <- nbinomWaldTest(ddsHTSeq)

# Results ordered by adjusted p-value
resultsNames(ddsHTSeq)
res <- results(ddsHTSeq)
res <- res[order(res$padj), ]
head(res)

# MA plot. limma is attached after DESeq2 and also defines plotMA(), so the
# DESeq2 version is requested explicitly.
DESeq2::plotMA(res, ylim = c(-3, 3),
               main = "Differential expression analysis between conditions")
abline(h = c(-1.5, 1.5), col = "dodgerblue", lwd = 2)

# Number of genes with adjusted p-value below 0.05
sum(res$padj < .05, na.rm = TRUE)
#mcols(res, use.names=TRUE)

# which() drops the rows whose padj is NA
resSig <- res[which(res$padj < .05), ]

# NOTE(review): the ".../" prefix looks like an elided directory and the file
# name contains a typo ("Sifnificant"); both are kept unchanged here because
# other tooling may depend on the name -- confirm the intended output path.
write.table(as.data.frame(resSig), ".../Sifnificant_genes_C1_C2.csv",
            col.names = NA, quote = FALSE, sep = "\t")

########################################################
#
# Estimation of the influence of a data point
#
########################################################
# `res` was reordered by padj above, while the Cook's distances are in the row
# order of ddsHTSeq. Put the Wald statistic back into ddsHTSeq row order so
# both vectors describe the same gene at each position.
W <- res$stat[match(rownames(ddsHTSeq), rownames(res))]
maxCooks <- apply(assays(ddsHTSeq)[["cooks"]], 1, max)
idx <- !is.na(W)
plot(rank(W[idx]), maxCooks[idx],
     xlab = "Rank W.S", ylab = "Distance per gene",
     ylim = c(0, 5), cex = .4, col = rgb(0, 0, 0, .3))
abline(h = 1, col = "red")

########################################################
#
# Standard deviation across all samples
#
########################################################
# Transformed versions of the counts used by the plots below.
rld <- rlog(ddsHTSeq)
vsd <- varianceStabilizingTransformation(ddsHTSeq)

notAllZero <- (rowSums(counts(ddsHTSeq)) > 0)
meanSdPlot(log2(counts(ddsHTSeq, normalized = TRUE)[notAllZero, ] + 1),
           ylim = c(0, 2.5))
meanSdPlot(assay(rld[notAllZero, ]), ylim = c(0, 2.5))
meanSdPlot(assay(vsd[notAllZero, ]), ylim = c(0, 2.5))

########################################################
#
# Plot distance of samples
#
########################################################
distance <- dist(t(assay(rld)))
Dist_perf <- as.matrix(distance)
rownames(Dist_perf) <- colnames(Dist_perf) <-
  with(colData(ddsHTSeq), paste(condition, sampleFiles, sep = " : "))

# NOTE(review): the original palette definition is not part of this script;
# this 100-step "GnBu" ramp is an assumption -- confirm the intended colours.
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
heatmap.2(Dist_perf, trace = "none", col = rev(hmcol), margins = c(16, 16))
print(plotPCA(rld, intgroup = c("condition")))

########################################################
#
# Heatmap produced from the analysis
#
########################################################
# Plot between the 2 conditions of interest
d1 <- read.csv("C1_Paj.csv", header = TRUE, sep = "\t")
d2 <- read.csv("C2_Paj.csv", header = TRUE, sep = "\t")

# Merge data on the shared key column "X", keeping rows found in either file
data <- merge(d1, d2, by = "X", incomparables = NA, all = TRUE)
colnames(data) <- c("Gene", "C1", "C2")

# Convert a factor or character column to numeric; other columns pass through.
as_numeric_column <- function(x) {
  if (is.factor(x) || is.character(x)) {
    as.numeric(as.character(x))
  } else {
    x
  }
}

# Build the numeric matrix from the two value columns only, so the gene
# identifiers never force the matrix to character.
data2 <- cbind(C1 = as_numeric_column(data$C1),
               C2 = as_numeric_column(data$C2))
rownames(data2) <- as.character(data[, 1])
write.table(data2, "./order.csv", sep = "\t")

hm <- heatmap.2(data2, scale = "col", Rowv = FALSE, Colv = FALSE,
                symkey = FALSE, margins = c(8, 8),
                cexRow = 0.7, cexCol = 1.0, key = TRUE, keysize = 1.5,
                trace = "none", density.info = c("density"),
                tracecol = "blue", col = redgreen(100),
                main = "C1 vs C2")

# Example of the selection
selection <- 1350:1370
# Indexing past the last row would silently add all-NA rows to the heatmap.
stopifnot(max(selection) <= nrow(data2))
slist2 <- data2[selection, , drop = FALSE]

hm <- heatmap.2(slist2, scale = "col", Rowv = FALSE, Colv = FALSE,
                symkey = FALSE, margins = c(8, 8),
                cexRow = 0.7, cexCol = 1.0, key = TRUE, keysize = 1.5,
                trace = "none", density.info = c("density"),
                tracecol = "blue", col = redgreen(100),
                main = "FoldChange C1 vs. C2")