* Check RNA-seq read quality with FastQC

  Version: 0.11.5

* Map reads to genome with TopHat

  * Build genome index for TopHat using PlasmoDB P. berghei ANKA genome version 28

    Version: bowtie2/2.2.9

    Command:

        bowtie2-build PlasmoDB-28_PbergheiANKA_Genome.fa <index_base>

  * Map reads with TopHat

    Versions: tophat/2.1.1 bowtie2/2.2.9 samtools/1.3.1 boost/1.60.0

    Command:

        tophat -p 8 --library-type fr-firststrand --max-intron-length 3000 --min-intron-length 4 --output-dir <output_dir> <index_base> <1.fastq> <2.fastq>

* Check quality of mapping with samtools flagstat

  Version: samtools/1.3.1

  Command:

      samtools flagstat <accepted_hits.bam>

* Analyse differential expression with limma/voom

  Versions: limma/3.28.14 edgeR/3.4.0 Rsubread/1.22.2 R/3.3.1

  Commands:

      fa_targets <- readTargets(file=fa_bam_list)
      fa_celltype <- factor(fa_targets$CellType)
      fa_design <- model.matrix(~fa_celltype)
      fa_fcounts <- featureCounts(files=fa_targets$BamFile,annot.ext=gff_file,isGTFAnnotationFile=TRUE,nthreads=8,isPairedEnd=TRUE,strandSpecific=2)
      fa_isexpr <- rowSums(cpm(fa_fcounts$counts) > 3) >= 3
      fa_x <- fa_fcounts$counts[fa_isexpr,]
      fa_y <- voom(fa_x,fa_design,plot=TRUE)
      plotMDS(fa_y,xlim=c(-5,5))
      fa_fit <- eBayes(lmFit(fa_y,fa_design))
      write.csv(topTable(fa_fit,coef=2,number=Inf,p.value=0.05),file="limma_out_fa")
      write.csv(topTable(fa_fit,coef=2,number=Inf,p.value=1),file="limma_out_fa_full")

* Quantify expression as RPKM values with edgeR

  Versions: limma/3.28.14 edgeR/3.4.0 Rsubread/1.22.2 R/3.3.1

  Commands:

      all_targets <- readTargets(file=all_bam_list)
      all_celltype <- factor(all_targets$CellType)
      all_design <- model.matrix(~all_celltype)
      all_fcounts <- featureCounts(files=all_targets$BamFile,annot.ext=gff_file,isGTFAnnotationFile=TRUE,nthreads=8,isPairedEnd=TRUE,strandSpecific=2)
      x <- DGEList(counts=all_fcounts$counts, genes=all_fcounts$annotation[,c("GeneID","Length")])
      x_rpkm <- rpkm(x,x$genes$Length)
      write.csv(x_rpkm,file="all_rpkm")