# Helm et al. - Characterization of differential transcript abundance through
# time during Nematostella vectensis development
#
# Set working directories for the analysis. These will depend on the setup of
# your computer.
# You will need the following:
# - The directory (inDir) where you placed the three supplementary data files:
#   - "Additional_file_7_matrix.txt"
#   - "Additional_file_9_blast2go_annotations.bygene.txt"
#   - "Additional_file_10_GO_transcript_annots.txt"
#   The latter file contains specific GO annotations from the blast2GO
#   pipeline that have been formatted for use with the topGO package.
inDir <- "~/data"
# - The directory (outDir) where you want the output files to be placed
outDir <- "~/analyses"

# Fail early, with a readable message, when an input file is missing instead
# of failing later inside read.table()/readMappings().
required_inputs <- c(
  "Additional_file_7_matrix.txt",
  "Additional_file_9_blast2go_annotations.bygene.txt",
  "Additional_file_10_GO_transcript_annots.txt"
)
missing_inputs <- required_inputs[
  !file.exists(file.path(inDir, required_inputs))
]
if (length(missing_inputs) > 0) {
  stop(
    "Missing input file(s) in ", inDir, ": ",
    paste(missing_inputs, collapse = ", "),
    call. = FALSE
  )
}
# The script later changes into outDir; make sure it exists.
if (!file.exists(outDir)) {
  dir.create(outDir, recursive = TRUE)
}

# Fetch required R packages.
# (The biocLite installer below is what was used originally; on current
# Bioconductor releases use
# BiocManager::install(c("edgeR", "topGO", "goseq", "GO.db")) instead.)
#source("http://bioconductor.org/biocLite.R")
#biocLite("edgeR")
#source("http://bioconductor.org/biocLite.R")
#biocLite("topGO")
#source("http://bioconductor.org/biocLite.R")
#biocLite("goseq")
#source("http://bioconductor.org/biocLite.R")
#biocLite("GO.db")
#source("http://bioconductor.org/biocLite.R")
#biocLite("BiocUpgrade")

library(edgeR)
library(goseq)
library(GO.db)
library(topGO)

#sessionInfo()
#> sessionInfo()
#R version 2.15.2 (2012-10-26)
#Platform: x86_64-apple-darwin9.8.0/x86_64 (64-bit)
#locale:
#[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#attached base packages:
#[1] stats graphics grDevices utils datasets methods base
#other attached packages:
# [1] goseq_1.10.0 geneLenDataBase_0.99.10 BiasedUrn_1.04 edgeR_3.0.4 limma_3.14.3 topGO_2.10.0 SparseM_0.96
# [8] GO.db_2.8.0 RSQLite_0.11.2 DBI_0.2-5 AnnotationDbi_1.20.3 Biobase_2.18.0 BiocGenerics_0.4.0 graph_1.36.1
#loaded via a namespace (and not attached):
# [1] biomaRt_2.14.0 Biostrings_2.26.2 bitops_1.0-4.2 BSgenome_1.26.1 GenomicFeatures_1.10.1 GenomicRanges_1.10.5 grid_2.15.2 IRanges_1.16.4
# [9] lattice_0.20-10 Matrix_1.0-10 mgcv_1.7-22 nlme_3.1-105 parallel_2.15.2 RCurl_1.95-3 Rsamtools_1.10.2 rtracklayer_1.18.1
#[17] stats4_2.15.2
# tools_2.15.2 XML_3.95-0.1 zlibbioc_1.4.0
# (tail of the sessionInfo() listing)

# ---- edgeR: exact tests between consecutive time points ----

# Read the matrix file.
x <- read.table(file.path(inDir, "Additional_file_7_matrix.txt"), header = TRUE)
x <- x[1:26511, ] # exclude ribosomal sequences

# Consecutive time intervals. `cols` are the four count columns of `x` used
# for the interval: two replicates of `from` followed by two of `to`.
intervals <- list(
  list(from = "2h",  to = "7h",  cols = 3:6),
  list(from = "7h",  to = "12h", cols = 5:8),
  list(from = "12h", to = "24h", cols = 7:10),
  list(from = "24h", to = "5d",  cols = 9:12),
  list(from = "5d",  to = "10d", cols = 11:14)
)
suffixes <- vapply(
  intervals,
  function(iv) paste(iv$from, iv$to, sep = "_"),
  character(1)
)

# Run the edgeR exactTest() pipeline for one interval.
#
# counts_table: data frame with a `transcript` column and the count columns.
# cols:         indices of the four count columns (2 x `from`, then 2 x `to`).
# from, to:     time-point labels; the test is exactTest(pair = c(from, to)).
#
# Returns a data frame with columns transcript, logFC_<from>_<to>,
# logCPM_<from>_<to>, PValue_<from>_<to> and P_adjust_<from>_<to>
# (Bonferroni-adjusted p-value). Sample table, dimensions after filtering and
# normalisation factors are printed as diagnostics.
run_exact_test <- function(counts_table, cols, from, to) {
  stopifnot(length(cols) == 4)

  counts <- counts_table[, cols]
  rownames(counts) <- counts_table$transcript
  counts[is.na(counts)] <- 0

  samples <- data.frame(
    Sample = colnames(counts),
    TP = factor(c(from, from, to, to))
  )
  print(samples)

  dge <- DGEList(counts = counts, group = samples$TP)
  colnames(dge) <- samples$Sample

  # Filter transcripts with no/low counts, then update library sizes.
  keep <- rowSums(cpm(dge)) >= 2
  dge <- dge[keep, ]
  dge$samples$lib.size <- colSums(dge$counts)
  print(dim(dge))

  # TMM normalization.
  dge <- calcNormFactors(dge)
  print(dge$samples)

  # Estimate dispersion and test.
  dge <- estimateCommonDisp(dge, verbose = TRUE)
  dge <- estimateTagwiseDisp(dge)
  de <- exactTest(dge, pair = c(from, to))

  # Build the results table with explicit column names rather than relying on
  # the deparsed names cbind() would generate.
  res <- de$table
  res <- cbind(transcript = rownames(res), res)
  res$P_adjust <- p.adjust(res$PValue, method = "bonferroni")
  stat_cols <- names(res) %in% c("logFC", "logCPM", "PValue", "P_adjust")
  names(res)[stat_cols] <- paste(
    names(res)[stat_cols], paste(from, to, sep = "_"),
    sep = "_"
  )
  res
}

de_tables <- lapply(
  intervals,
  function(iv) run_exact_test(x, iv$cols, iv$from, iv$to)
)
D  <- de_tables[[1]] # 2h-7h
D2 <- de_tables[[2]] # 7h-12h
D3 <- de_tables[[3]] # 12h-24h
D4 <- de_tables[[4]] # 24h-5d
D5 <- de_tables[[5]] # 5d-10d

# Merge the interval test results into one table.
m <- Reduce(
  function(left, right) merge(left, right, by = "transcript", all = TRUE),
  de_tables
)

# Add test results to the count matrix.
x <- x[, c(1:32, 53:59)] # matrix without edgeR analysis
m <- merge(x, m, by = "transcript", all = TRUE)
# Sort table: test results go to columns 33-52. Later sections index these
# columns by position, so this order must be kept.
m <- m[, c(1:32, 40:59, 33:39)]

# All output files below are written relative to outDir.
setwd(outDir)
#write.table(m,"matrix.txt")

# ---- Number of DE transcripts per interval (Bonferroni, 0.05 cut) ----

# Rows of `tbl` that are significantly DE in the interval `suffix`, with
# increasing (logFC > 0) or decreasing (logFC < 0) expression. Untested
# transcripts (NA adjusted p-value) are never selected.
select_de <- function(tbl, suffix, increasing, alpha = 0.05) {
  log_fc <- tbl[[paste("logFC", suffix, sep = "_")]]
  p_adj <- tbl[[paste("P_adjust", suffix, sep = "_")]]
  direction <- if (increasing) log_fc > 0 else log_fc < 0
  tbl[which(direction & p_adj < alpha), ]
}

# 2-7
i1 <- select_de(m, "2h_7h", increasing = TRUE)
d1 <- select_de(m, "2h_7h", increasing = FALSE)
I1 <- c(nrow(i1), -nrow(d1))
# 7-12
i2 <- select_de(m, "7h_12h", increasing = TRUE)
d2 <- select_de(m, "7h_12h", increasing = FALSE)
I2 <- c(nrow(i2), -nrow(d2))
# 12-24
i3 <- select_de(m, "12h_24h", increasing = TRUE)
d3 <- select_de(m, "12h_24h", increasing = FALSE)
I3 <- c(nrow(i3), -nrow(d3))
# 24-5d
i4 <- select_de(m, "24h_5d", increasing = TRUE)
d4 <- select_de(m, "24h_5d", increasing = FALSE)
I4 <- c(nrow(i4), -nrow(d4))
# 5d-10d
i5 <- select_de(m, "5d_10d", increasing = TRUE)
d5 <- select_de(m, "5d_10d", increasing = FALSE)
I5 <- c(nrow(i5), -nrow(d5))

# Row 1: number of increasing transcripts; row 2: minus the number of
# decreasing transcripts (so they are drawn below zero in the barplot).
si <- data.frame(I1, I2, I3, I4, I5)

# ---- Fig. 2: number of DE transcripts and log2FC versus log2CPM ----

# Transcripts that were actually tested in the interval `suffix`, with the
# transcript id (first column) as row names.
tested_rows <- function(tbl, suffix) {
  out <- tbl[!is.na(tbl[[paste("P_adjust", suffix, sep = "_")]]), ]
  rownames(out) <- out[, 1]
  out
}

m1 <- tested_rows(m, "2h_7h")
m2 <- tested_rows(m, "7h_12h")
m3 <- tested_rows(m, "12h_24h")
m4 <- tested_rows(m, "24h_5d")
m5 <- tested_rows(m, "5d_10d")

# Draw one log2FC-vs-log2CPM panel: all tested transcripts in translucent
# black, significant ones (adjusted p < 0.05) overplotted in red, and grey
# guide lines at log2FC = -1 and 1.
plot_fc_panel <- function(tbl, suffix, title, labelled) {
  log_cpm <- tbl[[paste("logCPM", suffix, sep = "_")]]
  log_fc <- tbl[[paste("logFC", suffix, sep = "_")]]
  is_de <- tbl[[paste("P_adjust", suffix, sep = "_")]] < .05

  # NOTE: the original code omitted xlim on panels C and F while also hiding
  # their x axis; all panels now share the same x range so that they are
  # directly comparable.
  plot(
    log_cpm, log_fc,
    main = title, pch = 16,
    ylim = c(-13, 13), xlim = c(-1, 13),
    col = rgb(0, 0, 0, 40, maxColorValue = 255),
    xaxt = if (labelled) "s" else "n",
    ylab = if (labelled) "log2FC" else "",
    xlab = if (labelled) "log2CPM" else ""
  )
  points(
    log_cpm[is_de], log_fc[is_de],
    pch = 16, col = rgb(255, 0, 0, 70, maxColorValue = 255)
  )
  abline(h = c(-1, 1), col = "grey")
}

# Write Fig_2.png: panel A is the barplot of DE counts, panels B-F are the
# per-interval log2FC-vs-log2CPM plots. The device is closed even when a
# plotting call fails.
draw_fig2 <- function(de_counts, tested, interval_names, file = "Fig_2.png") {
  png(file, height = 17, width = 17, units = "cm", res = 300)
  on.exit(dev.off(), add = TRUE)

  par(mfrow = c(2, 3), mgp = c(2.2, 0.7, 0))
  par(mar = c(7, 7, 5.5, 3.5) - 3.0)

  counts <- as.matrix(de_counts)
  bars <- barplot(
    counts,
    main = "A", ylab = "Number of DE transcripts", xlab = "Time intervals",
    ylim = c(-3800, 3800), beside = TRUE, col = c("red", "blue"),
    names.arg = interval_names, cex.names = 0.6
  )
  # Label each bar with its (positive) count: above for increasing, below for
  # decreasing transcripts.
  text(
    x = bars, y = counts, labels = as.character(abs(counts)),
    xpd = TRUE, pos = c(3, 1)
  )
  abline(h = 0, col = "black")

  titles <- c("B", "C", "D", "E", "F")
  labelled <- c(TRUE, FALSE, TRUE, FALSE, FALSE)
  for (k in seq_along(tested)) {
    plot_fc_panel(tested[[k]], interval_names[k], titles[k], labelled[k])
  }
  invisible(NULL)
}

draw_fig2(si, list(m1, m2, m3, m4, m5), suffixes)

# topGO
# Build the acyclic GO graph and output all annotations.
y <- read.table(file.path(inDir, "Additional_file_7_matrix.txt"), header = TRUE)
# Keep only transcripts that carry a GO annotation. read.table() turns the
# string "NA" into a missing value, so test for NA explicitly instead of
# relying on how a comparison against "NA" happens to be dropped.
gm <- y[!is.na(y$GO_ID) & y$GO_ID != "NA", ]
#> dim(gm)
#[1] 18308 59
# 18308 sequences in the reference of gene predictions have been annotated
# with GO terms.

# This is the blast2go output.
blast2GO <- readMappings(
  file.path(inDir, "Additional_file_9_blast2go_annotations.bygene.txt")
)

# Build a topGOdata object for one ontology and write all GO-term/transcript
# associations of its graph to "<on>_GO_transcript_annots.txt" in the current
# working directory (two unquoted columns: GO term, transcript; no header).
#
# geneList: named numeric vector of scores (names are transcript ids).
# on:       ontology, one of "BP", "MF", "CC".
# ns:       nodeSize passed to topGOdata.
# cutoff:   scores below this value count as selected genes; defaults to the
#           global `pval`, as in the original implementation.
# gene2go:  gene-to-GO mapping; defaults to the global `blast2GO`.
topgo <- function(geneList, on, ns, cutoff = pval, gene2go = blast2GO) {
  force(cutoff)
  select_significant <- function(allScore) {
    allScore < cutoff
  }

  # Build the topGO object.
  GOdata <- new(
    "topGOdata",
    ontology = on,
    allGenes = geneList,
    gene2GO = gene2go,
    geneSel = select_significant,
    annot = annFUN.gene2GO,
    nodeSize = ns
  )

  # Output GO-transcript associations; stack() yields (values, ind), so the
  # columns are swapped to put the GO term first.
  term_genes <- stack(genesInTerm(GOdata))
  write.table(
    term_genes[, c(2, 1)],
    quote = FALSE, col.names = FALSE, row.names = FALSE,
    file = paste0(on, "_GO_transcript_annots.txt")
  )
}

# One can use any interval to build the graph.
# Here we use interval 24h-5d.
int <- "24h-5d"
# Any interval could be chosen as we are only interested in building the
# acyclic graph and in outputting the annotations.
geneList <- gm$P_adjust_24h_5d
names(geneList) <- gm$transcript
# Any cutoff could be chosen, for the same reason.
pval <- 0.05
ns <- 1 # nodeSize

# BP, MF, CC
for (on in c("BP", "MF", "CC")) {
  topgo(geneList, on, ns)
}
# The three output files were concatenated.
# The complete set of annotations can be found in
# "Helm_et_al_GO_transcript_annots.txt"
# NOTE(review): presumably the same content as
# "Additional_file_10_GO_transcript_annots.txt", which is what is read
# below -- confirm.

# ---- GOseq ----
annot <- read.table(
  file.path(inDir, "Additional_file_10_GO_transcript_annots.txt"),
  header = FALSE
)
names(annot) <- c("go", "gene")
annot2 <- as.data.frame(annot)
rgo <- annot[c(2, 1)] # gene-to-category mapping: (gene, go)

# Run goseq for one interval and write the enriched GO categories to
# "<suffix>.txt" in the current working directory.
#
# tested:     data frame of transcripts tested in the interval (row names are
#             transcript ids) with columns transcript, length,
#             logFC_<suffix> and P_adjust_<suffix>.
# suffix:     interval label, e.g. "2h_7h"; selects the columns and names the
#             output file.
# annotation: data frame with columns go and gene.
# gene2cat:   gene-to-category mapping handed to goseq().
# alpha:      cutoff for both the DE call and the BH-adjusted enrichment.
#
# One line is written per enriched category:
#   interval , rank , GO id , ontology , term , BH p-value ,
#   n annotated in reference , n DE decreasing , n DE increasing
# Returns the table of enriched categories invisibly.
write_go_enrichment <- function(tested, suffix, annotation = annot2,
                                gene2cat = rgo, alpha = 0.05) {
  fc_col <- paste("logFC", suffix, sep = "_")
  padj_col <- paste("P_adjust", suffix, sep = "_")

  # Vector: DE genes get a 1, non-DE a 0.
  genes <- as.integer(tested[[padj_col]] < alpha)
  names(genes) <- row.names(tested)
  print(table(genes)) # check number of DE

  # Gene length vector used as bias data.
  glength <- tested$length
  names(glength) <- row.names(tested)

  # Weighting function and enrichment test.
  pwf <- nullp(genes, bias.data = glength)
  go_wall <- as.data.frame(goseq(pwf, gene2cat = gene2cat))
  go_wall$bh_adjust <- p.adjust(go_wall$over_represented_pvalue, method = "BH")
  enriched <- go_wall[which(go_wall$bh_adjust < alpha), ]

  # Transcript id, logFC and adjusted p-value joined to the GO annotation.
  tested_stats <- tested[, c("transcript", fc_col, padj_col)]
  annotated <- merge(
    annotation, tested_stats,
    by.x = "gene", by.y = "transcript"
  )

  # Write through an explicit connection that is closed even on error (the
  # original sink() stayed open when the loop failed).
  out <- file(paste0(suffix, ".txt"), open = "w")
  on.exit(close(out), add = TRUE)

  # seq_len() keeps the loop from running when nothing is enriched.
  for (rank in seq_len(nrow(enriched))) {
    go_id <- enriched$category[rank]

    # Number of sequences in the reference with this GO term.
    n_ref <- sum(annotation$go == go_id, na.rm = TRUE)

    # DE sequences with this GO term, split by direction of change.
    hits <- annotated[which(annotated$go == go_id), ]
    is_de <- hits[[padj_col]] < alpha
    n_down <- sum(is_de & hits[[fc_col]] < 0, na.rm = TRUE)
    n_up <- sum(is_de & hits[[fc_col]] > 0, na.rm = TRUE)

    term <- strsplit(Term(go_id), "\r")[[1]]
    ontology <- strsplit(Ontology(go_id), "\r")[[1]]

    cat(
      paste(suffix, ","), rank, ",", go_id, ",", ontology, ",", term, ",",
      enriched$bh_adjust[rank], ",", n_ref, ",", n_down, ",", n_up, "\n",
      file = out
    )
  }

  invisible(enriched)
}

# Enriched categories per interval, also kept in memory for inspection.
go_inputs <- list(
  "2h_7h" = m1,
  "7h_12h" = m2,
  "12h_24h" = m3,
  "24h_5d" = m4,
  "5d_10d" = m5
)
go_enrichment <- list()
for (interval_name in names(go_inputs)) {
  go_enrichment[[interval_name]] <- write_go_enrichment(
    go_inputs[[interval_name]], interval_name
  )
}