# Helm et al. - Characterization of differential transcript abundance through time during Nematostella vectensis development

# Set working directories for the analysis. These will depend on the setup of your computer.
# You will need the following:

# - The directory (inDir) where you want to place the three supplementary data files:
#	- "Additional_file_7_matrix.txt"
#	- "Additional_file_9_blast2go_annotations.bygene.txt"
#	- "Additional_file_10_GO_transcript_annots.txt"
#	The last file contains specific GO annotations from the blast2GO pipeline that have been formatted for use with the topGO package.

inDir <- "~/data"

# - The directory (outDir) where you want the output files to be placed

outDir <- "~/analyses"

# fetch required R packages

#source("http://bioconductor.org/biocLite.R")
#biocLite("edgeR")

#source("http://bioconductor.org/biocLite.R")
#biocLite("topGO") 

#source("http://bioconductor.org/biocLite.R")
#biocLite("goseq")

#source("http://bioconductor.org/biocLite.R")
#biocLite("GO.db")

#source("http://bioconductor.org/biocLite.R")
#biocLite("BiocUpgrade")

library(edgeR)
library(goseq)
library(GO.db)
library(topGO)

#sessionInfo()
#> sessionInfo()
#R version 2.15.2 (2012-10-26)
#Platform: x86_64-apple-darwin9.8.0/x86_64 (64-bit)

#locale:
#[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

#attached base packages:
#[1] stats     graphics  grDevices utils     datasets  methods   base     

#other attached packages:
# [1] goseq_1.10.0            geneLenDataBase_0.99.10 BiasedUrn_1.04          edgeR_3.0.4             limma_3.14.3            topGO_2.10.0            SparseM_0.96           
# [8] GO.db_2.8.0             RSQLite_0.11.2          DBI_0.2-5               AnnotationDbi_1.20.3    Biobase_2.18.0          BiocGenerics_0.4.0      graph_1.36.1           

#loaded via a namespace (and not attached):
# [1] biomaRt_2.14.0         Biostrings_2.26.2      bitops_1.0-4.2         BSgenome_1.26.1        GenomicFeatures_1.10.1 GenomicRanges_1.10.5   grid_2.15.2            IRanges_1.16.4        
# [9] lattice_0.20-10        Matrix_1.0-10          mgcv_1.7-22            nlme_3.1-105           parallel_2.15.2        RCurl_1.95-3           Rsamtools_1.10.2       rtracklayer_1.18.1    
#[17] stats4_2.15.2          tools_2.15.2           XML_3.95-0.1           zlibbioc_1.4.0  

# Load the count matrix from inDir; the first line of the file holds the column names.

x <- read.table(file.path(inDir, "Additional_file_7_matrix.txt"), header = TRUE)
# Keep rows 1 to 26511 only, which excludes the ribosomal sequences.
x <- x[seq_len(26511), ]

#exactTest()

#2h-7h

# Counts of the two 2h and the two 7h samples (matrix columns 3 to 6),
# indexed by transcript id; missing counts are set to zero.
s <- x[, 3:6]
rownames(s) <- x$transcript
s[is.na(s)] <- 0

# Sample sheet: one time-point label per count column (printed for inspection).
TP <- factor(c("2h", "2h", "7h", "7h"))
m <- data.frame(Sample = colnames(s), TP)
m

y <- DGEList(counts = s, group = m$TP)
colnames(y) <- m$Sample

#filter transcripts with no/low counts (CPM summed over the four samples below 2)
keep <- rowSums(cpm(y)) >= 2
y <- y[keep, ]

#update library sizes after filtering
y$samples$lib.size <- colSums(y$counts)
dim(y)

#TMM normalization
y <- calcNormFactors(y)
y$samples

#estimate dispersion
y <- estimateCommonDisp(y, verbose = TRUE)
y <- estimateTagwiseDisp(y)

de <- exactTest(y, pair = c("2h", "7h"))

#build results table
# The columns are named explicitly. Renaming the auto-generated names that
# cbind() derives from the deparsed argument text is fragile: if the deparsed
# text ever differs, the rename silently does nothing and the P_adjust column
# is missing downstream.
res <- de$table
D <- data.frame(
  transcript = rownames(res),
  logFC_2h_7h = res$logFC,
  logCPM_2h_7h = res$logCPM,
  PValue_2h_7h = res$PValue,
  P_adjust_2h_7h = p.adjust(res$PValue, method = "bonferroni"),
  row.names = rownames(res)
)

#7h-12h

# Counts of the two 7h and the two 12h samples (matrix columns 5 to 8),
# indexed by transcript id; missing counts are set to zero.
s <- x[, 5:8]
rownames(s) <- x$transcript
s[is.na(s)] <- 0

# Sample sheet: one time-point label per count column (printed for inspection).
TP <- factor(c("7h", "7h", "12h", "12h"))
m <- data.frame(Sample = colnames(s), TP)
m

y <- DGEList(counts = s, group = m$TP)
colnames(y) <- m$Sample

#filter transcripts with no/low counts (CPM summed over the four samples below 2)
keep <- rowSums(cpm(y)) >= 2
y <- y[keep, ]

#update library sizes after filtering
y$samples$lib.size <- colSums(y$counts)
dim(y)

#TMM normalization
y <- calcNormFactors(y)
y$samples

#estimate dispersion
y <- estimateCommonDisp(y, verbose = TRUE)
y <- estimateTagwiseDisp(y)

de <- exactTest(y, pair = c("7h", "12h"))

#build results table
# The columns are named explicitly. Renaming the auto-generated names that
# cbind() derives from the deparsed argument text is fragile: if the deparsed
# text ever differs, the rename silently does nothing and the P_adjust column
# is missing downstream.
res <- de$table
D2 <- data.frame(
  transcript = rownames(res),
  logFC_7h_12h = res$logFC,
  logCPM_7h_12h = res$logCPM,
  PValue_7h_12h = res$PValue,
  P_adjust_7h_12h = p.adjust(res$PValue, method = "bonferroni"),
  row.names = rownames(res)
)

#12h-24h

# Counts of the two 12h and the two 24h samples (matrix columns 7 to 10),
# indexed by transcript id; missing counts are set to zero.
s <- x[, 7:10]
rownames(s) <- x$transcript
s[is.na(s)] <- 0

# Sample sheet: one time-point label per count column (printed for inspection).
TP <- factor(c("12h", "12h", "24h", "24h"))
m <- data.frame(Sample = colnames(s), TP)
m

y <- DGEList(counts = s, group = m$TP)
colnames(y) <- m$Sample

#filter transcripts with no/low counts (CPM summed over the four samples below 2)
keep <- rowSums(cpm(y)) >= 2
y <- y[keep, ]

#update library sizes after filtering
y$samples$lib.size <- colSums(y$counts)
dim(y)

#TMM normalization
y <- calcNormFactors(y)
y$samples

#estimate dispersion
y <- estimateCommonDisp(y, verbose = TRUE)
y <- estimateTagwiseDisp(y)

de <- exactTest(y, pair = c("12h", "24h"))

#build results table
# The columns are named explicitly. Renaming the auto-generated names that
# cbind() derives from the deparsed argument text is fragile: if the deparsed
# text ever differs, the rename silently does nothing and the P_adjust column
# is missing downstream.
res <- de$table
D3 <- data.frame(
  transcript = rownames(res),
  logFC_12h_24h = res$logFC,
  logCPM_12h_24h = res$logCPM,
  PValue_12h_24h = res$PValue,
  P_adjust_12h_24h = p.adjust(res$PValue, method = "bonferroni"),
  row.names = rownames(res)
)

#24h-5d

# Counts of the two 24h and the two 5d samples (matrix columns 9 to 12),
# indexed by transcript id; missing counts are set to zero.
s <- x[, 9:12]
rownames(s) <- x$transcript
s[is.na(s)] <- 0

# Sample sheet: one time-point label per count column (printed for inspection).
TP <- factor(c("24h", "24h", "5d", "5d"))
m <- data.frame(Sample = colnames(s), TP)
m

y <- DGEList(counts = s, group = m$TP)
colnames(y) <- m$Sample

#filter transcripts with no/low counts (CPM summed over the four samples below 2)
keep <- rowSums(cpm(y)) >= 2
y <- y[keep, ]

#update library sizes after filtering
y$samples$lib.size <- colSums(y$counts)
dim(y)

#TMM normalization
y <- calcNormFactors(y)
y$samples

#estimate dispersion
y <- estimateCommonDisp(y, verbose = TRUE)
y <- estimateTagwiseDisp(y)

de <- exactTest(y, pair = c("24h", "5d"))

#build results table
# The columns are named explicitly. Renaming the auto-generated names that
# cbind() derives from the deparsed argument text is fragile: if the deparsed
# text ever differs, the rename silently does nothing and the P_adjust column
# is missing downstream.
res <- de$table
D4 <- data.frame(
  transcript = rownames(res),
  logFC_24h_5d = res$logFC,
  logCPM_24h_5d = res$logCPM,
  PValue_24h_5d = res$PValue,
  P_adjust_24h_5d = p.adjust(res$PValue, method = "bonferroni"),
  row.names = rownames(res)
)

#5d-10d

# Counts of the two 5d and the two 10d samples (matrix columns 11 to 14),
# indexed by transcript id; missing counts are set to zero.
s <- x[, 11:14]
rownames(s) <- x$transcript
s[is.na(s)] <- 0

# Sample sheet: one time-point label per count column (printed for inspection).
TP <- factor(c("5d", "5d", "10d", "10d"))
m <- data.frame(Sample = colnames(s), TP)
m

y <- DGEList(counts = s, group = m$TP)
colnames(y) <- m$Sample

#filter transcripts with no/low counts (CPM summed over the four samples below 2)
keep <- rowSums(cpm(y)) >= 2
y <- y[keep, ]

#update library sizes after filtering
y$samples$lib.size <- colSums(y$counts)
dim(y)

#TMM normalization
y <- calcNormFactors(y)
y$samples

#estimate dispersion
y <- estimateCommonDisp(y, verbose = TRUE)
y <- estimateTagwiseDisp(y)

de <- exactTest(y, pair = c("5d", "10d"))

#build results table
# The columns are named explicitly. Renaming the auto-generated names that
# cbind() derives from the deparsed argument text is fragile: if the deparsed
# text ever differs, the rename silently does nothing and the P_adjust column
# is missing downstream.
res <- de$table
D5 <- data.frame(
  transcript = rownames(res),
  logFC_5d_10d = res$logFC,
  logCPM_5d_10d = res$logCPM,
  PValue_5d_10d = res$PValue,
  P_adjust_5d_10d = p.adjust(res$PValue, method = "bonferroni"),
  row.names = rownames(res)
)

#merging the interval test results into one table

# Full outer join of the five per-interval tables on the transcript id,
# folded left to right (D, then D2, D3, D4, D5).
m <- Reduce(
  function(acc, nxt) merge(acc, nxt, by = "transcript", all = TRUE),
  list(D, D2, D3, D4, D5)
)

#add test results to count matrix

x <- x[, c(1:32, 53:59)] #matrix without edgeR analysis
m <- merge(x, m, by = "transcript", all = TRUE)
m <- m[, c(1:32, 40:59, 33:39)] #sort table: test results go before the last seven matrix columns

# All output files written from here on go to outDir.
setwd(file.path(outDir))
#write.table(m,"matrix.txt")

# Get the number of DE genes across timepoints (Bonferroni and 0.05 cut)
# For each interval: sigN flags transcripts with a non-missing adjusted p-value
# below 0.05, iN / dN hold the rows with increasing / decreasing expression,
# and IN stores the two counts (the decreasing count negated for the bar plot).

#2-7
sig1 <- !is.na(m$P_adjust_2h_7h) & (m$P_adjust_2h_7h < 0.05)
i1 <- m[sig1 & (m$logFC_2h_7h > 0), ] #increasing expression
d1 <- m[sig1 & (m$logFC_2h_7h < 0), ] #decreasing expression
I1 <- c(nrow(i1), -nrow(d1))

#7-12
sig2 <- !is.na(m$P_adjust_7h_12h) & (m$P_adjust_7h_12h < 0.05)
i2 <- m[sig2 & (m$logFC_7h_12h > 0), ]
d2 <- m[sig2 & (m$logFC_7h_12h < 0), ]
I2 <- c(nrow(i2), -nrow(d2))

#12-24
sig3 <- !is.na(m$P_adjust_12h_24h) & (m$P_adjust_12h_24h < 0.05)
i3 <- m[sig3 & (m$logFC_12h_24h > 0), ]
d3 <- m[sig3 & (m$logFC_12h_24h < 0), ]
I3 <- c(nrow(i3), -nrow(d3))

#24-5d
sig4 <- !is.na(m$P_adjust_24h_5d) & (m$P_adjust_24h_5d < 0.05)
i4 <- m[sig4 & (m$logFC_24h_5d > 0), ]
d4 <- m[sig4 & (m$logFC_24h_5d < 0), ]
I4 <- c(nrow(i4), -nrow(d4))

#5d-10d
sig5 <- !is.na(m$P_adjust_5d_10d) & (m$P_adjust_5d_10d < 0.05)
i5 <- m[sig5 & (m$logFC_5d_10d > 0), ]
d5 <- m[sig5 & (m$logFC_5d_10d < 0), ]
I5 <- c(nrow(i5), -nrow(d5))

#dataframe of DE counts: one column per interval, row 1 = increasing, row 2 = -decreasing
si <- data.frame(I1, I2, I3, I4, I5)


#Fig. 2 - Plot number of DE genes and Log2FC versus Log2CPM for all intervals

# One table per interval, restricted to transcripts with a non-missing adjusted
# p-value for that interval; the first column (transcript id) becomes the row names.
m1 <- m[!is.na(m$P_adjust_2h_7h), ]
rownames(m1) <- m1[, 1]
m2 <- m[!is.na(m$P_adjust_7h_12h), ]
rownames(m2) <- m2[, 1]
m3 <- m[!is.na(m$P_adjust_12h_24h), ]
rownames(m3) <- m3[, 1]
m4 <- m[!is.na(m$P_adjust_24h_5d), ]
rownames(m4) <- m4[, 1]
m5 <- m[!is.na(m$P_adjust_5d_10d), ]
rownames(m5) <- m5[, 1]

# Semi-transparent point colours: black for every plotted transcript,
# red overlay for those with adjusted p-value < 0.05.
col_all <- rgb(0, 0, 0, 40, maxColorValue = 255)
col_de <- rgb(255, 0, 0, 70, maxColorValue = 255)

png("Fig_2.png", height = 17, width = 17, units = "cm", res = 300)
par(mfrow = c(2, 3), mgp = c(2.2, 0.7, 0))
par(mar = c(7, 7, 5.5, 3.5) - 3.0)

# Panel A: number of DE transcripts per interval (red = increasing, blue = decreasing).
bar_heights <- as.matrix(si)
bar_labels <- as.character(c(
  nrow(i1), nrow(d1),
  nrow(i2), nrow(d2),
  nrow(i3), nrow(d3),
  nrow(i4), nrow(d4),
  nrow(i5), nrow(d5)
))
bl <- barplot(
  bar_heights,
  main = "A", ylab = "Number of DE transcripts", xlab = "Time intervals",
  ylim = c(-3800, 3800), beside = TRUE, col = c("red", "blue"),
  names.arg = c("2h_7h", "7h_12h", "12h_24h", "24h_5d", "5d_10d"),
  cex.names = 0.6
)
# pos alternates 3 (above) and 1 (below) so counts sit outside the up and down bars.
text(x = bl, y = bar_heights, labels = bar_labels, xpd = TRUE, pos = c(3, 1))
abline(0, 0, col = "black")

# Panel B: 2h-7h
hit_b <- m1$P_adjust_2h_7h < .05
plot(
  m1$logCPM_2h_7h, m1$logFC_2h_7h,
  main = "B", pch = 16, ylim = c(-13, 13), xlim = c(-1, 13),
  col = col_all, ylab = "log2FC", xlab = "log2CPM"
)
points(m1$logCPM_2h_7h[hit_b], m1$logFC_2h_7h[hit_b], pch = 16, col = col_de)
abline(h = c(-1, 1), col = "grey")

# Panel C: 7h-12h (no x axis drawn, no axis titles, default x range)
hit_c <- m2$P_adjust_7h_12h < .05
plot(
  m2$logCPM_7h_12h, m2$logFC_7h_12h,
  main = "C", pch = 16, ylim = c(-13, 13),
  col = col_all, xaxt = "n", ylab = "", xlab = ""
)
points(m2$logCPM_7h_12h[hit_c], m2$logFC_7h_12h[hit_c], pch = 16, col = col_de)
abline(h = c(-1, 1), col = "grey")

# Panel D: 12h-24h
hit_d <- m3$P_adjust_12h_24h < .05
plot(
  m3$logCPM_12h_24h, m3$logFC_12h_24h,
  main = "D", pch = 16, ylim = c(-13, 13), xlim = c(-1, 13),
  col = col_all, ylab = "log2FC", xlab = "log2CPM"
)
points(m3$logCPM_12h_24h[hit_d], m3$logFC_12h_24h[hit_d], pch = 16, col = col_de)
abline(h = c(-1, 1), col = "grey")

# Panel E: 24h-5d (no x axis drawn, no axis titles)
hit_e <- m4$P_adjust_24h_5d < .05
plot(
  m4$logCPM_24h_5d, m4$logFC_24h_5d,
  main = "E", pch = 16, ylim = c(-13, 13), xlim = c(-1, 13),
  col = col_all, xaxt = "n", ylab = "", xlab = ""
)
points(m4$logCPM_24h_5d[hit_e], m4$logFC_24h_5d[hit_e], pch = 16, col = col_de)
abline(h = c(-1, 1), col = "grey")

# Panel F: 5d-10d (no x axis drawn, no axis titles, default x range)
hit_f <- m5$P_adjust_5d_10d < .05
plot(
  m5$logCPM_5d_10d, m5$logFC_5d_10d,
  main = "F", pch = 16, ylim = c(-13, 13),
  col = col_all, xaxt = "n", ylab = "", xlab = ""
)
points(m5$logCPM_5d_10d[hit_f], m5$logFC_5d_10d[hit_f], pch = 16, col = col_de)
abline(h = c(-1, 1), col = "grey")
dev.off()

#topGO

#Build the acyclic GO graph and output all annotations.

# Read the complete matrix file again (x was truncated and had columns removed above).
y <- read.table(file.path(inDir, "Additional_file_7_matrix.txt"), header = TRUE)
# Keep the rows that carry a GO annotation. read.table() converts the literal
# "NA" to a missing value, so missingness is tested with is.na() instead of
# comparing against the string "NA".
gm <- y[!is.na(y$GO_ID), ]
#> dim(gm)
#[1] 18308    59 	#18308 sequences in the reference of gene predictions have been annotated with GO terms

blast2GO <- readMappings(file.path(inDir, "Additional_file_9_blast2go_annotations.bygene.txt")) # this is the blast2go output

# Build a topGOdata object for one ontology and write its GO term / transcript
# associations to "<on>_GO_transcript_annots.txt" in the working directory.
#
# geneList: named numeric vector of scores; the names are transcript ids
# on:       ontology handed to topGOdata (e.g. "BP", "MF", "CC")
# ns:       nodeSize handed to topGOdata
# cutoff:   genes with a score below this value are flagged as selected.
#           Defaults to the global `pval`, which the function previously read
#           implicitly, so existing three-argument calls behave as before.
#
# The gene-to-GO mapping is taken from the global `blast2GO`.
topgo <- function(geneList, on, ns, cutoff = pval) {
  # Resolve the cutoff now so a missing `pval` fails here, not inside topGO.
  force(cutoff)

  topDiffGenes <- function(allScore) {
    allScore < cutoff
  }

  #build topGO object
  GOdata <- new(
    "topGOdata",
    ontology = on, allGenes = geneList, gene2GO = blast2GO,
    geneSel = topDiffGenes, annot = annFUN.gene2GO, nodeSize = ns
  )

  #output GO-transcript associations
  ann.genes <- genesInTerm(GOdata)
  ag <- stack(ann.genes) # long format: column 1 = transcript, column 2 = GO term
  agf <- as.data.frame(ag)
  agf2 <- agf[, c(2, 1)] # GO term first, transcript second
  write.table(
    agf2,
    quote = FALSE, col.names = FALSE, row.names = FALSE,
    file = paste0(on, "_GO_transcript_annots.txt")
  )
}

# One can use any interval to build the graph
# Here we use interval 24h-5d.

int <- "24h-5d"
# Scores named by transcript id. Any interval could be chosen as we are only
# interested in building the acyclic graph and in outputting the annotations.
geneList <- setNames(gm$P_adjust_24h_5d, gm$transcript)
pval <- 0.05  # any cutoff could be chosen, for the same reason
ns <- 1 #nodeSize

# Export the annotations once per ontology: BP, MF, CC.
for (on in c("BP", "MF", "CC")) {
  topgo(geneList, on, ns)
}

#The three output files were concatenated. The complete set of annotations can be found in "Helm_et_al_GO_transcript_annots.txt"

#GOseq

# GO term / transcript pairs, one pair per line, no header line in the file.
annot <- read.table(file.path(inDir, "Additional_file_10_GO_transcript_annots.txt"), header = FALSE)
colnames(annot) <- c("go", "gene")
annot2 <- as.data.frame(annot)

# Same pairs with the columns swapped (gene first, GO term second); passed to goseq() as gene2cat.
rgo <- annot[, c("gene", "go")]

#2h_7h

#--------

#vector: DE genes get a 1, non DE a 0
genes <- as.integer(m1$P_adjust_2h_7h < .05)
names(genes) <- row.names(m1)

#check number of DE
table(genes)

#get gene length vector
glength <- m1$length
names(glength) <- row.names(m1)

#weighing function
pwf <- nullp(genes, bias.data = glength)

GO.wall <- goseq(pwf, gene2cat = rgo)

#------

k <- as.data.frame(GO.wall) # goseq result table
k$bh_adjust <- p.adjust(k$over_represented_pvalue, method = "BH") #add adjusted p-values
enr <- subset(k, k$bh_adjust < .05) #get enriched GO categories

#-------

# transcript id, logFC_2h_7h and P_adjust_2h_7h, selected by name rather than by position
sys <- m1[, c("transcript", "logFC_2h_7h", "P_adjust_2h_7h")]
me <- merge(annot2, sys, by.x = "gene", by.y = "transcript") # merge with annotation file

# One comma-separated line per enriched GO term:
# interval, rank, GO id, ontology, term, BH-adjusted p-value, number of
# reference sequences with the term, number of DE sequences with the term and
# decreasing expression, number with increasing expression.
# A file connection closed in `finally` replaces sink(), so an error inside the
# loop cannot leave console output redirected.
out <- file("2h_7h.txt", open = "w")
tryCatch({
  # seq_len() gives an empty loop (and an empty file) when nothing is enriched
  for (i in seq_len(nrow(enr))) {
    line <- enr[i, ]
    got <- line$category # the enriched GO term
    count <- sum(annot2$go == got, na.rm = TRUE) # sequences in reference with this GO term
    mess <- me[me$go == got, ]
    sig <- mess$P_adjust_2h_7h < 0.05
    count2 <- sum(sig & mess$logFC_2h_7h < 0, na.rm = TRUE) # DE, decreasing expression
    count3 <- sum(sig & mess$logFC_2h_7h > 0, na.rm = TRUE) # DE, increasing expression
    term <- strsplit(Term(got), "\r")
    onto <- strsplit(Ontology(got), "\r")
    cat("2h_7h ,", i, ",", got, ",", onto[[1]], ",", term[[1]], ",", line$bh_adjust, ",", count, ",", count2, ",", count3, "\n", file = out)
  }
}, finally = close(out))

#7h_12h

#vector: DE genes get a 1, non DE a 0
genes <- as.integer(m2$P_adjust_7h_12h < .05)
names(genes) <- row.names(m2)

#check number of DE
table(genes)

#get gene length vector
glength <- m2$length
names(glength) <- row.names(m2)

#weighing function
pwf <- nullp(genes, bias.data = glength)

GO.wall <- goseq(pwf, gene2cat = rgo)

#------

k <- as.data.frame(GO.wall) # goseq result table
k$bh_adjust <- p.adjust(k$over_represented_pvalue, method = "BH") #add adjusted p-values
enr <- subset(k, k$bh_adjust < .05) #get enriched GO categories

#-------

# transcript id, logFC_7h_12h and P_adjust_7h_12h, selected by name rather than by position
sys <- m2[, c("transcript", "logFC_7h_12h", "P_adjust_7h_12h")]
me <- merge(annot2, sys, by.x = "gene", by.y = "transcript") # merge with annotation file

# One comma-separated line per enriched GO term:
# interval, rank, GO id, ontology, term, BH-adjusted p-value, number of
# reference sequences with the term, number of DE sequences with the term and
# decreasing expression, number with increasing expression.
# A file connection closed in `finally` replaces sink(), so an error inside the
# loop cannot leave console output redirected.
out <- file("7h_12h.txt", open = "w")
tryCatch({
  # seq_len() gives an empty loop (and an empty file) when nothing is enriched
  for (i in seq_len(nrow(enr))) {
    line <- enr[i, ]
    got <- line$category # the enriched GO term
    count <- sum(annot2$go == got, na.rm = TRUE) # sequences in reference with this GO term
    mess <- me[me$go == got, ]
    sig <- mess$P_adjust_7h_12h < 0.05
    count2 <- sum(sig & mess$logFC_7h_12h < 0, na.rm = TRUE) # DE, decreasing expression
    count3 <- sum(sig & mess$logFC_7h_12h > 0, na.rm = TRUE) # DE, increasing expression
    term <- strsplit(Term(got), "\r")
    onto <- strsplit(Ontology(got), "\r")
    cat("7h_12h ,", i, ",", got, ",", onto[[1]], ",", term[[1]], ",", line$bh_adjust, ",", count, ",", count2, ",", count3, "\n", file = out)
  }
}, finally = close(out))

#12h_24h

#vector: DE genes get a 1, non DE a 0
genes <- as.integer(m3$P_adjust_12h_24h < .05)
names(genes) <- row.names(m3)

#check number of DE
table(genes)

#get gene length vector
glength <- m3$length
names(glength) <- row.names(m3)

#weighing function
pwf <- nullp(genes, bias.data = glength)

GO.wall <- goseq(pwf, gene2cat = rgo)

#------

k <- as.data.frame(GO.wall) # goseq result table
k$bh_adjust <- p.adjust(k$over_represented_pvalue, method = "BH") #add adjusted p-values
enr <- subset(k, k$bh_adjust < .05) #get enriched GO categories

#-------

# transcript id, logFC_12h_24h and P_adjust_12h_24h, selected by name rather than by position
sys <- m3[, c("transcript", "logFC_12h_24h", "P_adjust_12h_24h")]
me <- merge(annot2, sys, by.x = "gene", by.y = "transcript") # merge with annotation file

# One comma-separated line per enriched GO term:
# interval, rank, GO id, ontology, term, BH-adjusted p-value, number of
# reference sequences with the term, number of DE sequences with the term and
# decreasing expression, number with increasing expression.
# A file connection closed in `finally` replaces sink(), so an error inside the
# loop cannot leave console output redirected.
out <- file("12h_24h.txt", open = "w")
tryCatch({
  # seq_len() gives an empty loop (and an empty file) when nothing is enriched
  for (i in seq_len(nrow(enr))) {
    line <- enr[i, ]
    got <- line$category # the enriched GO term
    count <- sum(annot2$go == got, na.rm = TRUE) # sequences in reference with this GO term
    mess <- me[me$go == got, ]
    sig <- mess$P_adjust_12h_24h < 0.05
    count2 <- sum(sig & mess$logFC_12h_24h < 0, na.rm = TRUE) # DE, decreasing expression
    count3 <- sum(sig & mess$logFC_12h_24h > 0, na.rm = TRUE) # DE, increasing expression
    term <- strsplit(Term(got), "\r")
    onto <- strsplit(Ontology(got), "\r")
    cat("12h_24h ,", i, ",", got, ",", onto[[1]], ",", term[[1]], ",", line$bh_adjust, ",", count, ",", count2, ",", count3, "\n", file = out)
  }
}, finally = close(out))

#24h_5d

#vector: DE genes get a 1, non DE a 0
genes <- as.integer(m4$P_adjust_24h_5d < .05)
names(genes) <- row.names(m4)

#check number of DE
table(genes)

#get gene length vector
glength <- m4$length
names(glength) <- row.names(m4)

#weighing function
pwf <- nullp(genes, bias.data = glength)

GO.wall <- goseq(pwf, gene2cat = rgo)

#------

k <- as.data.frame(GO.wall) # goseq result table
k$bh_adjust <- p.adjust(k$over_represented_pvalue, method = "BH") #add adjusted p-values
enr <- subset(k, k$bh_adjust < .05) #get enriched GO categories

#-------

# transcript id, logFC_24h_5d and P_adjust_24h_5d, selected by name rather than by position
sys <- m4[, c("transcript", "logFC_24h_5d", "P_adjust_24h_5d")]
me <- merge(annot2, sys, by.x = "gene", by.y = "transcript") # merge with annotation file

# One comma-separated line per enriched GO term:
# interval, rank, GO id, ontology, term, BH-adjusted p-value, number of
# reference sequences with the term, number of DE sequences with the term and
# decreasing expression, number with increasing expression.
# A file connection closed in `finally` replaces sink(), so an error inside the
# loop cannot leave console output redirected.
out <- file("24h_5d.txt", open = "w")
tryCatch({
  # seq_len() gives an empty loop (and an empty file) when nothing is enriched
  for (i in seq_len(nrow(enr))) {
    line <- enr[i, ]
    got <- line$category # the enriched GO term
    count <- sum(annot2$go == got, na.rm = TRUE) # sequences in reference with this GO term
    mess <- me[me$go == got, ]
    sig <- mess$P_adjust_24h_5d < 0.05
    count2 <- sum(sig & mess$logFC_24h_5d < 0, na.rm = TRUE) # DE, decreasing expression
    count3 <- sum(sig & mess$logFC_24h_5d > 0, na.rm = TRUE) # DE, increasing expression
    term <- strsplit(Term(got), "\r")
    onto <- strsplit(Ontology(got), "\r")
    cat("24h_5d ,", i, ",", got, ",", onto[[1]], ",", term[[1]], ",", line$bh_adjust, ",", count, ",", count2, ",", count3, "\n", file = out)
  }
}, finally = close(out))

#5d-10d

#vector: DE genes get a 1, non DE a 0
genes <- as.integer(m5$P_adjust_5d_10d < .05)
names(genes) <- row.names(m5)

#check number of DE
table(genes)

#get gene length vector
glength <- m5$length
names(glength) <- row.names(m5)

#weighing function
pwf <- nullp(genes, bias.data = glength)

GO.wall <- goseq(pwf, gene2cat = rgo)

#------

k <- as.data.frame(GO.wall) # goseq result table
k$bh_adjust <- p.adjust(k$over_represented_pvalue, method = "BH") #add adjusted p-values
enr <- subset(k, k$bh_adjust < .05) #get enriched GO categories

#-------

# transcript id, logFC_5d_10d and P_adjust_5d_10d, selected by name rather than by position
sys <- m5[, c("transcript", "logFC_5d_10d", "P_adjust_5d_10d")]
me <- merge(annot2, sys, by.x = "gene", by.y = "transcript") # merge with annotation file

# One comma-separated line per enriched GO term:
# interval, rank, GO id, ontology, term, BH-adjusted p-value, number of
# reference sequences with the term, number of DE sequences with the term and
# decreasing expression, number with increasing expression.
# A file connection closed in `finally` replaces sink(), so an error inside the
# loop cannot leave console output redirected.
out <- file("5d_10d.txt", open = "w")
tryCatch({
  # seq_len() gives an empty loop (and an empty file) when nothing is enriched
  for (i in seq_len(nrow(enr))) {
    line <- enr[i, ]
    got <- line$category # the enriched GO term
    count <- sum(annot2$go == got, na.rm = TRUE) # sequences in reference with this GO term
    mess <- me[me$go == got, ]
    sig <- mess$P_adjust_5d_10d < 0.05
    count2 <- sum(sig & mess$logFC_5d_10d < 0, na.rm = TRUE) # DE, decreasing expression
    count3 <- sum(sig & mess$logFC_5d_10d > 0, na.rm = TRUE) # DE, increasing expression
    term <- strsplit(Term(got), "\r")
    onto <- strsplit(Ontology(got), "\r")
    cat("5d_10d ,", i, ",", got, ",", onto[[1]], ",", term[[1]], ",", line$bh_adjust, ",", count, ",", count2, ",", count3, "\n", file = out)
  }
}, finally = close(out))