Please, direct questions to Fernando Biase at fbiase at auburn dot edu. For updated contact, visit www.biaselaboratory.com
The original data used in this study are available in the NCBI GEO public repository under accession GSE74152.
The files utilized as input for our analysis can be downloaded using the following links:
2017_06_17_fpkm_d18_EET_filtered.txt.bz2
2017_06_17_fpkm_d18_ENDO_CAR_filtered.txt.bz2
2017_06_17_fpkm_d18_ENDO_ICAR_filtered.txt.bz2
2017_12_20_annotation.ensembl.symbol.txt.bz2
2017_12_20_annotation.genelength.biomart.txt.bz2
2017_12_20_annotation.GO.biomart.txt.bz2
Load the libraries needed for the analyses
library("WGCNA")
library("ggplot2")
library("reshape")
library("multtest")
library("goseq")
library("flashClust")
library("gplots")
library("foreach")
library("doParallel")
library("bigmemory")
library("gtools")
library("biomaRt")
library("VennDiagram")
library("Rtsne")
library("ComplexHeatmap")
library("circlize")
library("ggrepel")
library("dendextend")
library("edgeR")
library("parallelDist")
library('scales')
library('org.Bt.eg.db')
library('ggpubr')
library('vegan')
library('knitr')
setwd("/data/auburn/eet_endo_interaction/knit/")
Load the files containing expression data
# Read the three filtered FPKM tables (bzip2-compressed text with a header row).
fpkm_eet_day_18_AI <- read.table(
  "2017_06_17_fpkm_d18_EET_filtered.txt.bz2",
  header = TRUE,
  stringsAsFactors = FALSE
)
fpkm_endo_day_18_C_AI <- read.table(
  "2017_06_17_fpkm_d18_ENDO_CAR_filtered.txt.bz2",
  header = TRUE,
  stringsAsFactors = FALSE
)
fpkm_endo_day_18_IC_AI <- read.table(
  "2017_06_17_fpkm_d18_ENDO_ICAR_filtered.txt.bz2",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Venn diagram of the row names (gene identifiers) present in each table.
# filename = NULL returns a grid object instead of writing an image file.
ven_diagramm <- venn.diagram(
  list(
    "EET" = rownames(fpkm_eet_day_18_AI),
    "CAR" = rownames(fpkm_endo_day_18_C_AI),
    "ICAR" = rownames(fpkm_endo_day_18_IC_AI)
  ),
  filename = NULL,
  height = 1000,
  width = 1000,
  fill = c("#CC79A7", "#009E73", "#0072B2"),
  alpha = 0.3,
  lty = rep("blank", 3),
  cex = 0.9,
  cat.cex = c(1, 1, 1),
  cat.pos = c(0, 0, 180),
  euler.d = FALSE,
  scaled = FALSE
)
grid.draw(ven_diagramm)
# Build one genes x samples table holding all three tissues (outer join on the
# row names), so that a gene missing from a tissue appears as NA there.
data.for.tsne <- merge(
  fpkm_eet_day_18_AI, fpkm_endo_day_18_C_AI,
  by = "row.names", all = TRUE
)
data.for.tsne <- merge(
  data.for.tsne, fpkm_endo_day_18_IC_AI,
  by.x = "Row.names", by.y = "row.names", all = TRUE
)
rownames(data.for.tsne) <- data.for.tsne$Row.names

# Keep the 15 expression columns (column 1 is the merge key), treat missing
# genes as zero expression, and move to log2(FPKM + 1) with samples in rows.
data.for.tsne <- data.for.tsne[, 2:16]
data.for.tsne[is.na(data.for.tsne)] <- 0
data.for.tsne <- t(log2(data.for.tsne + 1))

# Exact t-SNE (theta = 0) with a fixed seed.
set.seed(8643)
tsne <- Rtsne(
  data.for.tsne,
  dims = 2,
  perplexity = 4,
  theta = 0,
  max_iter = 500000,
  verbose = FALSE
)
rm(data.for.tsne)

# Five samples per tissue, in the column order produced by the merges above.
group <- factor(
  rep(c("EET", "CAR", "ICAR"), each = 5),
  levels = c("EET", "CAR", "ICAR")
)
TSNE <- data.frame("DIM1" = tsne$Y[, 1], "DIM2" = tsne$Y[, 2], "group" = group)

ggplot(data = TSNE, aes(x = DIM1, y = DIM2)) +
  geom_point(aes(colour = factor(group)), size = 3, alpha = 0.8) +
  scale_color_manual(values = c("#CC79A7", "#009E73", "#0072B2"), name = NULL) +
  scale_x_continuous(name = "t-SNE dim 1") +
  scale_y_continuous(name = "t-SNE dim 2") +
  theme_bw(base_size = 20) +
  theme(
    axis.text = element_blank(),
    aspect.ratio = 1
  )
# ---- Observed pairwise Pearson correlations: EET genes vs CAR genes ----
# Transpose to samples x genes and move to log2(FPKM + 1).
fpkm_eet_day_18_AI_a <- log2(t(fpkm_eet_day_18_AI) + 1)
fpkm_endo_day_18_C_AI_a <- log2(t(fpkm_endo_day_18_C_AI) + 1)

pearson_correlation_test <- corAndPvalue(
  fpkm_eet_day_18_AI_a, fpkm_endo_day_18_C_AI_a,
  use = "pairwise.complete.obs",
  method = "pearson",
  alternative = "two.sided"
)

# Only the correlation coefficients are used downstream. The p-value and nObs
# matrices were previously melted into two more full-size long tables and then
# thrown away; they remain available in pearson_correlation_test$p and $nObs
# if they are ever needed again.
correlation_COR_PVALUE_EET_CAR <- melt(pearson_correlation_test$cor)
rm(pearson_correlation_test)

# One row per (EET gene, CAR gene) pair.
colnames(correlation_COR_PVALUE_EET_CAR) <- c("gene_id_eet", "gene_id_end", "pearson_cor")
# ---- Observed pairwise Pearson correlations: EET genes vs ICAR genes ----
# Transpose to samples x genes and move to log2(FPKM + 1).
fpkm_eet_day_18_AI_a <- log2(t(fpkm_eet_day_18_AI) + 1)
fpkm_endo_day_18_IC_AI_a <- log2(t(fpkm_endo_day_18_IC_AI) + 1)

pearson_correlation_test <- corAndPvalue(
  fpkm_eet_day_18_AI_a, fpkm_endo_day_18_IC_AI_a,
  use = "pairwise.complete.obs",
  method = "pearson",
  alternative = "two.sided"
)

# Only the correlation coefficients are used downstream. The p-value and nObs
# matrices were previously melted into two more full-size long tables and then
# thrown away; they remain available in pearson_correlation_test$p and $nObs
# if they are ever needed again.
correlation_COR_PVALUE_EET_ICAR <- melt(pearson_correlation_test$cor)
rm(pearson_correlation_test)

# One row per (EET gene, ICAR gene) pair.
colnames(correlation_COR_PVALUE_EET_ICAR) <- c("gene_id_eet", "gene_id_end", "pearson_cor")
# ---- Empirical p-values for |r| thresholds, EET vs CAR, by permutation ----
# The samples x genes matrices are written to file-backed big.matrix objects so
# that each worker can attach them through the descriptor file instead of
# receiving its own serialized copy.
eet <- as.big.matrix(
  fpkm_eet_day_18_AI_a,
  type = "double",
  separated = FALSE,
  backingfile = "fpkm_eet_day_18_AI_a.bin",
  descriptorfile = "fpkm_eet_day_18_AI_a.desc",
  shared = TRUE
)
# get a description of the matrix
mdesc_eet <- describe(eet)
endo <- as.big.matrix(
  fpkm_endo_day_18_C_AI_a,
  type = "double",
  separated = FALSE,
  backingfile = "fpkm_endo_day_18_C_AI_a.bin",
  descriptorfile = "fpkm_endo_day_18_C_AI_a.desc",
  shared = TRUE
)
# get a description of the matrix
mdesc_end <- describe(endo)

# All orderings of the five EET samples; rows 15-114 give 100 permutations.
# NOTE(review): presumably the slice is meant to skip the identity ordering
# in row 1 -- confirm.
permutation <- permutations(n = 5, r = 5, v = 1:5, repeats.allowed = FALSE)
permutation <- permutation[15:114, ]
rand <- dim(permutation)[1]
sequence.correlation <- seq(0.90, 1, 0.01)

# thresholds x permutations matrix of exceedance counts.
results <- filebacked.big.matrix(
  length(sequence.correlation), rand,
  type = "double", init = 0, separated = FALSE,
  backingfile = "incidence_matrix.bin",
  descriptorfile = "incidence_matrix.desc"
)
mdesc_result <- describe(results)

cl <- makeCluster(10)
registerDoParallel(cl)
# One task per permutation: the permuted correlation matrix is computed once
# and counted against every threshold (it used to be recomputed for each
# threshold). Each task returns one column; column order is irrelevant because
# only row sums are used below.
results[, ] <- foreach(
  j = seq_len(rand),
  .combine = "cbind",
  .inorder = FALSE,
  .packages = c("WGCNA", "reshape", "bigmemory"),
  .noexport = c("eet", "endo"),
  .verbose = FALSE
) %dopar% {
  eet <- attach.big.matrix("fpkm_eet_day_18_AI_a.desc")
  endo <- attach.big.matrix("fpkm_endo_day_18_C_AI_a.desc")
  # Shuffle the EET sample order only; the CAR sample order is kept.
  random <- abs(cor(
    eet[permutation[j, ], ], endo[, ],
    use = "pairwise.complete.obs", method = "pearson"
  ))
  vapply(
    sequence.correlation,
    function(threshold) sum(random > threshold, na.rm = TRUE),
    numeric(1)
  )
}
stopCluster(cl)
#results[1:5,1:5]

# Total number of permuted correlations = permutations x EET genes x CAR genes
# (100 * 124572756 for the published data). as.numeric() avoids integer
# overflow in the product.
total.rand <- as.numeric(rand) *
  ncol(fpkm_eet_day_18_AI_a) * ncol(fpkm_endo_day_18_C_AI_a)
qvalue_CAR <- data.frame(
  correlation = sequence.correlation,
  e.pvalue = (rowSums(results[, ]) + 1) / (total.rand + 1)
)
rm(results)

# Remove the backing files only after the counts have been read back.
unlink(c(
  "fpkm_eet_day_18_AI_a.bin",
  "fpkm_eet_day_18_AI_a.desc",
  "fpkm_endo_day_18_C_AI_a.bin",
  "fpkm_endo_day_18_C_AI_a.desc",
  "incidence_matrix.bin",
  "incidence_matrix.desc"
))
# ---- Empirical p-values for |r| thresholds, EET vs ICAR, by permutation ----
# The samples x genes matrices are written to file-backed big.matrix objects so
# that each worker can attach them through the descriptor file instead of
# receiving its own serialized copy.
eet <- as.big.matrix(
  fpkm_eet_day_18_AI_a,
  type = "double",
  separated = FALSE,
  backingfile = "fpkm_eet_day_18_AI_a.bin",
  descriptorfile = "fpkm_eet_day_18_AI_a.desc",
  shared = TRUE
)
# get a description of the matrix
mdesc_eet <- describe(eet)
endo <- as.big.matrix(
  fpkm_endo_day_18_IC_AI_a,
  type = "double",
  separated = FALSE,
  backingfile = "fpkm_endo_day_18_IC_AI_a.bin",
  descriptorfile = "fpkm_endo_day_18_IC_AI_a.desc",
  shared = TRUE
)
# get a description of the matrix
mdesc_end <- describe(endo)

# All orderings of the five EET samples; rows 15-114 give 100 permutations.
# NOTE(review): presumably the slice is meant to skip the identity ordering
# in row 1 -- confirm.
permutation <- permutations(n = 5, r = 5, v = 1:5, repeats.allowed = FALSE)
permutation <- permutation[15:114, ]
rand <- dim(permutation)[1]
sequence.correlation <- seq(0.90, 1, 0.01)

# thresholds x permutations matrix of exceedance counts.
results <- filebacked.big.matrix(
  length(sequence.correlation), rand,
  type = "double", init = 0, separated = FALSE,
  backingfile = "incidence_matrix.bin",
  descriptorfile = "incidence_matrix.desc"
)
mdesc_result <- describe(results)

cl <- makeCluster(10)
registerDoParallel(cl)
# One task per permutation: the permuted correlation matrix is computed once
# and counted against every threshold (it used to be recomputed for each
# threshold). Each task returns one column; column order is irrelevant because
# only row sums are used below.
results[, ] <- foreach(
  j = seq_len(rand),
  .combine = "cbind",
  .inorder = FALSE,
  .packages = c("WGCNA", "reshape", "bigmemory"),
  .noexport = c("eet", "endo"),
  .verbose = FALSE
) %dopar% {
  eet <- attach.big.matrix("fpkm_eet_day_18_AI_a.desc")
  endo <- attach.big.matrix("fpkm_endo_day_18_IC_AI_a.desc")
  # Shuffle the EET sample order only; the ICAR sample order is kept.
  random <- abs(cor(
    eet[permutation[j, ], ], endo[, ],
    use = "pairwise.complete.obs", method = "pearson"
  ))
  vapply(
    sequence.correlation,
    function(threshold) sum(random > threshold, na.rm = TRUE),
    numeric(1)
  )
}
stopCluster(cl)

# Total number of permuted correlations = permutations x EET genes x ICAR genes
# (100 * 124610948 for the published data). as.numeric() avoids integer
# overflow in the product.
total.rand <- as.numeric(rand) *
  ncol(fpkm_eet_day_18_AI_a) * ncol(fpkm_endo_day_18_IC_AI_a)
qvalue_ICAR <- data.frame(
  correlation = sequence.correlation,
  e.pvalue = (rowSums(results[, ]) + 1) / (total.rand + 1)
)

# Remove the backing files only after the counts have been read back.
# `results` itself is kept; it is removed later in the script.
unlink(c(
  "fpkm_eet_day_18_AI_a.bin",
  "fpkm_eet_day_18_AI_a.desc",
  "fpkm_endo_day_18_IC_AI_a.bin",
  "fpkm_endo_day_18_IC_AI_a.desc",
  "incidence_matrix.bin",
  "incidence_matrix.desc"
))
# Observed EET-CAR correlations: a box plot stacked above a histogram, with the
# pairs beyond +/-0.95 over-plotted in red (positive) and blue (negative).
# Note that the "09" in the object names does not match the 0.95 cut-off used.
correlation_COR_PVALUE_EET_CAR_cor_above09 <-
  correlation_COR_PVALUE_EET_CAR[correlation_COR_PVALUE_EET_CAR$pearson_cor > 0.95, ]
correlation_COR_PVALUE_EET_CAR_cor_below_neg_09 <-
  correlation_COR_PVALUE_EET_CAR[correlation_COR_PVALUE_EET_CAR$pearson_cor < -0.95, ]

# Top panel: horizontal box plot with the mean drawn as a diamond.
plot1 <- ggplot(data = correlation_COR_PVALUE_EET_CAR, aes(y = pearson_cor, x = 1)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 18, size = 4, fill = 'black') +
  coord_flip() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  )

# Bottom panel: histogram of every pair, with both tails drawn on top.
plot2 <- ggplot() +
  geom_histogram(
    aes(x = pearson_cor), data = correlation_COR_PVALUE_EET_CAR,
    fill = "gray", binwidth = 0.01
  ) +
  geom_histogram(
    aes(x = pearson_cor), data = correlation_COR_PVALUE_EET_CAR_cor_above09,
    fill = "red", binwidth = 0.01
  ) +
  geom_histogram(
    aes(x = pearson_cor), data = correlation_COR_PVALUE_EET_CAR_cor_below_neg_09,
    fill = "blue", binwidth = 0.01
  ) +
  scale_y_continuous("Count", labels = function(x) format(x, scientific = TRUE)) +
  scale_x_continuous("Pearson's correlation") +
  theme(
    axis.line = element_line(colour = "black"),
    axis.text = element_text(colour = "black", size = 17),
    axis.title = element_text(colour = "black", size = 17),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    plot.margin = unit(c(0, 0.4, 0, 0), "cm")
  )

ggarrange(plot1, plot2, ncol = 1, nrow = 2, heights = c(1, 4), align = "hv")
# Observed EET-ICAR correlations: a box plot stacked above a histogram, with
# the pairs beyond +/-0.95 over-plotted in red (positive) and blue (negative).
# Note that the "09" in the object names does not match the 0.95 cut-off used.
correlation_COR_PVALUE_EET_ICAR_cor_above09 <-
  correlation_COR_PVALUE_EET_ICAR[correlation_COR_PVALUE_EET_ICAR$pearson_cor > 0.95, ]
correlation_COR_PVALUE_EET_ICAR_cor_below_neg_09 <-
  correlation_COR_PVALUE_EET_ICAR[correlation_COR_PVALUE_EET_ICAR$pearson_cor < -0.95, ]

# Top panel: horizontal box plot with the mean drawn as a diamond.
plot1 <- ggplot(data = correlation_COR_PVALUE_EET_ICAR, aes(y = pearson_cor, x = 1)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 18, size = 4, fill = 'black') +
  coord_flip() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  )

# Bottom panel: histogram of every pair, with both tails drawn on top.
plot2 <- ggplot() +
  geom_histogram(
    aes(x = pearson_cor), data = correlation_COR_PVALUE_EET_ICAR,
    fill = "gray", binwidth = 0.01
  ) +
  geom_histogram(
    aes(x = pearson_cor), data = correlation_COR_PVALUE_EET_ICAR_cor_above09,
    fill = "red", binwidth = 0.01
  ) +
  geom_histogram(
    aes(x = pearson_cor), data = correlation_COR_PVALUE_EET_ICAR_cor_below_neg_09,
    fill = "blue", binwidth = 0.01
  ) +
  scale_y_continuous("Count", labels = function(x) format(x, scientific = TRUE)) +
  scale_x_continuous("Pearson's correlation") +
  theme(
    axis.line = element_line(colour = "black"),
    axis.text = element_text(colour = "black", size = 17),
    axis.title = element_text(colour = "black", size = 17),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    plot.margin = unit(c(0, 0.4, 0, 0), "cm")
  )

ggarrange(plot1, plot2, ncol = 1, nrow = 2, heights = c(1, 4), align = "hv")
# Pick 10 sample-order permutations at random from rows 10-110 of the full
# list of orderings of the five EET samples.
permutation <- permutations(n = 5, r = 5, v = 1:5, repeats.allowed = FALSE)
permutation <- permutation[10:110, ]
permutation <- permutation[sample(101, 10, replace = FALSE), ]

#CAR
# Column 3 holds the correlations from the first permutation; the loop appends
# one further column per remaining permutation (columns 4 to 12).
# NOTE(review): the appended columns are rounded to 2 decimals while column 3
# is stored unrounded -- confirm whether that asymmetry is intended.
i <- 1
suffled_data_EET_CAR_melt_a <- melt(
  cor(
    fpkm_eet_day_18_AI_a[permutation[i, ], ], fpkm_endo_day_18_C_AI_a,
    use = "pairwise.complete.obs"
  )
)
for (i in 2:10) {
  suffled_data_EET_CAR <- cor(
    fpkm_eet_day_18_AI_a[permutation[i, ], ], fpkm_endo_day_18_C_AI_a,
    use = "pairwise.complete.obs"
  )
  suffled_data_EET_CAR_melt <- melt(suffled_data_EET_CAR)
  suffled_data_EET_CAR_melt_a[, (2 + i)] <- round(suffled_data_EET_CAR_melt$value, 2)
}

# Draw one million permuted correlations (with replacement) for plotting.
sample_suffled_data_EET_CAR_melt_a <- data.frame(
  "value" = sample(
    as.matrix(suffled_data_EET_CAR_melt_a[, 3:12]),
    1000000,
    replace = TRUE
  )
)

# Top panel: horizontal box plot with the mean drawn as a diamond.
plot1 <- ggplot(data = sample_suffled_data_EET_CAR_melt_a, aes(y = value, x = 1)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 18, size = 4, fill = 'black') +
  coord_flip() +
  ggtitle("Distribution of correlations between \n EET and CAR under the null hypothesis") +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    plot.title = element_text(color = "black", size = 12, hjust = 0.5),
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  )

# Bottom panel: histogram of the sampled permuted correlations.
plot2 <- ggplot() +
  geom_histogram(
    aes(x = value), data = sample_suffled_data_EET_CAR_melt_a,
    fill = "gray", binwidth = 0.01
  ) +
  #geom_histogram(aes(x=value), fill="red", data=sample_suffled_data_EET_CAR_melt_a_above09, binwidth = 0.01) +
  #geom_histogram(aes(x=value), fill="blue", data=sample_suffled_data_EET_CAR_melt_a_below_neg_09, binwidth = 0.01) +
  scale_y_continuous("Count", labels = function(x) format(x, scientific = TRUE)) +
  scale_x_continuous("Pearson's correlation") +
  theme(
    axis.line = element_line(colour = "black"),
    axis.text = element_text(colour = "black", size = 17),
    axis.title = element_text(colour = "black", size = 17),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    plot.margin = unit(c(0, 0.4, 0, 0), "cm")
  )
#ICAR
# Same procedure as for CAR, reusing the 10 sampled permutations: column 3
# holds the first permutation, the loop appends columns 4 to 12.
# NOTE(review): the appended columns are rounded to 2 decimals while column 3
# is stored unrounded -- confirm whether that asymmetry is intended.
i <- 1
suffled_data_EET_ICAR_melt_a <- melt(
  cor(
    fpkm_eet_day_18_AI_a[permutation[i, ], ], fpkm_endo_day_18_IC_AI_a,
    use = "pairwise.complete.obs"
  )
)
for (i in 2:10) {
  suffled_data_EET_ICAR <- cor(
    fpkm_eet_day_18_AI_a[permutation[i, ], ], fpkm_endo_day_18_IC_AI_a,
    use = "pairwise.complete.obs"
  )
  suffled_data_EET_ICAR_melt <- melt(suffled_data_EET_ICAR)
  suffled_data_EET_ICAR_melt_a[, (2 + i)] <- round(suffled_data_EET_ICAR_melt$value, 2)
}

# Draw one million permuted correlations (with replacement) for plotting.
sample_suffled_data_EET_ICAR_melt_a <- data.frame(
  "value" = sample(
    as.matrix(suffled_data_EET_ICAR_melt_a[, 3:12]),
    1000000,
    replace = TRUE
  )
)

# Top panel: horizontal box plot with the mean drawn as a diamond.
plot3 <- ggplot(data = sample_suffled_data_EET_ICAR_melt_a, aes(y = value, x = 1)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 18, size = 4, fill = 'black') +
  coord_flip() +
  ggtitle("Distribution of correlations between \n EET and ICAR under the null hypothesis") +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    plot.title = element_text(color = "black", size = 12, hjust = 0.5),
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  )

# Bottom panel: histogram of the sampled permuted correlations.
plot4 <- ggplot() +
  geom_histogram(
    aes(x = value), data = sample_suffled_data_EET_ICAR_melt_a,
    fill = "gray", binwidth = 0.01
  ) +
  #geom_histogram(aes(x=value), fill="red", data=sample_suffled_data_EET_ICAR_melt_a_above09, binwidth = 0.01) +
  #geom_histogram(aes(x=value), fill="blue", data=sample_suffled_data_EET_ICAR_melt_a_below_neg_09, binwidth = 0.01) +
  scale_y_continuous("Count", labels = function(x) format(x, scientific = TRUE)) +
  scale_x_continuous("Pearson's correlation") +
  theme(
    axis.line = element_line(colour = "black"),
    axis.text = element_text(colour = "black", size = 17),
    axis.title = element_text(colour = "black", size = 17),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    plot.margin = unit(c(0, 0.4, 0, 0), "cm")
  )

# 2 x 2 layout: box plots on the first row, histograms on the second;
# CAR in the left column, ICAR in the right column.
ggarrange(
  plot1, plot3, plot2, plot4,
  ncol = 2, nrow = 2, heights = c(1.5, 5), align = "hv"
)
Testing the deviation of the null versus the alternative correlations EET - CAR
# Two-sample Kolmogorov-Smirnov test comparing the sampled permuted (null)
# EET-CAR correlations with the observed EET-CAR correlations.
# The lines starting with "##" are the output recorded from the original run.
ks.test(sample_suffled_data_EET_CAR_melt_a$value,correlation_COR_PVALUE_EET_CAR$pearson_cor)
##
## Two-sample Kolmogorov-Smirnov test
##
## data: sample_suffled_data_EET_CAR_melt_a$value and correlation_COR_PVALUE_EET_CAR$pearson_cor
## D = 0.13942, p-value < 2.2e-16
## alternative hypothesis: two-sided
Testing the deviation of the null versus the alternative correlations EET - ICAR
# Two-sample Kolmogorov-Smirnov test comparing the sampled permuted (null)
# EET-ICAR correlations with the observed EET-ICAR correlations.
# The lines starting with "##" are the output recorded from the original run.
ks.test(sample_suffled_data_EET_ICAR_melt_a$value,correlation_COR_PVALUE_EET_ICAR$pearson_cor)
##
## Two-sample Kolmogorov-Smirnov test
##
## data: sample_suffled_data_EET_ICAR_melt_a$value and correlation_COR_PVALUE_EET_ICAR$pearson_cor
## D = 0.059979, p-value < 2.2e-16
## alternative hypothesis: two-sided
#qvalue_ICAR<-read.table("2017-12-19_empirical_p_ICAR_rand.txt",stringsAsFactors=FALSE, header =TRUE, sep="\t",quote = "")
#qvalue_CAR<-read.table("2017-12-18_empirical_p_CAR_rand.txt",stringsAsFactors=FALSE, header =TRUE, sep="\t",quote = "")

# Empirical FDR curves: half of the empirical p-value (e.pvalue / 2) plotted
# against the correlation threshold, one panel per endometrial tissue.
plot1 <- ggplot() +
  geom_point(
    data = qvalue_CAR, aes(x = correlation, y = e.pvalue / 2),
    color = "black", size = 1, shape = 16
  ) +
  geom_line(
    data = qvalue_CAR, aes(x = correlation, y = e.pvalue / 2),
    color = "black", size = 0.1, linetype = 3
  ) +
  scale_x_continuous(
    name = "correlation",
    limits = c(0.85, 1), breaks = seq(0.85, 1, 0.01)
  ) +
  scale_y_continuous(
    name = "empirical eFDR",
    limits = c(0, 0.02), breaks = seq(0, 0.2, 0.005)
  ) +
  ggtitle("eFDR pair-wise gene correlations EET-CAR") +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.grid.major = element_line(color = "lightgray"),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.text = element_text(color = "black", size = 8),
    axis.title = element_text(color = "black", size = 8),
    plot.title = element_text(lineheight = .8, hjust = 0.5),
    legend.position = "none",
    panel.spacing = unit(c(0.4, 0.4, 0.4, 0.4), "cm"),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  )

plot2 <- ggplot() +
  geom_point(
    data = qvalue_ICAR, aes(x = correlation, y = e.pvalue / 2),
    color = "black", size = 1, shape = 16
  ) +
  geom_line(
    data = qvalue_ICAR, aes(x = correlation, y = e.pvalue / 2),
    color = "black", size = 0.1, linetype = 3
  ) +
  scale_x_continuous(
    name = "correlation",
    limits = c(0.85, 1), breaks = seq(0.85, 1, 0.01)
  ) +
  scale_y_continuous(
    name = "empirical eFDR",
    limits = c(0, 0.02), breaks = seq(0, 0.2, 0.005)
  ) +
  ggtitle("eFDR pair-wise gene correlations EET-ICAR") +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.grid.major = element_line(color = "lightgray"),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.text = element_text(color = "black", size = 8),
    axis.title = element_text(color = "black", size = 8),
    plot.title = element_text(lineheight = .8, hjust = 0.5),
    legend.position = "none",
    panel.spacing = unit(c(0.4, 0.4, 0.4, 0.4), "cm"),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  )

ggarrange(plot1, plot2, ncol = 2, nrow = 1)
The results presented below were used to produce Figure 2c.
# Join the CAR and ICAR correlation tables on the gene pair. Pairs present in
# only one table are kept (all = TRUE) and their missing correlation is set
# to 0, so they never pass a +/-0.99 filter on that side.
merged_correlation <- merge(
  correlation_COR_PVALUE_EET_CAR[, 1:3],
  correlation_COR_PVALUE_EET_ICAR[, 1:3],
  by = c("gene_id_eet", "gene_id_end"),
  all = TRUE,
  suffixes = c(".CAR", ".ICAR")
)
merged_correlation[is.na(merged_correlation)] <- 0

# One selection rule per group of counts, in the order they are reported.
fig2c_selections <- list(
  function(car, icar) car > 0.99 & icar > 0.99,
  function(car, icar) car < -0.99 & icar < -0.99,
  function(car, icar) car > 0.99 & icar < -0.99,
  function(car, icar) car < -0.99 & icar > 0.99,
  function(car, icar) car > 0.99,
  function(car, icar) car < -0.99,
  function(car, icar) icar > 0.99,
  function(car, icar) icar < -0.99
)

# For every selection print, in this order: the number of gene pairs, the
# number of distinct gene_id_eet values, the number of distinct gene_id_end
# values.
for (fig2c_select in fig2c_selections) {
  fig2c_keep <- fig2c_select(
    merged_correlation$pearson_cor.CAR,
    merged_correlation$pearson_cor.ICAR
  )
  print(nrow(merged_correlation[fig2c_keep, ]))
  print(length(na.omit(unique(merged_correlation[fig2c_keep, 1]))))
  print(length(na.omit(unique(merged_correlation[fig2c_keep, 2]))))
}
rm(fig2c_selections, fig2c_select, fig2c_keep)

# Output recorded from the original run (pairs / gene_id_eet / gene_id_end):
#   CAR >  0.99 & ICAR >  0.99 :    135 /  132 /   20
#   CAR < -0.99 & ICAR < -0.99 :     41 /   41 /    9
#   CAR >  0.99 & ICAR < -0.99 :     94 /   87 /   24
#   CAR < -0.99 & ICAR >  0.99 :     84 /   81 /   14
#   CAR >  0.99                : 207189 / 8907 / 8858
#   CAR < -0.99                :  49967 / 8337 / 4069
#   ICAR >  0.99               : 107421 / 9526 / 7081
#   ICAR < -0.99               : 119549 / 9499 / 6535
rm(merged_correlation)
This is the code used to obtain the results used to create S1 Table
Calculation of the number of genes according to each threshold
# Number of distinct genes taking part in at least one pair beyond each
# correlation cut-off. Printed in this order: CAR table column 1
# (gene_id_eet), CAR table column 2 (gene_id_end), ICAR table column 1,
# ICAR table column 2; within each, the five positive cut-offs followed by the
# five negative cut-offs.
s1_positive_cutoffs <- c(0.9999, 0.999, 0.99, 0.95, 0.90)
s1_negative_cutoffs <- c(-0.90, -0.95, -0.99, -0.999, -0.9999)

for (s1_table in list(correlation_COR_PVALUE_EET_CAR, correlation_COR_PVALUE_EET_ICAR)) {
  for (s1_column in 1:2) {
    for (s1_cutoff in s1_positive_cutoffs) {
      print(length(unique(s1_table[s1_table$pearson_cor > s1_cutoff, s1_column])))
    }
    for (s1_cutoff in s1_negative_cutoffs) {
      print(length(unique(s1_table[s1_table$pearson_cor < s1_cutoff, s1_column])))
    }
  }
}
rm(s1_positive_cutoffs, s1_negative_cutoffs, s1_table, s1_column, s1_cutoff)

# Output recorded from the original run:
#   cut-off      CAR col 1   CAR col 2   ICAR col 1   ICAR col 2
#   >  0.9999          180         172          106          102
#   >  0.999          3044        2534         2714         1589
#   >  0.99           8907        8858         9526         7081
#   >  0.95           9548       12430         9548        11504
#   >  0.90           9548       12959         9548        12612
#   < -0.90           9548       12641         9548        12410
#   < -0.95           9548       10411         9548        10873
#   < -0.99           8337        4069         9499         6535
#   < -0.999          1248         685         2728         1595
#   < -0.9999           50          46          105          102
Obtain the proportion of pairs that showed correlation on the scrambled data greater or lower than the threshold.
# Drop plots, intermediate tables and the raw FPKM tables that are no longer
# needed, then run the garbage collector and print its memory report.
rm(list = c(
  # plots and diagrams
  "plot1", "plot2", "plot3", "plot4", "ven_diagramm",
  # permuted-correlation intermediates
  "suffled_data_EET_ICAR", "suffled_data_EET_CAR",
  "suffled_data_EET_CAR_melt_a", "suffled_data_EET_ICAR_melt_a",
  "sample_suffled_data_EET_CAR_melt_a", "sample_suffled_data_EET_ICAR_melt_a",
  "suffled_data_EET_CAR_melt", "suffled_data_EET_ICAR_melt",
  # tails of the observed correlations
  "correlation_COR_PVALUE_EET_ICAR_cor_above09",
  "correlation_COR_PVALUE_EET_ICAR_cor_below_neg_09",
  "correlation_COR_PVALUE_EET_CAR_cor_above09",
  "correlation_COR_PVALUE_EET_CAR_cor_below_neg_09",
  # t-SNE objects
  "tsne", "TSNE",
  # big.matrix handles, descriptors and permutation results
  "eet", "endo", "mdesc_eet", "mdesc_end", "results", "mdesc_result",
  "qvalue_ICAR", "qvalue_CAR",
  # raw FPKM tables
  "fpkm_eet_day_18_AI", "fpkm_endo_day_18_IC_AI", "fpkm_endo_day_18_C_AI"
))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 7829710 418.2 196694636 10504.7 272015474 14527.2
## Vcells 645954902 4928.3 7799221088 59503.4 9749026361 74379.2
# ---- EET-CAR: observed positive pairs that also pass in scrambled data ----
# For each cut-off and each permutation of the EET sample order, count the
# gene pairs whose correlation exceeds the cut-off in BOTH the observed data
# and the scrambled data; then express the total as a fraction of all
# scrambled correlations computed.
eet <- as.big.matrix(
  fpkm_eet_day_18_AI_a,
  type = "double",
  separated = FALSE,
  backingfile = "fpkm_eet_day_18_AI_a.bin",
  descriptorfile = "fpkm_eet_day_18_AI_a.desc",
  shared = TRUE
)
# get a description of the matrix
mdesc_eet <- describe(eet)
endo <- as.big.matrix(
  fpkm_endo_day_18_C_AI_a,
  type = "double",
  separated = FALSE,
  backingfile = "fpkm_endo_day_18_C_AI_a.bin",
  descriptorfile = "fpkm_endo_day_18_C_AI_a.desc",
  shared = TRUE
)
# get a description of the matrix
mdesc_end <- describe(endo)

# 110 of the 120 orderings of the five EET samples.
permutation <- permutations(n = 5, r = 5, v = 1:5, repeats.allowed = FALSE)
permutation <- permutation[c(4:24, 26:48, 50:95, 97:114, 117:118), ]
rand <- dim(permutation)[1]
sequence.correlation <- c(0.95, 0.99, 0.999, 0.9999)

# Observed pairs above each cut-off, computed once here so that only these
# subsets (not the full pair table) are sent to the workers. which() drops NA
# correlations; a bare logical filter would turn them into all-NA rows.
observed_above <- lapply(sequence.correlation, function(cutoff) {
  correlation_COR_PVALUE_EET_CAR[
    which(correlation_COR_PVALUE_EET_CAR$pearson_cor > cutoff),
  ]
})

cl <- makeCluster(10)
registerDoParallel(cl)
results_eet_car_scrambled <- foreach(
  cutoff_index = seq_along(sequence.correlation),
  .combine = "rbind", .inorder = FALSE,
  .packages = c("reshape", "bigmemory"),
  .noexport = c("eet", "endo"), .verbose = FALSE
) %:%
  foreach(
    j = seq_len(rand),
    .combine = "rbind", .inorder = FALSE,
    .packages = c("reshape", "bigmemory"),
    .noexport = c("eet", "endo"), .verbose = FALSE
  ) %dopar% {
    i <- sequence.correlation[cutoff_index]
    eet <- attach.big.matrix("fpkm_eet_day_18_AI_a.desc")
    endo <- attach.big.matrix("fpkm_endo_day_18_C_AI_a.desc")
    # Shuffle the EET sample order only; the CAR sample order is kept.
    random <- cor(
      eet[permutation[j, ], ], endo[, ],
      use = "pairwise.complete.obs", method = "pearson"
    )
    random <- melt(random)
    random <- random[which(random$value > i), ]
    # Inner join: pairs above the cut-off in both scrambled and observed data.
    random <- merge(
      random, observed_above[[cutoff_index]],
      by.x = c("X1", "X2"), by.y = c("gene_id_eet", "gene_id_end"),
      all = FALSE
    )
    # merge() always returns a data frame, so nrow() is 0 when nothing matches.
    data.frame(k = nrow(random), i = i, j = j)
  }
stopCluster(cl)

results_eet_car_scrambled_positive <- aggregate(k ~ i, data = results_eet_car_scrambled, sum)
# Denominator = permutations x EET genes x CAR genes (124572756 * 110 for the
# published data); as.numeric() avoids integer overflow in the product.
results_eet_car_scrambled_positive$ratio <- results_eet_car_scrambled_positive$k /
  (as.numeric(rand) * ncol(fpkm_eet_day_18_AI_a) * ncol(fpkm_endo_day_18_C_AI_a))

# Objects created inside the workers never exist in this session, so only the
# objects created here are removed.
rm(results_eet_car_scrambled, observed_above, eet, endo)
# ---- EET-CAR: observed negative pairs that also pass in scrambled data ----
# Mirror of the positive analysis: count gene pairs whose correlation is below
# -cut-off in BOTH the observed data and the scrambled data. Uses
# `permutation`, `rand`, `sequence.correlation` and the backing files created
# for the positive analysis.
#
# NOTE(review): the observed subset was previously selected with
# `pearson_cor > i` while the scrambled side used `value < -i`, i.e. it counted
# pairs that are strongly positive when observed and strongly negative when
# scrambled. That looks like a copy-paste slip in code producing the
# "negative" result, so both sides now use `< -i`. Numbers derived from this
# block will change; confirm against the intended definition.
observed_below <- lapply(sequence.correlation, function(cutoff) {
  correlation_COR_PVALUE_EET_CAR[
    which(correlation_COR_PVALUE_EET_CAR$pearson_cor < -cutoff),
  ]
})

cl <- makeCluster(10)
registerDoParallel(cl)
results_eet_car_scrambled <- foreach(
  cutoff_index = seq_along(sequence.correlation),
  .combine = "rbind", .inorder = FALSE,
  .packages = c("reshape", "bigmemory"),
  .noexport = c("eet", "endo"), .verbose = FALSE
) %:%
  foreach(
    j = seq_len(rand),
    .combine = "rbind", .inorder = FALSE,
    .packages = c("reshape", "bigmemory"),
    .noexport = c("eet", "endo"), .verbose = FALSE
  ) %dopar% {
    i <- sequence.correlation[cutoff_index]
    eet <- attach.big.matrix("fpkm_eet_day_18_AI_a.desc")
    endo <- attach.big.matrix("fpkm_endo_day_18_C_AI_a.desc")
    # Shuffle the EET sample order only; the CAR sample order is kept.
    random <- cor(
      eet[permutation[j, ], ], endo[, ],
      use = "pairwise.complete.obs", method = "pearson"
    )
    random <- melt(random)
    # which() drops NA correlations instead of turning them into all-NA rows.
    random <- random[which(random$value < -i), ]
    # Inner join: pairs below -cut-off in both scrambled and observed data.
    random <- merge(
      random, observed_below[[cutoff_index]],
      by.x = c("X1", "X2"), by.y = c("gene_id_eet", "gene_id_end"),
      all = FALSE
    )
    # merge() always returns a data frame, so nrow() is 0 when nothing matches.
    data.frame(k = nrow(random), i = i, j = j)
  }
stopCluster(cl)

results_eet_car_scrambled_negative <- aggregate(k ~ i, data = results_eet_car_scrambled, sum)
# Denominator = permutations x EET genes x CAR genes (124572756 * 110 for the
# published data); as.numeric() avoids integer overflow in the product.
results_eet_car_scrambled_negative$ratio <- results_eet_car_scrambled_negative$k /
  (as.numeric(rand) * ncol(fpkm_eet_day_18_AI_a) * ncol(fpkm_endo_day_18_C_AI_a))

# Only objects that exist in this session are removed (the big.matrix handles
# were already dropped after the positive analysis).
rm(results_eet_car_scrambled, observed_below)

# Delete the CAR backing files now that both analyses are done.
unlink(c(
  "fpkm_eet_day_18_AI_a.bin",
  "fpkm_eet_day_18_AI_a.desc",
  "fpkm_endo_day_18_C_AI_a.bin",
  "fpkm_endo_day_18_C_AI_a.desc"
))
# ---- EET-ICAR: observed positive pairs that also pass in scrambled data ----
# For each cut-off and each permutation of the EET sample order, count the
# gene pairs whose correlation exceeds the cut-off in BOTH the observed data
# and the scrambled data; then express the total as a fraction of all
# scrambled correlations computed.
eet <- as.big.matrix(
  fpkm_eet_day_18_AI_a,
  type = "double",
  separated = FALSE,
  backingfile = "fpkm_eet_day_18_AI_a.bin",
  descriptorfile = "fpkm_eet_day_18_AI_a.desc",
  shared = TRUE
)
# get a description of the matrix
mdesc_eet <- describe(eet)
endo <- as.big.matrix(
  fpkm_endo_day_18_IC_AI_a,
  type = "double",
  separated = FALSE,
  backingfile = "fpkm_endo_day_18_IC_AI_a.bin",
  descriptorfile = "fpkm_endo_day_18_IC_AI_a.desc",
  shared = TRUE
)
# get a description of the matrix
mdesc_end <- describe(endo)

# 110 of the 120 orderings of the five EET samples.
permutation <- permutations(n = 5, r = 5, v = 1:5, repeats.allowed = FALSE)
permutation <- permutation[c(4:24, 26:48, 50:95, 97:114, 117:118), ]
rand <- dim(permutation)[1]
sequence.correlation <- c(0.95, 0.99, 0.999, 0.9999)

# Observed pairs above each cut-off, computed once here so that only these
# subsets (not the full pair table) are sent to the workers. which() drops NA
# correlations; a bare logical filter would turn them into all-NA rows.
observed_above <- lapply(sequence.correlation, function(cutoff) {
  correlation_COR_PVALUE_EET_ICAR[
    which(correlation_COR_PVALUE_EET_ICAR$pearson_cor > cutoff),
  ]
})

cl <- makeCluster(10)
registerDoParallel(cl)
results_eet_icar_scrambled <- foreach(
  cutoff_index = seq_along(sequence.correlation),
  .combine = "rbind", .inorder = FALSE,
  .packages = c("reshape", "bigmemory"),
  .noexport = c("eet", "endo"), .verbose = FALSE
) %:%
  foreach(
    j = seq_len(rand),
    .combine = "rbind", .inorder = FALSE,
    .packages = c("reshape", "bigmemory"),
    .noexport = c("eet", "endo"), .verbose = FALSE
  ) %dopar% {
    i <- sequence.correlation[cutoff_index]
    eet <- attach.big.matrix("fpkm_eet_day_18_AI_a.desc")
    endo <- attach.big.matrix("fpkm_endo_day_18_IC_AI_a.desc")
    # Shuffle the EET sample order only; the ICAR sample order is kept.
    random <- cor(
      eet[permutation[j, ], ], endo[, ],
      use = "pairwise.complete.obs", method = "pearson"
    )
    random <- melt(random)
    random <- random[which(random$value > i), ]
    # Inner join: pairs above the cut-off in both scrambled and observed data.
    random <- merge(
      random, observed_above[[cutoff_index]],
      by.x = c("X1", "X2"), by.y = c("gene_id_eet", "gene_id_end"),
      all = FALSE
    )
    # merge() always returns a data frame, so nrow() is 0 when nothing matches.
    data.frame(k = nrow(random), i = i, j = j)
  }
stopCluster(cl)

results_eet_icar_scrambled_positive <- aggregate(k ~ i, data = results_eet_icar_scrambled, sum)
# Denominator = permutations x EET genes x ICAR genes (124610948 * 110 for the
# published data); as.numeric() avoids integer overflow in the product.
results_eet_icar_scrambled_positive$ratio <- results_eet_icar_scrambled_positive$k /
  (as.numeric(rand) * ncol(fpkm_eet_day_18_AI_a) * ncol(fpkm_endo_day_18_IC_AI_a))

# Objects created inside the workers never exist in this session, so only the
# objects created here are removed.
rm(results_eet_icar_scrambled, observed_above, eet, endo)
# Permutation test (negative tail), EET vs ICAR.
# Same procedure as the positive tail, but scrambled pairs are kept when
# r < -i before being matched against the real-data subset.
# NOTE(review): the real-data subset keeps pairs with pearson_cor > i while the
# scrambled values are filtered at < -i; confirm that `< -i` was not intended
# for the subset as well.
cl <- makeCluster(10)
registerDoParallel(cl)
results_eet_icar_scrambled <- data.frame()
results_eet_icar_scrambled <- foreach(
  i = sequence.correlation, .combine = 'rbind', .inorder = FALSE,
  .packages = c("reshape", "bigmemory"), .noexport = c("eet", "endo"),
  .verbose = FALSE
) %:%
  foreach(
    j = seq_len(rand), .combine = 'rbind', .inorder = FALSE,
    .packages = c("reshape", "bigmemory"), .noexport = c("eet", "endo"),
    .verbose = FALSE
  ) %dopar% {
    correlation_COR_PVALUE_EET_ICAR_subset <- correlation_COR_PVALUE_EET_ICAR[
      correlation_COR_PVALUE_EET_ICAR$pearson_cor > i,
    ]
    eet <- attach.big.matrix("fpkm_eet_day_18_AI_a.desc")
    endo <- attach.big.matrix("fpkm_endo_day_18_IC_AI_a.desc")
    random <- cor(eet[permutation[j, ], ], endo[, ],
                  use = "pairwise.complete.obs", method = "pearson")
    random <- melt(random)
    random <- random[random$value < -i, ]
    random <- merge(random, correlation_COR_PVALUE_EET_ICAR_subset,
                    by.x = c("X1", "X2"),
                    by.y = c("gene_id_eet", "gene_id_end"), all = FALSE)
    # merge() always returns a data frame; its row count is 0 when empty.
    k <- nrow(random)
    data.frame(k, i, j)
  }
stopCluster(cl)
# Store the ICAR result under its own name. The original assigned it to
# results_eet_car_scrambled_negative, which silently overwrote the CAR result.
results_eet_icar_scrambled_negative <- aggregate(k ~ i, data = results_eet_icar_scrambled, sum)
results_eet_icar_scrambled_negative$ratio <- results_eet_icar_scrambled_negative$k / (124610948 * 110)
# Remove the big.matrix backing files with unlink() instead of a shell `rm`,
# so the cleanup does not depend on a Unix shell being available.
unlink(c(
  "fpkm_eet_day_18_AI_a.bin",
  "fpkm_eet_day_18_AI_a.desc",
  "fpkm_endo_day_18_IC_AI_a.bin",
  "fpkm_endo_day_18_IC_AI_a.desc"
))
rm(correlation_COR_PVALUE_EET_ICAR_subset, results_eet_icar_scrambled, eet, endo, random, mdesc_end, mdesc_eet)
#
results_eet_car_scrambled_positive
## i k ratio
## 1 0.9500 3866457 2.821613e-04
## 2 0.9900 115923 8.459678e-06
## 3 0.9990 1009 7.363349e-08
## 4 0.9999 5 3.648835e-10
# NOTE(review): the output recorded below was produced while this variable had
# been overwritten with the ICAR result (the ratios match k / (124610948*110));
# re-knit to obtain the actual CAR values.
results_eet_car_scrambled_negative
## i k ratio
## 1 0.9500 636201 4.641362e-05
## 2 0.9900 7358 5.367980e-07
## 3 0.9990 16 1.167269e-09
## 4 0.9999 0 0.000000e+00
results_eet_icar_scrambled_positive
## i k ratio
## 1 0.9500 1891224 1.379730e-04
## 2 0.9900 53592 3.909769e-06
## 3 0.9990 401 2.925469e-08
## 4 0.9999 3 2.188630e-10
results_eet_icar_scrambled_negative
## i k ratio
## 1 0.9500 636201 4.641362e-05
## 2 0.9900 7358 5.367980e-07
## 3 0.9990 16 1.167269e-09
## 4 0.9999 0 0.000000e+00
Load the files containing expression data
# Example scatter plots for EET vs CAR gene pairs.
# Reload the expression tables and the Ensembl id -> gene symbol annotation.
fpkm_eet_day_18_AI <- read.table("2017_06_17_fpkm_d18_EET_filtered.txt.bz2", stringsAsFactors = FALSE, header = TRUE)
fpkm_endo_day_18_C_AI <- read.table("2017_06_17_fpkm_d18_ENDO_CAR_filtered.txt.bz2", stringsAsFactors = FALSE, header = TRUE)
fpkm_endo_day_18_IC_AI <- read.table("2017_06_17_fpkm_d18_ENDO_ICAR_filtered.txt.bz2", stringsAsFactors = FALSE, header = TRUE)
annotation.ensembl.symbol <- read.table("2017_12_20_annotation.ensembl.symbol.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")

# Sort the pairs from the highest to the lowest Pearson correlation, then pick
# 15 examples: the 5 highest, 5 with |r| < 1e-5, and the 5 lowest.
correlation_COR_PVALUE_EET_CAR <- correlation_COR_PVALUE_EET_CAR[order(-correlation_COR_PVALUE_EET_CAR$pearson_cor), ]
correlation_COR_PVALUE_EET_CAR_subset <- rbind(
  head(correlation_COR_PVALUE_EET_CAR, n = 5),
  head(correlation_COR_PVALUE_EET_CAR[abs(correlation_COR_PVALUE_EET_CAR$pearson_cor) < 0.00001, ], n = 5),
  tail(correlation_COR_PVALUE_EET_CAR, n = 5)
)

# Long table with one row per sample and pair; `chart` is the pair index.
data_chart_1 <- data.frame(stringsAsFactors = FALSE)
data_chart_2 <- data.frame(stringsAsFactors = FALSE)
# seq_len() keeps the loop from running when the subset is empty.
for (i in seq_len(nrow(correlation_COR_PVALUE_EET_CAR_subset))) {
  # Columns 1 and 2 are read as the EET and endometrium gene ids
  # (presumably gene_id_eet / gene_id_end -- confirm column order).
  gene_eet <- correlation_COR_PVALUE_EET_CAR_subset[i, 1]
  gene_endo <- correlation_COR_PVALUE_EET_CAR_subset[i, 2]
  fpkm_gene_eet <- fpkm_eet_day_18_AI[row.names(fpkm_eet_day_18_AI) == gene_eet, ]
  fpkm_gene_endo <- fpkm_endo_day_18_C_AI[row.names(fpkm_endo_day_18_C_AI) == gene_endo, ]
  data_chart_1 <- data.frame(t(fpkm_gene_eet))
  data_chart_1 <- cbind(data_chart_1, data.frame(t(fpkm_gene_endo)))
  data_chart_1$chart <- i
  data_chart_1$gene1 <- gene_eet
  data_chart_1$gene2 <- gene_endo
  colnames(data_chart_1) <- c("gene_EET", "geneCAR", "chart", "gene1", "gene2")
  data_chart_2 <- rbind(data_chart_2, data_chart_1)
}

# Axis labels: gene symbol when available, otherwise the gene's own Ensembl id.
data_chart_2$gene_EET_symbol <- annotation.ensembl.symbol$external_gene_name[match(data_chart_2$gene1, annotation.ensembl.symbol$ensembl_gene_id)]
data_chart_2$gene_END_symbol <- annotation.ensembl.symbol$external_gene_name[match(data_chart_2$gene2, annotation.ensembl.symbol$ensembl_gene_id)]
data_chart_2$gene1 <- as.character(data_chart_2$gene1)
data_chart_2$gene2 <- as.character(data_chart_2$gene2)
data_chart_2$gene_EET_symbol <- ifelse(
  is.na(data_chart_2$gene_EET_symbol) | data_chart_2$gene_EET_symbol == "",
  data_chart_2$gene1, data_chart_2$gene_EET_symbol
)
# The endometrium label falls back to gene2 (the endometrium id). The original
# fell back to gene1, which put the EET gene id on the endometrium axis.
data_chart_2$gene_END_symbol <- ifelse(
  is.na(data_chart_2$gene_END_symbol) | data_chart_2$gene_END_symbol == "",
  data_chart_2$gene2, data_chart_2$gene_END_symbol
)

# One small scatter plot per pair, arranged in a 3 x 5 grid.
plots <- list()
k <- 1
for (j in c(1:15)) {
  data_chart_3 <- data_chart_2[data_chart_2$chart %in% j, ]
  plot <- ggplot(data = data_chart_3, aes(x = geneCAR, y = gene_EET)) +
    geom_point(size = 0.4) +
    scale_y_continuous(name = data_chart_3$gene_EET_symbol[1]) +
    scale_x_continuous(name = data_chart_3$gene_END_symbol[1]) +
    theme(aspect.ratio = 1,
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          plot.background = element_blank(),
          axis.text.x = element_text(colour = 'black', size = 7),
          axis.text.y = element_text(colour = 'black', size = 7),
          axis.title = element_text(colour = 'black', size = 7, face = "italic"),
          axis.ticks = element_line(size = 0.1),
          panel.spacing = unit(1, "mm"),
          legend.position = "none",
          axis.line = element_line(size = 0.1, colour = "black")
    )
  plots[[k]] <- plot
  k <- k + 1
}
ggarrange(plotlist = plots, ncol = 5, nrow = 3)
# Example scatter plots for EET vs ICAR gene pairs.
# Sort the pairs from the highest to the lowest Pearson correlation, then pick
# 15 examples: the 5 highest, 5 with |r| < 1e-5, and the 5 lowest.
correlation_COR_PVALUE_EET_ICAR <- correlation_COR_PVALUE_EET_ICAR[order(-correlation_COR_PVALUE_EET_ICAR$pearson_cor), ]
correlation_COR_PVALUE_EET_ICAR_subset <- rbind(
  head(correlation_COR_PVALUE_EET_ICAR, n = 5),
  head(correlation_COR_PVALUE_EET_ICAR[abs(correlation_COR_PVALUE_EET_ICAR$pearson_cor) < 0.00001, ], n = 5),
  tail(correlation_COR_PVALUE_EET_ICAR, n = 5)
)

# Long table with one row per sample and pair; `chart` is the pair index.
data_chart_1 <- data.frame(stringsAsFactors = FALSE)
data_chart_2 <- data.frame(stringsAsFactors = FALSE)
# seq_len() keeps the loop from running when the subset is empty.
for (i in seq_len(nrow(correlation_COR_PVALUE_EET_ICAR_subset))) {
  # Columns 1 and 2 are read as the EET and endometrium gene ids
  # (presumably gene_id_eet / gene_id_end -- confirm column order).
  gene_eet <- correlation_COR_PVALUE_EET_ICAR_subset[i, 1]
  gene_endo <- correlation_COR_PVALUE_EET_ICAR_subset[i, 2]
  fpkm_gene_eet <- fpkm_eet_day_18_AI[row.names(fpkm_eet_day_18_AI) == gene_eet, ]
  fpkm_gene_endo <- fpkm_endo_day_18_IC_AI[row.names(fpkm_endo_day_18_IC_AI) == gene_endo, ]
  data_chart_1 <- data.frame(t(fpkm_gene_eet))
  data_chart_1 <- cbind(data_chart_1, data.frame(t(fpkm_gene_endo)))
  data_chart_1$chart <- i
  data_chart_1$gene1 <- gene_eet
  data_chart_1$gene2 <- gene_endo
  colnames(data_chart_1) <- c("gene_EET", "geneICAR", "chart", "gene1", "gene2")
  data_chart_2 <- rbind(data_chart_2, data_chart_1)
}

# Axis labels: gene symbol when available, otherwise the gene's own Ensembl id.
data_chart_2$gene_EET_symbol <- annotation.ensembl.symbol$external_gene_name[match(data_chart_2$gene1, annotation.ensembl.symbol$ensembl_gene_id)]
data_chart_2$gene_END_symbol <- annotation.ensembl.symbol$external_gene_name[match(data_chart_2$gene2, annotation.ensembl.symbol$ensembl_gene_id)]
data_chart_2$gene1 <- as.character(data_chart_2$gene1)
data_chart_2$gene2 <- as.character(data_chart_2$gene2)
data_chart_2$gene_EET_symbol <- ifelse(
  is.na(data_chart_2$gene_EET_symbol) | data_chart_2$gene_EET_symbol == "",
  data_chart_2$gene1, data_chart_2$gene_EET_symbol
)
# The endometrium label falls back to gene2 (the endometrium id). The original
# fell back to gene1, which put the EET gene id on the endometrium axis.
data_chart_2$gene_END_symbol <- ifelse(
  is.na(data_chart_2$gene_END_symbol) | data_chart_2$gene_END_symbol == "",
  data_chart_2$gene2, data_chart_2$gene_END_symbol
)

# One small scatter plot per pair, arranged in a 3 x 5 grid.
plots <- list()
k <- 1
for (j in c(1:15)) {
  data_chart_3 <- data_chart_2[data_chart_2$chart %in% j, ]
  plot <- ggplot(data = data_chart_3, aes(x = geneICAR, y = gene_EET)) +
    geom_point(size = 0.4) +
    scale_y_continuous(name = data_chart_3$gene_EET_symbol[1]) +
    scale_x_continuous(name = data_chart_3$gene_END_symbol[1]) +
    theme(aspect.ratio = 1,
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          plot.background = element_blank(),
          axis.text.x = element_text(colour = 'black', size = 7),
          axis.text.y = element_text(colour = 'black', size = 7),
          axis.title = element_text(colour = 'black', size = 7, face = "italic"),
          axis.ticks = element_line(size = 0.1),
          panel.spacing = unit(1, "mm"),
          legend.position = "none",
          axis.line = element_line(size = 0.1, colour = "black")
    )
  plots[[k]] <- plot
  k <- k + 1
}
ggarrange(plotlist = plots, ncol = 5, nrow = 3)
# Degree distribution of EET genes in the EET-CAR network (|r| > treshold).
annotation.ensembl.symbol <- read.table(
  "2017_12_20_annotation.ensembl.symbol.txt.bz2",
  stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = ""
)
treshold <- 0.95
correlation_COR_PVALUE_EET_CAR_subset <- correlation_COR_PVALUE_EET_CAR[
  abs(correlation_COR_PVALUE_EET_CAR$pearson_cor) > treshold,
]
# Number of retained pairs per EET gene (its degree).
degree <- data.frame(table(correlation_COR_PVALUE_EET_CAR_subset$gene_id_eet))
# ave() returns the overall mean repeated for every element; [1] extracts it.
ave(degree$Freq)[1]
## [1] 295.5072
summary(degree$Freq)[3]
## Median
## 101
degree <- degree[order(-degree$Freq), ]
# How many genes share each degree value; Var1 is converted back to a number.
degree_data <- data.frame(table(degree$Freq))
degree_data$Var1 <- as.numeric(as.character(degree_data$Var1))
# Only the ten highest-degree genes receive a label; match() returns the first
# gene for each degree value.
degree <- degree[1:10, ]
degree_data$ID <- degree$Var1[match(degree_data$Var1, degree$Freq)]
degree_data$symbol <- annotation.ensembl.symbol$external_gene_name[
  match(degree_data$ID, annotation.ensembl.symbol$ensembl_gene_id)
]
plot1 <- ggplot(data = degree_data, aes(x = Var1, y = Freq)) +
  geom_point(shape = 20, size = 0.5) +
  geom_text_repel(
    aes(label = symbol),
    max.iter = 3e5, fontface = 'italic', angle = 0, nudge_y = 5,
    na.rm = TRUE, force = TRUE, size = 2, colour = "red",
    segment.color = "gray", segment.size = 0.1,
    box.padding = unit(0.6, 'lines')
  ) +
  scale_y_continuous(name = "Number of genes in EET") +
  scale_x_continuous(name = "Number of connections with genes in CAR") +
  #labs(x=expression(paste("Degree of centrality ", (Log[10]), sep=" ")),y=expression(paste("Number of nodes ", (Log[10]), sep=" ")))+
  theme_bw() +
  theme(
    axis.text = element_text(size = 7, color = "black"),
    #axis.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# Degree distribution of EET genes in the EET-ICAR network (|r| > treshold).
treshold <- 0.95
correlation_COR_PVALUE_EET_ICAR_subset <- correlation_COR_PVALUE_EET_ICAR[
  abs(correlation_COR_PVALUE_EET_ICAR$pearson_cor) > treshold,
]
# Number of retained pairs per EET gene (its degree).
degree <- data.frame(table(correlation_COR_PVALUE_EET_ICAR_subset$gene_id_eet))
# ave() returns the overall mean repeated for every element; [1] extracts it.
ave(degree$Freq)[1]
## [1] 266.4669
summary(degree$Freq)[3]
## Median
## 252
degree <- degree[order(-degree$Freq), ]
# How many genes share each degree value; Var1 is converted back to a number.
degree_data <- data.frame(table(degree$Freq))
degree_data$Var1 <- as.numeric(as.character(degree_data$Var1))
# Only the eight highest-degree genes receive a label; match() returns the
# first gene for each degree value.
degree <- degree[1:8, ]
degree_data$ID <- degree$Var1[match(degree_data$Var1, degree$Freq)]
degree_data$symbol <- annotation.ensembl.symbol$external_gene_name[
  match(degree_data$ID, annotation.ensembl.symbol$ensembl_gene_id)
]
# Manual relabel of the ROR2 point as a combined "MYOZ1,ROR2" label.
degree_data$symbol[degree_data$symbol %in% "ROR2"] <- "MYOZ1,ROR2"
plot2 <- ggplot(data = degree_data, aes(x = Var1, y = Freq)) +
  geom_point(shape = 20, size = 0.5) +
  geom_text_repel(
    aes(label = symbol),
    max.iter = 3e5, fontface = 'italic', angle = 0,
    nudge_y = 4, nudge_x = -10,
    na.rm = TRUE, force = TRUE, size = 2, colour = "red",
    segment.color = "gray", segment.size = 0.1,
    box.padding = unit(0.6, 'lines')
  ) +
  scale_y_continuous(name = "Number of genes in EET") +
  scale_x_continuous(name = "Number of connections with genes in ICAR") +
  #labs(x=expression(paste("Degree of centrality ", (Log[10]), sep=" ")),y=expression(paste("Number of nodes ", (Log[10]), sep=" ")))+
  theme_bw() +
  theme(
    axis.text = element_text(size = 7, color = "black"),
    #axis.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# Degree distribution of CAR genes in the EET-CAR network (|r| > treshold).
correlation_COR_PVALUE_EET_CAR_subset <- correlation_COR_PVALUE_EET_CAR[
  abs(correlation_COR_PVALUE_EET_CAR$pearson_cor) > treshold,
]
# Number of retained pairs per endometrium gene (its degree).
degree <- data.frame(table(correlation_COR_PVALUE_EET_CAR_subset$gene_id_end))
# ave() returns the overall mean repeated for every element; [1] extracts it.
ave(degree$Freq)[1]
## [1] 216.2568
summary(degree$Freq)[3]
## Median
## 63
degree <- degree[order(-degree$Freq), ]
# How many genes share each degree value; Var1 is converted back to a number.
degree_data <- data.frame(table(degree$Freq))
degree_data$Var1 <- as.numeric(as.character(degree_data$Var1))
# Only the ten highest-degree genes receive a label; match() returns the first
# gene for each degree value.
degree <- degree[1:10, ]
degree_data$ID <- degree$Var1[match(degree_data$Var1, degree$Freq)]
degree_data$symbol <- annotation.ensembl.symbol$external_gene_name[
  match(degree_data$ID, annotation.ensembl.symbol$ensembl_gene_id)
]
# Manual relabel of the SCO2 point as a combined "SCO2,TUBGCP4" label.
degree_data$symbol[degree_data$symbol %in% "SCO2"] <- "SCO2,TUBGCP4"
plot3 <- ggplot(data = degree_data, aes(x = Var1, y = Freq)) +
  geom_point(shape = 20, size = 0.5) +
  geom_text_repel(
    aes(label = symbol),
    max.iter = 3e5, fontface = 'italic', angle = 0,
    nudge_y = 8, nudge_x = 5,
    na.rm = TRUE, force = TRUE, size = 2, colour = "red",
    segment.color = "gray", segment.size = 0.1,
    box.padding = unit(0.6, 'lines')
  ) +
  scale_y_continuous(name = "Number of genes in CAR") +
  scale_x_continuous(name = "Number of connections with genes in EET") +
  #labs(x=expression(paste("Degree of centrality ", (Log[10]), sep=" ")),y=expression(paste("Number of nodes ", (Log[10]), sep=" ")))+
  theme_bw() +
  theme(
    axis.text = element_text(size = 7, color = "black"),
    #axis.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    panel.background = element_rect(fill = "transparent", colour = NA)
  )
# Degree distribution of ICAR genes in the EET-ICAR network (|r| > treshold).
correlation_COR_PVALUE_EET_ICAR_subset <- correlation_COR_PVALUE_EET_ICAR[
  abs(correlation_COR_PVALUE_EET_ICAR$pearson_cor) > treshold,
]
# Number of retained pairs per endometrium gene (its degree).
degree <- data.frame(table(correlation_COR_PVALUE_EET_ICAR_subset$gene_id_end))
# ave() returns the overall mean repeated for every element; [1] extracts it.
ave(degree$Freq)[1]
## [1] 194.9449
summary(degree$Freq)[3]
## Median
## 77
degree <- degree[order(-degree$Freq), ]
# How many genes share each degree value; Var1 is converted back to a number.
degree_data <- data.frame(table(degree$Freq))
degree_data$Var1 <- as.numeric(as.character(degree_data$Var1))
# Only the ten highest-degree genes receive a label; match() returns the first
# gene for each degree value.
degree <- degree[1:10, ]
degree_data$ID <- degree$Var1[match(degree_data$Var1, degree$Freq)]
degree_data$symbol <- annotation.ensembl.symbol$external_gene_name[
  match(degree_data$ID, annotation.ensembl.symbol$ensembl_gene_id)
]
plot4 <- ggplot(data = degree_data, aes(x = Var1, y = Freq)) +
  geom_point(shape = 20, size = 0.5) +
  geom_text_repel(
    aes(label = symbol),
    max.iter = 3e5, fontface = 'italic', angle = 0,
    nudge_y = 5, nudge_x = 2,
    na.rm = TRUE, force = TRUE, size = 2, colour = "red",
    segment.color = "gray", segment.size = 0.1,
    box.padding = unit(0.6, 'lines')
  ) +
  scale_y_continuous(name = "Number of genes in ICAR") +
  scale_x_continuous(name = "Number of connections with genes in EET") +
  #labs(x=expression(paste("Degree of centrality ", (Log[10]), sep=" ")),y=expression(paste("Number of nodes ", (Log[10]), sep=" ")))+
  theme_bw() +
  theme(
    axis.text = element_text(size = 7, color = "black"),
    #axis.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA),
    panel.background = element_rect(fill = "transparent", colour = NA)
  )
# Four degree plots in a 2 x 2 panel.
ggarrange(plotlist = list(plot1, plot2, plot3, plot4), nrow = 2, ncol = 2)
# Pairs with |r| > 0.9999 between EET and CAR, and the sample dendrograms
# built from the genes taking part in those pairs.
treshold <- 0.9999
correlation_COR_PVALUE_EET_CAR_subset <- correlation_COR_PVALUE_EET_CAR[
  abs(correlation_COR_PVALUE_EET_CAR$pearson_cor) > treshold,
]
#summary(correlation_COR_PVALUE_EET_CAR_subset$pearson_cor)
fpkm_eet_day_18_AI_subset <- fpkm_eet_day_18_AI[
  rownames(fpkm_eet_day_18_AI) %in% correlation_COR_PVALUE_EET_CAR_subset$gene_id_eet,
]
colnames(fpkm_eet_day_18_AI_subset) <- c("P129", "P353", "P1091", "P1092", "P1096")
#number of genes in the EET
nrow(fpkm_eet_day_18_AI_subset)
## [1] 229
fpkm_eet_day_18_CAR_AI_subset <- fpkm_endo_day_18_C_AI[
  rownames(fpkm_endo_day_18_C_AI) %in% correlation_COR_PVALUE_EET_CAR_subset$gene_id_end,
]
colnames(fpkm_eet_day_18_CAR_AI_subset) <- c("P129", "P353", "P1091", "P1092", "P1096")
#number of genes in CAR
nrow(fpkm_eet_day_18_CAR_AI_subset)
## [1] 218
#number of unique genes
length(unique(c(rownames(fpkm_eet_day_18_AI_subset), rownames(fpkm_eet_day_18_CAR_AI_subset))))
## [1] 441

# cor() on a data frame correlates its columns, so the distances (1 - r) and
# the complete-linkage trees are between the five sample columns.
distance_EET <- as.dist(1 - cor(fpkm_eet_day_18_AI_subset, method = "pearson"))
clustering_EET <- flashClust(distance_EET, method = "complete")
dendogram_EET <- as.dendrogram(clustering_EET)
distance_CAR <- as.dist(1 - cor(fpkm_eet_day_18_CAR_AI_subset, method = "pearson"))
clustering_CAR <- flashClust(distance_CAR, method = "complete")
dendogram_CAR <- as.dendrogram(clustering_CAR)
dendrogram_list <- dendlist(rank_branches(dendogram_EET), rank_branches(dendogram_CAR))

# Attach the annotation twice: first for the EET gene, then for the CAR gene.
correlation_COR_PVALUE_EET_CAR_subset_annotated <- merge(
  correlation_COR_PVALUE_EET_CAR_subset, annotation.ensembl.symbol,
  by.x = "gene_id_eet", by.y = "ensembl_gene_id", all.x = TRUE, all.y = FALSE
)
correlation_COR_PVALUE_EET_CAR_subset_annotated <- merge(
  correlation_COR_PVALUE_EET_CAR_subset_annotated, annotation.ensembl.symbol,
  by.x = "gene_id_end", by.y = "ensembl_gene_id", all.x = TRUE, all.y = FALSE
)
colnames(correlation_COR_PVALUE_EET_CAR_subset_annotated) <- c(
  "gene_id_end", "gene_id_eet", "pearson_cor",
  "hgnc_symbol.EET", "description.EET", "external_gene_name.EET",
  "hgnc_symbol.CAR", "description.CAR", "external_gene_name.CAR"
)
# Swap the first two columns so that gene_id_eet comes first.
correlation_COR_PVALUE_EET_CAR_subset_annotated <-
  correlation_COR_PVALUE_EET_CAR_subset_annotated[, c(2, 1, 3:9)]
Testing for the similarity of the dendrograms.
# Mantel test between the EET and CAR sample distance matrices, followed by a
# tanglegram of the two dendrograms. The seed fixes the permutation draw.
set.seed(1236)
mantel(xdis = distance_EET, ydis = distance_CAR, permutations = 119)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = distance_EET, ydis = distance_CAR, permutations = 119)
##
## Mantel statistic r: 0.9651
## Significance: 0.0083333
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.510 0.622 0.662 0.891
## Permutation: free
## Number of permutations: 119
tanglegram(
  dendrogram_list,
  sort = FALSE,
  common_subtrees_color_lines = TRUE,
  highlight_distinct_edges = FALSE,
  highlight_branches_lwd = FALSE
)
Testing for gene ontology enrichment for the 441 genes that form the matching dendrograms between conceptus and endometrium
# Gene ontology (biological process) enrichment with goseq for the genes in
# the |r| > 0.9999 EET-CAR pairs, using gene length as the bias covariate.
# The gene-length and GO annotation files are read under the 2017_12_20 names,
# which are the names listed among the downloadable inputs at the top of this
# file; the original read 2017_10_26_* files that are not in that list.
annotation.ensembl.symbol <- read.table("2017_12_20_annotation.ensembl.symbol.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")
annotation.genelength.biomart <- read.table("2017_12_20_annotation.genelength.biomart.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t")
annotation.GO.biomart <- read.table("2017_12_20_annotation.GO.biomart.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t")

# Test set: every gene on either side of a retained pair.
test.genes <- data.frame(
  a = unique(c(as.character(correlation_COR_PVALUE_EET_CAR_subset$gene_id_eet),
               as.character(correlation_COR_PVALUE_EET_CAR_subset$gene_id_end))),
  stringsAsFactors = FALSE
)
# Background: every gene present in the CAR or EET expression tables.
all_genes <- data.frame(
  gene = unique(c(rownames(fpkm_endo_day_18_C_AI), rownames(fpkm_eet_day_18_AI))),
  stringsAsFactors = FALSE
)
rownames(all_genes) <- all_genes$gene
# Named 0/1 vector: 1 marks a background gene that is in the test set.
all_genes_numeric <- as.integer(all_genes$gene %in% test.genes$a)
names(all_genes_numeric) <- all_genes$gene

# Keep the longest transcript per gene, restricted to the background genes.
annotation.genelength.biomart <- annotation.genelength.biomart[with(annotation.genelength.biomart, order(ensembl_gene_id, -transcript_length)), ]
annotation.genelength.biomart <- annotation.genelength.biomart[!duplicated(annotation.genelength.biomart$ensembl_gene_id), ]
annotation.genelength.biomart <- annotation.genelength.biomart[annotation.genelength.biomart$ensembl_gene_id %in% rownames(all_genes), ]
annotation.genelength.biomart_vector <- annotation.genelength.biomart[, 2]
names(annotation.genelength.biomart_vector) <- annotation.genelength.biomart[, 1]

# Gene -> GO mapping limited to biological_process terms and background genes.
annotation.GO.BP.biomart <- annotation.GO.biomart[annotation.GO.biomart$namespace_1003 == "biological_process", c(1:2)]
annotation.GO.BP.biomart <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% rownames(all_genes), ]

pwf <- nullp(all_genes_numeric, bias.data = annotation.genelength.biomart_vector, plot.fit = FALSE)
set.seed(51790)
GO_BP_Cats_EET_CAR_subset <- goseq(pwf, gene2cat = annotation.GO.BP.biomart, method = "Sampling", repcnt = 7000, use_genes_without_cat = FALSE)
GO_BP_Cats_EET_CAR_subset <- GO_BP_Cats_EET_CAR_subset[GO_BP_Cats_EET_CAR_subset$numDEInCat > 1, ]
# The column is named BY_FDR, but the adjustment applied is p.adjust's "fdr"
# method.
GO_BP_Cats_EET_CAR_subset$BY_FDR <- p.adjust(GO_BP_Cats_EET_CAR_subset$over_represented_pvalue, method = "fdr")
GO_BP_Cats_EET_CAR_subset <- GO_BP_Cats_EET_CAR_subset[with(GO_BP_Cats_EET_CAR_subset, order(BY_FDR, -numDEInCat, term)), ]

# Expand each category to one row per test gene and add the gene annotation.
# Column order matters here: later code selects columns of this table by
# position.
annotation.GO.BP.biomart_testgenes <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% test.genes$a, ]
GO_BP_Cats_EET_CAR_subset <- merge(GO_BP_Cats_EET_CAR_subset, annotation.GO.BP.biomart_testgenes, by.x = "category", by.y = "go_id", all.x = TRUE, all.y = FALSE)
GO_BP_Cats_EET_CAR_subset <- merge(GO_BP_Cats_EET_CAR_subset, annotation.ensembl.symbol, by.x = "ensembl_gene_id", by.y = "ensembl_gene_id", all.x = TRUE, all.y = FALSE)
GO_BP_Cats_EET_CAR_subset <- GO_BP_Cats_EET_CAR_subset[with(GO_BP_Cats_EET_CAR_subset, order(BY_FDR, -numDEInCat)), ]
GO_BP_Cats_EET_CAR_subset <- GO_BP_Cats_EET_CAR_subset[GO_BP_Cats_EET_CAR_subset$numDEInCat > 2 & GO_BP_Cats_EET_CAR_subset$BY_FDR < 0.13, ]
Testing for KEGG pathways enrichment for the 441 genes that form the matching dendrograms between conceptus and endometrium
# KEGG pathway enrichment with goseq for the genes in the retained EET-CAR
# pairs, working on Entrez ids mapped from the Ensembl ids.
test.genes <- data.frame(
  a = unique(c(as.character(correlation_COR_PVALUE_EET_CAR_subset$gene_id_eet),
               as.character(correlation_COR_PVALUE_EET_CAR_subset$gene_id_end))),
  stringsAsFactors = FALSE
)
# Ensembl -> Entrez lookup for the test genes; egIDs keeps the unfiltered map.
egIDs <- stack(mget(test.genes$a, org.Bt.egENSEMBL2EG, ifnotfound = NA))
all_genes <- data.frame(
  gene = unique(c(rownames(fpkm_endo_day_18_C_AI), rownames(fpkm_eet_day_18_AI))),
  stringsAsFactors = FALSE
)
rownames(all_genes) <- all_genes$gene
test.genes_entrez <- egIDs[complete.cases(egIDs), ]

# Background genes with an Entrez id, one row per Entrez id.
all_genes_entrez <- stack(mget(all_genes$gene, org.Bt.egENSEMBL2EG, ifnotfound = NA))
all_genes_entrez <- all_genes_entrez[complete.cases(all_genes_entrez), ]
all_genes_entrez <- all_genes_entrez[!duplicated(all_genes_entrez$values), ]
# Named 0/1 vector: 1 marks a background gene that is in the test set.
all_genes_entrez_numeric <- as.integer(all_genes_entrez$values %in% test.genes_entrez$values)
names(all_genes_entrez_numeric) <- all_genes_entrez$values

# Entrez -> KEGG pathway table, reordered to (ind, values) = (gene, pathway).
entrez_kegg <- stack(mget(all_genes_entrez$values, org.Bt.egPATH, ifnotfound = NA))
entrez_kegg <- entrez_kegg[complete.cases(entrez_kegg), ]
entrez_kegg <- entrez_kegg[, c("ind", "values")]
entrez_kegg_test_genes <- entrez_kegg[entrez_kegg$ind %in% test.genes_entrez$values, ]

pwf <- nullp(all_genes_entrez_numeric, 'bosTau4', 'refGene', plot.fit = FALSE)
set.seed(8503)
kegg_EET_CAR_subset <- goseq(pwf, gene2cat = entrez_kegg, method = "Sampling",
                             repcnt = 7000, use_genes_without_cat = FALSE)
kegg_EET_CAR_subset <- kegg_EET_CAR_subset[kegg_EET_CAR_subset$numDEInCat > 3, ]
kegg_EET_CAR_subset$BY_FDR <- p.adjust(kegg_EET_CAR_subset$over_represented_pvalue, method = "fdr")

# Expand to one row per test gene, then translate Entrez -> Ensembl -> symbol.
# Column positions are relied upon by the [, 2:8] selection and by later code.
kegg_EET_CAR_subset <- merge(kegg_EET_CAR_subset, entrez_kegg_test_genes,
                             by.x = "category", by.y = "values",
                             all.x = TRUE, all.y = FALSE)
kegg_EET_CAR_subset <- merge(kegg_EET_CAR_subset, all_genes_entrez,
                             by.x = "ind", by.y = "values",
                             all.x = TRUE, all.y = FALSE,
                             suffixes = c('.entrez', '.ensembl'))
kegg_EET_CAR_subset <- kegg_EET_CAR_subset[order(kegg_EET_CAR_subset$BY_FDR, -kegg_EET_CAR_subset$numDEInCat), ]
kegg_EET_CAR_subset <- kegg_EET_CAR_subset[, 2:8]
kegg_EET_CAR_subset <- merge(kegg_EET_CAR_subset, annotation.ensembl.symbol,
                             by.x = "ind.ensembl", by.y = "ensembl_gene_id",
                             all.x = TRUE, all.y = FALSE)
kegg_EET_CAR_subset <- kegg_EET_CAR_subset[order(kegg_EET_CAR_subset$BY_FDR, -kegg_EET_CAR_subset$numDEInCat), ]
kegg_EET_CAR_subset <- kegg_EET_CAR_subset[kegg_EET_CAR_subset$BY_FDR < 0.1, ]
# Scatter plots for genes annotated to 'mRNA processing' or
# 'chromatin organization': for each gene, the highest-correlation EET-CAR
# pair in which it takes part.
# Column 12 of the GO table is read as the gene symbol
# (presumably external_gene_name -- confirm).
genes_interest_1 <- sort(GO_BP_Cats_EET_CAR_subset[GO_BP_Cats_EET_CAR_subset$term == 'mRNA processing', 12])
genes_interest_2 <- sort(GO_BP_Cats_EET_CAR_subset[GO_BP_Cats_EET_CAR_subset$term == 'chromatin organization', 12])
genes_interest <- unique(c(genes_interest_1, genes_interest_2))
correlation_COR_PVALUE_EET_CAR_subset_annotated <- correlation_COR_PVALUE_EET_CAR_subset_annotated[order(-correlation_COR_PVALUE_EET_CAR_subset_annotated$pearson_cor), ]

data_chart_1 <- data.frame(stringsAsFactors = FALSE)
data_chart_2 <- data.frame(stringsAsFactors = FALSE)
j <- 1
for (i in genes_interest) {
  # %in% never yields NA, so pairs whose symbol is missing cannot slip into
  # the selection as all-NA rows ahead of the first real match.
  correlation_COR_PVALUE_EET_CAR_subset_annotated_a <- correlation_COR_PVALUE_EET_CAR_subset_annotated[
    correlation_COR_PVALUE_EET_CAR_subset_annotated$external_gene_name.EET %in% i |
      correlation_COR_PVALUE_EET_CAR_subset_annotated$external_gene_name.CAR %in% i,
  ]
  # The table is sorted by decreasing correlation, so row 1 is the strongest
  # pair; columns 1 and 2 hold gene_id_eet and gene_id_end.
  gene_eet <- as.character(correlation_COR_PVALUE_EET_CAR_subset_annotated_a[1, 1])
  gene_endo <- as.character(correlation_COR_PVALUE_EET_CAR_subset_annotated_a[1, 2])
  fpkm_gene_eet <- fpkm_eet_day_18_AI[row.names(fpkm_eet_day_18_AI) == gene_eet, ]
  fpkm_gene_endo <- fpkm_endo_day_18_C_AI[row.names(fpkm_endo_day_18_C_AI) == gene_endo, ]
  data_chart_1 <- data.frame(t(fpkm_gene_eet))
  data_chart_1 <- cbind(data_chart_1, data.frame(t(fpkm_gene_endo)))
  data_chart_1$chart <- j
  data_chart_1$gene1 <- gene_eet
  data_chart_1$gene2 <- gene_endo
  colnames(data_chart_1) <- c("gene_EET", "geneCAR", "chart", "gene1", "gene2")
  data_chart_2 <- rbind(data_chart_2, data_chart_1)
  j <- j + 1
}

# Axis labels: gene symbol when available, otherwise the gene's own Ensembl id.
data_chart_2$gene_EET_symbol <- annotation.ensembl.symbol$external_gene_name[match(data_chart_2$gene1, annotation.ensembl.symbol$ensembl_gene_id)]
data_chart_2$gene_END_symbol <- annotation.ensembl.symbol$external_gene_name[match(data_chart_2$gene2, annotation.ensembl.symbol$ensembl_gene_id)]
data_chart_2$gene1 <- as.character(data_chart_2$gene1)
data_chart_2$gene2 <- as.character(data_chart_2$gene2)
data_chart_2$gene_EET_symbol <- ifelse(
  is.na(data_chart_2$gene_EET_symbol) | data_chart_2$gene_EET_symbol == "",
  data_chart_2$gene1, data_chart_2$gene_EET_symbol
)
# The endometrium label falls back to gene2 (the endometrium id). The original
# fell back to gene1, which put the EET gene id on the endometrium axis.
data_chart_2$gene_END_symbol <- ifelse(
  is.na(data_chart_2$gene_END_symbol) | data_chart_2$gene_END_symbol == "",
  data_chart_2$gene2, data_chart_2$gene_END_symbol
)

# One scatter plot for each of the first eight charts.
plots <- list()
k <- 1
for (j in c(1:8)) {
  data_chart_3 <- data_chart_2[data_chart_2$chart %in% j, ]
  plot <- ggplot(data = data_chart_3, aes(x = geneCAR, y = gene_EET)) +
    geom_point(size = 1) +
    scale_y_continuous(name = data_chart_3$gene_EET_symbol[1], breaks = pretty_breaks(3)) +
    scale_x_continuous(name = data_chart_3$gene_END_symbol[1], breaks = pretty_breaks(3)) +
    theme(aspect.ratio = 1,
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          plot.background = element_blank(),
          axis.text.x = element_text(colour = 'black', size = 13),
          axis.text.y = element_text(colour = 'black', size = 13),
          axis.title = element_text(colour = 'black', size = 13, face = "italic"),
          axis.ticks = element_line(size = 0.1),
          panel.spacing = unit(0, "mm"),
          legend.position = "none",
          axis.line = element_line(size = 0.8, colour = "black")
    )
  plots[[k]] <- plot
  k <- k + 1
}
ggarrange(plotlist = plots)
# Scatter plots for the genes annotated to KEGG category '03013': for each
# gene, the highest-correlation EET-CAR pair in which it takes part.
# Column 10 of the KEGG table is read as the gene symbol
# (presumably external_gene_name -- confirm).
genes_interest <- sort(unique(as.character(kegg_EET_CAR_subset[kegg_EET_CAR_subset$category == '03013', 10])))
correlation_COR_PVALUE_EET_CAR_subset_annotated <- correlation_COR_PVALUE_EET_CAR_subset_annotated[order(-correlation_COR_PVALUE_EET_CAR_subset_annotated$pearson_cor), ]

data_chart_1 <- data.frame(stringsAsFactors = FALSE)
data_chart_2 <- data.frame(stringsAsFactors = FALSE)
j <- 1
for (i in genes_interest) {
  # %in% never yields NA, so pairs whose symbol is missing cannot slip into
  # the selection as all-NA rows ahead of the first real match.
  correlation_COR_PVALUE_EET_CAR_subset_annotated_a <- correlation_COR_PVALUE_EET_CAR_subset_annotated[
    correlation_COR_PVALUE_EET_CAR_subset_annotated$external_gene_name.EET %in% i |
      correlation_COR_PVALUE_EET_CAR_subset_annotated$external_gene_name.CAR %in% i,
  ]
  # The table is sorted by decreasing correlation, so row 1 is the strongest
  # pair; columns 1 and 2 hold gene_id_eet and gene_id_end.
  gene_eet <- as.character(correlation_COR_PVALUE_EET_CAR_subset_annotated_a[1, 1])
  gene_endo <- as.character(correlation_COR_PVALUE_EET_CAR_subset_annotated_a[1, 2])
  fpkm_gene_eet <- fpkm_eet_day_18_AI[row.names(fpkm_eet_day_18_AI) == gene_eet, ]
  fpkm_gene_endo <- fpkm_endo_day_18_C_AI[row.names(fpkm_endo_day_18_C_AI) == gene_endo, ]
  data_chart_1 <- data.frame(t(fpkm_gene_eet))
  data_chart_1 <- cbind(data_chart_1, data.frame(t(fpkm_gene_endo)))
  data_chart_1$chart <- j
  data_chart_1$gene1 <- gene_eet
  data_chart_1$gene2 <- gene_endo
  colnames(data_chart_1) <- c("gene_EET", "geneCAR", "chart", "gene1", "gene2")
  data_chart_2 <- rbind(data_chart_2, data_chart_1)
  j <- j + 1
}

# Axis labels: gene symbol when available, otherwise the gene's own Ensembl id.
data_chart_2$gene_EET_symbol <- annotation.ensembl.symbol$external_gene_name[match(data_chart_2$gene1, annotation.ensembl.symbol$ensembl_gene_id)]
data_chart_2$gene_END_symbol <- annotation.ensembl.symbol$external_gene_name[match(data_chart_2$gene2, annotation.ensembl.symbol$ensembl_gene_id)]
data_chart_2$gene1 <- as.character(data_chart_2$gene1)
data_chart_2$gene2 <- as.character(data_chart_2$gene2)
data_chart_2$gene_EET_symbol <- ifelse(
  is.na(data_chart_2$gene_EET_symbol) | data_chart_2$gene_EET_symbol == "",
  data_chart_2$gene1, data_chart_2$gene_EET_symbol
)
# The endometrium label falls back to gene2 (the endometrium id). The original
# fell back to gene1, which put the EET gene id on the endometrium axis.
data_chart_2$gene_END_symbol <- ifelse(
  is.na(data_chart_2$gene_END_symbol) | data_chart_2$gene_END_symbol == "",
  data_chart_2$gene2, data_chart_2$gene_END_symbol
)

# One scatter plot for each of the first ten charts, in a 2 x 5 grid.
plots <- list()
k <- 1
for (j in c(1:10)) {
  data_chart_3 <- data_chart_2[data_chart_2$chart %in% j, ]
  plot <- ggplot(data = data_chart_3, aes(x = geneCAR, y = gene_EET)) +
    geom_point(size = 0.5) +
    scale_y_continuous(name = data_chart_3$gene_EET_symbol[1]) +
    scale_x_continuous(name = data_chart_3$gene_END_symbol[1]) +
    theme(aspect.ratio = 1,
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          plot.background = element_blank(),
          axis.text.x = element_text(colour = 'black', size = 8),
          axis.text.y = element_text(colour = 'black', size = 8),
          axis.title = element_text(colour = 'black', size = 8, face = "italic"),
          axis.ticks = element_line(size = 0.1),
          panel.spacing = unit(0, "mm"),
          legend.position = "none",
          axis.line = element_line(size = 0.5, colour = "black")
    )
  plots[[k]] <- plot
  k <- k + 1
}
ggarrange(plotlist = plots, ncol = 5, nrow = 2)
Assessing the probability of false positive results from GO results by a bootstrapping procedure.
# GO bootstrap: 2000 rounds, each drawing 441 random background genes, running
# goseq (Wallenius) on them and returning the term and FDR of every category
# with at least one sampled gene.
cl <- makeCluster(10)
registerDoParallel(cl)
results <- foreach(
  j = 1:2000,
  .combine = 'rbind',
  .inorder = FALSE,
  .packages = c("multtest", "goseq"),
  .verbose = FALSE
) %dopar% {
  # Background = all genes present in the CAR or EET tables.
  gene_universe <- unique(c(rownames(fpkm_endo_day_18_C_AI), rownames(fpkm_eet_day_18_AI)))
  test.genes <- data.frame(a = sample(gene_universe, 441), stringsAsFactors = FALSE)
  all_genes <- data.frame(gene = gene_universe, stringsAsFactors = FALSE)
  rownames(all_genes) <- all_genes$gene
  # Named 0/1 vector flagging the randomly drawn genes.
  all_genes_numeric <- as.integer(all_genes$gene %in% test.genes$a)
  names(all_genes_numeric) <- all_genes$gene
  pwf <- nullp(all_genes_numeric,
               bias.data = annotation.genelength.biomart_vector,
               plot.fit = FALSE)
  GO_BP_Cats_EET_CAR_subset <- goseq(pwf,
                                     gene2cat = annotation.GO.BP.biomart,
                                     method = "Wallenius",
                                     use_genes_without_cat = FALSE)
  has_hit <- GO_BP_Cats_EET_CAR_subset$numDEInCat >= 1
  GO_BP_Cats_EET_CAR_subset <- GO_BP_Cats_EET_CAR_subset[has_hit, ]
  GO_BP_Cats_EET_CAR_subset$BY_FDR <- p.adjust(
    GO_BP_Cats_EET_CAR_subset$over_represented_pvalue,
    method = "fdr"
  )
  GO_BP_Cats_EET_CAR_subset[c("term", "BY_FDR")]
}
stopCluster(cl)
# Figures for the GO bootstrap: a histogram of all bootstrap FDR values, and
# one strip per term of interest showing its bootstrap FDR values (black)
# against the value 0.1297709 (red).
plot1 <- ggplot(results, aes(BY_FDR)) +
  geom_histogram(binwidth = 0.005) +
  scale_x_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1)) +
  theme(
    axis.text.x = element_text(colour = 'black', size = 12),
    axis.text.y = element_text(colour = 'black', size = 12),
    axis.title.y = element_text(colour = 'black', size = 12),
    axis.title.x = element_blank()
  )
# `shape = 16` replaces the original `point = 16`: ggplot2 has no `point`
# parameter, so the requested symbol was being ignored.
plot2 <- ggplot(data = na.omit(results[results$term == 'mRNA processing', ]), aes(y = BY_FDR, x = 0)) +
  geom_jitter(width = c(0.02, 0.01), color = 'black', size = 0.5, alpha = 0.5, shape = 16) +
  geom_point(aes(x = 0, y = 0.1297709), color = 'red', size = 1, shape = 16) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, 0.25, 0.5, 0.75, 1)) +
  theme(
    axis.text.x = element_text(colour = 'black', size = 12),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    plot.margin = unit(c(0, 0.2, 0.5, 2), "cm")
  )
plot3 <- ggplot(data = na.omit(results[results$term == 'chromatin organization', ]), aes(y = BY_FDR, x = 0)) +
  geom_jitter(width = c(0.02, 0.01), color = 'black', size = 0.5, alpha = 0.5, shape = 16) +
  geom_point(aes(x = 0, y = 0.1297709), color = 'red', size = 1, shape = 16) +
  coord_flip() +
  scale_y_continuous(name = "False discovery rate", limits = c(0, 1), breaks = c(0, 0.25, 0.5, 0.75, 1)) +
  theme(
    axis.text.x = element_text(colour = 'black', size = 12),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_text(colour = 'black', size = 12),
    axis.title.y = element_blank(),
    plot.margin = unit(c(0, 0.2, 0.5, 2), "cm")
  )
# Stack the three panels; the histogram gets 2.5x the height of each strip.
ggarrange(plotlist = list(plot1, plot2, plot3), nrow = 3, ncol = 1, heights = c(2.5, 1, 1))
Probability of occurrence of GO categories with FDR values lower than the result obtained for the real data.
# Fraction of all bootstrap values that fall below the value obtained with the
# real data (0.1297709).
nrow(results[results$BY_FDR < 0.1297709, ]) / nrow(results)
## [1] 0.0005020508
Assessing the probability of false positive results from KEGG results by a bootstrapping procedure.
# Null distribution for the KEGG enrichment test.
# 2000 times: draw 441 genes at random from the union of the CAR and EET gene
# lists, translate them to Entrez ids, run goseq against the KEGG pathway map,
# and keep the adjusted P value of every pathway. `results` stacks all draws
# and has the columns `category` and `BY_FDR`.
# NOTE(review): 441 is presumably the size of the real gene list - confirm.

# Gene universe (Ensembl ids) and its Entrez translation; genes without an
# Entrez id are dropped and each Entrez id is kept once.
all_genes <- data.frame(gene = unique(c(rownames(fpkm_endo_day_18_C_AI), rownames(fpkm_eet_day_18_AI))), stringsAsFactors = FALSE)
rownames(all_genes) <- all_genes$gene
all_genes_entrez <- stack(mget(all_genes$gene, org.Bt.egENSEMBL2EG, ifnotfound = NA))
all_genes_entrez <- all_genes_entrez[complete.cases(all_genes_entrez), ]
all_genes_entrez <- all_genes_entrez[!duplicated(all_genes_entrez$values), ]

# Entrez id -> KEGG pathway map for the universe. It does not depend on the
# draw, so it is built once here instead of inside each of the 2000 iterations.
# Columns are swapped so that the gene id comes first, as goseq expects.
kegg_gene2cat_boot <- stack(mget(all_genes_entrez$values, org.Bt.egPATH, ifnotfound = NA))
kegg_gene2cat_boot <- kegg_gene2cat_boot[complete.cases(kegg_gene2cat_boot), ]
kegg_gene2cat_boot <- kegg_gene2cat_boot[, c(2, 1)]

cl <- makeCluster(10)
registerDoParallel(cl)
# tryCatch(finally = ) stops the workers even if one of the iterations fails.
results <- tryCatch(
  foreach(j = 1:2000, .combine = 'rbind', .inorder = FALSE, .packages = c("multtest", "goseq", "org.Bt.eg.db"), .verbose = FALSE) %dopar% {
    drawn_genes <- sample(all_genes$gene, 441)
    # The same lookup used to be executed twice per iteration; once is enough.
    drawn_entrez <- stack(mget(drawn_genes, org.Bt.egENSEMBL2EG, ifnotfound = NA))
    drawn_entrez <- drawn_entrez[complete.cases(drawn_entrez), ]
    # 1 = Entrez id belongs to a drawn gene, 0 = background.
    all_genes_entrez_numeric <- as.integer(all_genes_entrez$values %in% drawn_entrez$values)
    names(all_genes_entrez_numeric) <- all_genes_entrez$values
    pwf <- nullp(all_genes_entrez_numeric, 'bosTau4', 'refGene', plot.fit = FALSE)
    kegg_draw <- goseq(pwf, gene2cat = kegg_gene2cat_boot, method = "Wallenius", use_genes_without_cat = FALSE)
    # The column is called BY_FDR, but p.adjust(method = "fdr") is the
    # Benjamini-Hochberg adjustment; name and method are kept as they are.
    kegg_draw$BY_FDR <- p.adjust(kegg_draw$over_represented_pvalue, method = "fdr")
    kegg_draw[, c("category", "BY_FDR")]
  },
  finally = stopCluster(cl)
)
# Figure for the KEGG bootstrap: histogram of all adjusted P values from the
# random draws (top) and, for pathway '03013', the values from the random
# draws (black) against the value obtained with the real data (red).
#
# Changes against the previous version of this block:
#  * `point = 16` is not a ggplot2 parameter and was ignored; `shape = 16` is
#    what was meant.
#  * `width` of geom_jitter must be a single number; the length-2 vector only
#    produced a warning on old R versions and fails on current ones.
#  * the red reference point is drawn once with annotate() instead of once per
#    data row.
kegg_observed_fdr <- 0.07713184  # value obtained with the real data

plot1 <- ggplot(results, aes(BY_FDR)) +
  geom_histogram(binwidth = 0.005) +
  scale_x_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1)) +
  theme(
    axis.text.x = element_text(colour = 'black', size = 12),
    axis.text.y = element_text(colour = 'black', size = 12),
    axis.title.y = element_text(colour = 'black', size = 12),
    axis.title.x = element_blank()
  )

plot2 <- ggplot(data = na.omit(results[results$category == '03013', ]), aes(y = BY_FDR, x = 0)) +
  geom_jitter(width = 0.02, color = 'black', size = 0.5, alpha = 0.5, shape = 16) +
  annotate("point", x = 0, y = kegg_observed_fdr, colour = 'red', size = 1, shape = 16) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, 0.25, 0.5, 0.75, 1)) +
  theme(
    axis.text.x = element_text(colour = 'black', size = 12),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    plot.margin = unit(c(0, 0.2, 0.5, 2), "cm")
  )

# Stack the two panels; the histogram gets twice the height of the strip.
ggarrange(plotlist = list(plot1, plot2), nrow = 2, ncol = 1, heights = c(2, 1))
Probability of occurrence of KEGG categories with FDR values lower than the result obtained for the real data.
# Fraction of all bootstrap values that fall below the value obtained with the
# real data (0.07713184).
nrow(results[results$BY_FDR < 0.07713184, ]) / nrow(results)
## [1] 0.0002388393
Clustering of EET and CAR genes based on the correlations
# Pearson correlation between every column of the EET table and every column
# of the CAR table (EET columns become rows of the result).
pearson_correlation_EET <- cor(
  fpkm_eet_day_18_AI_a,
  fpkm_endo_day_18_C_AI_a,
  use = "pairwise.complete.obs",
  method = "pearson"
)

# EET tree: Euclidean distance between the correlation profiles, then
# average-linkage clustering. The distance matrix is dropped once the tree is
# built.
dissimilarity <- as.matrix(
  parDist(pearson_correlation_EET, method = "euclidean", diag = TRUE, upper = TRUE)
)
dimnames(dissimilarity) <- rep(list(colnames(fpkm_eet_day_18_AI_a)), 2)
geneTree_eet <- flashClust(as.dist(dissimilarity), method = "average")
rm(dissimilarity)

# CAR tree: same procedure on the correlation computed with the tables swapped.
pearson_correlation_EDO <- cor(
  fpkm_endo_day_18_C_AI_a,
  fpkm_eet_day_18_AI_a,
  use = "pairwise.complete.obs",
  method = "pearson"
)
dissimilarity <- as.matrix(
  parDist(pearson_correlation_EDO, method = "euclidean", diag = TRUE, upper = TRUE)
)
dimnames(dissimilarity) <- rep(list(colnames(fpkm_endo_day_18_C_AI_a)), 2)
geneTree_endo <- flashClust(as.dist(dissimilarity), method = "average")
rm(dissimilarity)

# Heatmap of the correlations with rows and columns in dendrogram order; the
# heatmap itself does no clustering.
pearson_correlation_EET_heatmap <- pearson_correlation_EET[geneTree_eet$order, geneTree_endo$order]
Heatmap(
  pearson_correlation_EET_heatmap,
  name = "correlation",
  col = colorRamp2(c(-1, 0, 1), c("blue", "white", "red")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  show_column_names = FALSE
)
The final output of the next section “go.results3_EET” was used to produce Table S2
# GO (biological process) enrichment of the EET gene clusters obtained by
# cutting geneTree_eet at height 23. Result: go.results3_EET, one row per
# (GO term, cluster gene) pair with BY-adjusted P value below 0.2.

# Annotation tables: gene symbols, transcript lengths, GO terms.
annotation.ensembl.symbol <- read.table("2017_12_20_annotation.ensembl.symbol.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")
annotation.genelength.biomart <- read.table("2017_12_20_annotation.genelength.biomart.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")
annotation.GO.biomart <- read.table("2017_12_20_annotation.GO.biomart.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")

# Background: every gene of the EET expression table.
all_genes <- data.frame(gene = rownames(fpkm_eet_day_18_AI), stringsAsFactors = FALSE)
rownames(all_genes) <- all_genes$gene

# One length per gene (the longest transcript comes first after sorting and is
# the one kept), restricted to the background.
annotation.genelength.biomart <- annotation.genelength.biomart[with(annotation.genelength.biomart, order(ensembl_gene_id, -transcript_length)), ]
annotation.genelength.biomart <- annotation.genelength.biomart[!duplicated(annotation.genelength.biomart$ensembl_gene_id), ]
annotation.genelength.biomart <- annotation.genelength.biomart[annotation.genelength.biomart$ensembl_gene_id %in% rownames(all_genes), ]
annotation.genelength.biomart_vector <- annotation.genelength.biomart[, 2]
names(annotation.genelength.biomart_vector) <- annotation.genelength.biomart[, 1]
# NOTE(review): this vector is sorted by gene id, while the gene vector given
# to nullp() below follows the row order of the expression table. As far as I
# can tell nullp() pairs the two by position, not by name - confirm that both
# orders agree.

# Gene-to-term map, biological process only, restricted to the background.
annotation.GO.BP.biomart <- annotation.GO.biomart[annotation.GO.biomart$namespace_1003 == "biological_process", c(1:2)]
annotation.GO.BP.biomart <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% rownames(all_genes), ]

# Cluster membership at cut heights 1..50 (columns X1..X50).
summary(geneTree_eet$height)
clusters.cut <- seq(1, 50, 1)
genes_at_heigh <- cutree(geneTree_eet, h = clusters.cut)
genes_at_heigh <- data.frame(genes_at_heigh)
head(genes_at_heigh, n = 20)

# Test every cluster at height 23. Per-cluster tables are collected in a list
# and bound once after the loop instead of growing a data frame by rbind().
go_per_cluster <- list()
for (j in seq_len(max(genes_at_heigh$X23))) {
  genes_at_heigh_a <- genes_at_heigh[genes_at_heigh$X23 == j, ]
  test.genes <- data.frame(a = rownames(genes_at_heigh_a), stringsAsFactors = FALSE)
  # Clusters with 3 genes or fewer are not tested.
  if (length(test.genes$a) <= 3) next
  annotation.GO.BP.biomart_testgenes <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% test.genes$a, ]
  # At least 3 genes of the cluster must carry a GO annotation.
  if (length(unique(annotation.GO.BP.biomart_testgenes$ensembl_gene_id)) < 3) next

  # 1 = gene is in the cluster, 0 = background; named by gene id.
  all_genes_numeric <- as.integer(all_genes$gene %in% test.genes$a)
  names(all_genes_numeric) <- all_genes$gene

  # First try the biomart lengths; if that fails, let goseq look the lengths
  # up itself (bosTau4 / ensGene). If both fail the cluster is skipped.
  pwf <- try(nullp(all_genes_numeric, bias.data = annotation.genelength.biomart_vector, plot.fit = FALSE), silent = TRUE)
  if (inherits(pwf, 'try-error')) {
    pwf <- try(nullp(all_genes_numeric, 'bosTau4', 'ensGene', plot.fit = FALSE))
  }
  if (inherits(pwf, 'try-error')) next

  GO_BP_Cats <- goseq(pwf, gene2cat = annotation.GO.BP.biomart, method = "Wallenius", use_genes_without_cat = FALSE)
  # Only terms hit by more than 2 cluster genes enter the multiple-test adjustment.
  GO_BP_Cats <- GO_BP_Cats[GO_BP_Cats$numDEInCat > 2, ]
  if (nrow(GO_BP_Cats) == 0) next
  GO_BP_Cats$BY_FDR <- p.adjust(GO_BP_Cats$over_represented_pvalue, method = "BY")
  GO_BP_Cats$height <- "X23"
  GO_BP_Cats$cluster <- j
  # Attach the cluster genes annotated to each term.
  go_per_cluster[[length(go_per_cluster) + 1]] <- merge(GO_BP_Cats, annotation.GO.BP.biomart_testgenes, by.x = "category", by.y = "go_id", all.x = TRUE, all.y = FALSE)
}
go.results2 <- if (length(go_per_cluster) > 0) do.call(rbind, go_per_cluster) else data.frame()

# Keep terms with adjusted P < 0.2, add gene symbols, sort by adjusted P.
go.results3 <- go.results2[go.results2$BY_FDR < 0.2, ]
go.results3 <- go.results3[with(go.results3, order(BY_FDR)), ]
go.results3 <- merge(go.results3, annotation.ensembl.symbol, by.x = "ensembl_gene_id", by.y = "ensembl_gene_id", all.x = TRUE, all.y = FALSE)
go.results3_EET <- go.results3[with(go.results3, order(BY_FDR)), ]
The final output of the next section “go.results3_CAR” was used to produce Table S3
# GO (biological process) enrichment of the CAR gene clusters obtained by
# cutting geneTree_endo at height 27. Result: go.results3_CAR, one row per
# (GO term, cluster gene) pair with BY-adjusted P value below 0.2.

# Background: every gene of the CAR expression table.
all_genes <- data.frame(gene = rownames(fpkm_endo_day_18_C_AI), stringsAsFactors = FALSE)
rownames(all_genes) <- all_genes$gene

# One length per gene (the longest transcript comes first after sorting and is
# the one kept), restricted to the background.
# NOTE(review): annotation.genelength.biomart is not re-read in this section,
# so the table filtered here is the one the preceding section already
# restricted to EET genes; genes found only in the CAR table have no length
# entry. (The EET/ICAR sections re-read the file before filtering.) If the
# vector ends up shorter than the background, the first nullp() call below
# presumably fails and the bosTau4/ensGene fallback is used instead - confirm.
# Left unchanged on purpose: Table S3 and the hard-coded cluster colours of the
# heatmap further down were produced with this behaviour.
annotation.genelength.biomart <- annotation.genelength.biomart[with(annotation.genelength.biomart, order(ensembl_gene_id, -transcript_length)), ]
annotation.genelength.biomart <- annotation.genelength.biomart[!duplicated(annotation.genelength.biomart$ensembl_gene_id), ]
annotation.genelength.biomart <- annotation.genelength.biomart[annotation.genelength.biomart$ensembl_gene_id %in% rownames(all_genes), ]
annotation.genelength.biomart_vector <- annotation.genelength.biomart[, 2]
names(annotation.genelength.biomart_vector) <- annotation.genelength.biomart[, 1]

# Gene-to-term map, biological process only, restricted to the background.
annotation.GO.BP.biomart <- annotation.GO.biomart[annotation.GO.biomart$namespace_1003 == "biological_process", c(1:2)]
annotation.GO.BP.biomart <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% rownames(all_genes), ]

# Cluster membership at every cut height in clusters.cut (columns X<height>).
genes_at_heigh <- cutree(geneTree_endo, h = clusters.cut)
genes_at_heigh <- data.frame(genes_at_heigh)

# Test every cluster at height 27. Per-cluster tables are collected in a list
# and bound once after the loop instead of growing a data frame by rbind().
go_per_cluster <- list()
for (j in seq_len(max(genes_at_heigh$X27))) {
  genes_at_heigh_a <- genes_at_heigh[genes_at_heigh$X27 == j, ]
  test.genes <- data.frame(a = rownames(genes_at_heigh_a), stringsAsFactors = FALSE)
  # Clusters with 3 genes or fewer are not tested.
  if (length(test.genes$a) <= 3) next
  annotation.GO.BP.biomart_testgenes <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% test.genes$a, ]
  # At least 3 genes of the cluster must carry a GO annotation.
  if (length(unique(annotation.GO.BP.biomart_testgenes$ensembl_gene_id)) < 3) next

  # 1 = gene is in the cluster, 0 = background; named by gene id.
  all_genes_numeric <- as.integer(all_genes$gene %in% test.genes$a)
  names(all_genes_numeric) <- all_genes$gene

  # First try the biomart lengths; if that fails, let goseq look the lengths
  # up itself (bosTau4 / ensGene). If both fail the cluster is skipped.
  pwf <- try(nullp(all_genes_numeric, bias.data = annotation.genelength.biomart_vector, plot.fit = FALSE), silent = TRUE)
  if (inherits(pwf, 'try-error')) {
    pwf <- try(nullp(all_genes_numeric, 'bosTau4', 'ensGene', plot.fit = FALSE))
  }
  if (inherits(pwf, 'try-error')) next

  GO_BP_Cats <- goseq(pwf, gene2cat = annotation.GO.BP.biomart, method = "Wallenius", use_genes_without_cat = FALSE)
  # Only terms hit by more than 2 cluster genes enter the multiple-test adjustment.
  GO_BP_Cats <- GO_BP_Cats[GO_BP_Cats$numDEInCat > 2, ]
  if (nrow(GO_BP_Cats) == 0) next
  GO_BP_Cats$BY_FDR <- p.adjust(GO_BP_Cats$over_represented_pvalue, method = "BY")
  GO_BP_Cats$height <- "X27"
  GO_BP_Cats$cluster <- j
  # Attach the cluster genes annotated to each term.
  go_per_cluster[[length(go_per_cluster) + 1]] <- merge(GO_BP_Cats, annotation.GO.BP.biomart_testgenes, by.x = "category", by.y = "go_id", all.x = TRUE, all.y = FALSE)
}
go.results2 <- if (length(go_per_cluster) > 0) do.call(rbind, go_per_cluster) else data.frame()

# Keep terms with adjusted P < 0.2, add gene symbols, sort by adjusted P.
go.results3 <- go.results2[go.results2$BY_FDR < 0.2, ]
go.results3 <- go.results3[with(go.results3, order(BY_FDR)), ]
go.results3 <- merge(go.results3, annotation.ensembl.symbol, by.x = "ensembl_gene_id", by.y = "ensembl_gene_id", all.x = TRUE, all.y = FALSE)
go.results3_CAR <- go.results3[with(go.results3, order(BY_FDR)), ]
# Row annotation for the EET x CAR heatmap: every EET gene is labelled with
# its cluster at cut height 23; clusters that do not appear in go.results3_EET
# are set to 0.
genes_at_heigh <- data.frame(cutree(geneTree_eet, h = clusters.cut))
genes_at_heigh_color_EET <- genes_at_heigh[geneTree_eet$order, ]  # dendrogram order
genes_at_heigh_color_EET$X23[!(genes_at_heigh_color_EET$X23 %in% go.results3_EET$cluster)] <- 0
data_frame_annotation_EET <- data.frame(cluster = genes_at_heigh_color_EET$X23)

# One colour per cluster id; "0" (white) is the label given to the clusters
# that were zeroed above.
colors_row_BP <- list(cluster = c(
  "0" = "#FFFFFF", "1" = "#00141c", "2" = "#0075fd", "7" = "#bcffbd",
  "11" = "#018699", "12" = "#7a3900", "13" = "#00501e", "14" = "#00386d",
  "17" = "#012600", "19" = "#dbffd3", "20" = "#490023", "21" = "#97bbff",
  "24" = "#00764a", "25" = "#e8ffa6", "26" = "#430039", "27" = "#00634f",
  "28" = "#ffc660", "29" = "#99f7ff", "30" = "#9d8800", "31" = "#270700",
  "32" = "#ffa4b2", "33" = "#2a0019", "34" = "#98ff8e", "35" = "#77a6ff",
  "38" = "#ffdcca", "39" = "#ee00d2", "41" = "#ff4f6a", "46" = "#000e8e",
  "50" = "#001957", "54" = "#ddd3ff", "59" = "#009641", "67" = "#cc000e",
  "70" = "#df7bff", "71" = "#00524f", "73" = "#3a2300", "76" = "#ffa871",
  "101" = "#661acb"
))

font_size <- 6
row_annotation <- HeatmapAnnotation(
  df = data_frame_annotation_EET,
  col = colors_row_BP,
  which = "row",
  width = unit(0.4, "cm"),
  gap = unit(0, "mm"),
  annotation_legend_param = list(
    title = "EET clusters",
    legend_direction = "horizontal",
    nrow = 2,
    title_gp = gpar(fontsize = font_size),
    labels_gp = gpar(fontsize = font_size),
    grid_height = unit(2, "mm"),
    grid_width = unit(2, "mm")
  )
)
# Column annotation for the EET x CAR heatmap: every CAR gene is labelled with
# its cluster at cut height 27; clusters that do not appear in go.results3_CAR
# are set to 0.
clusters.cut <- seq(1, 50, 1)
genes_at_heigh <- data.frame(cutree(geneTree_endo, h = clusters.cut))
genes_at_heigh_color_CAR <- genes_at_heigh[geneTree_endo$order, ]  # dendrogram order
genes_at_heigh_color_CAR$X27[!(genes_at_heigh_color_CAR$X27 %in% go.results3_CAR$cluster)] <- 0
data_frame_annotation_CAR <- data.frame(cluster = genes_at_heigh_color_CAR$X27)

# One colour per cluster id; "0" (white) is the label given to the clusters
# that were zeroed above.
colors_column_BP <- list(cluster = c(
  "0" = "#FFFFFF", "3" = "#4eff79", "4" = "#b84800", "5" = "#029398",
  "7" = "#8bdcff", "8" = "#fff415", "9" = "#01a687", "10" = "#fff7cd",
  "11" = "#02e89b", "12" = "#e5002a", "13" = "#228b00", "14" = "#83d100",
  "15" = "#fa8bff", "16" = "#763a0b", "19" = "#53474f", "20" = "#673870",
  "23" = "#b9ffc4", "21" = "#97bbff", "24" = "#8d78ff", "25" = "#4c6100",
  "26" = "#0142af", "27" = "#326e00", "29" = "#ffa636", "32" = "#896000",
  "33" = "#ffb87e", "34" = "#4c3a9b", "36" = "#ff7592", "37" = "#f3b9ff",
  "38" = "#73ae00", "39" = "#930086", "40" = "#9c005d", "43" = "#d5009a",
  "44" = "#ff3a81", "48" = "#9ebaff"
))

column_annotation <- HeatmapAnnotation(
  df = data_frame_annotation_CAR,
  col = colors_column_BP,
  which = "column",
  height = unit(0.4, "cm"),
  width = unit(0.1, "cm"),
  gap = unit(0, "mm"),
  annotation_legend_param = list(
    title = "CAR clusters",
    title_position = "topcenter",
    legend_position = "center",
    legend_direction = "horizontal",
    nrow = 2,
    title_gp = gpar(fontsize = font_size),
    labels_gp = gpar(fontsize = font_size),
    grid_height = unit(2, "mm"),
    grid_width = unit(2, "mm")
  )
)

# Correlations in dendrogram order; everything within [-0.95, 0.95] is set to
# 0 so that only the strongest correlations are coloured.
pearson_correlation_EET_heatmap <- pearson_correlation_EET[geneTree_eet$order, geneTree_endo$order]
pearson_correlation_EET_heatmap[abs(pearson_correlation_EET_heatmap) <= 0.95] <- 0

heatmap_pearson_eet_CAR <- Heatmap(
  pearson_correlation_EET_heatmap,
  name = "correlation",
  col = colorRamp2(c(-1, 0, 1), c("blue", "white", "red")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  show_column_names = FALSE,
  bottom_annotation = column_annotation
)
draw(
  heatmap_pearson_eet_CAR + row_annotation,
  annotation_legend_side = "bottom",
  heatmap_legend_side = "bottom"
)
Clustering of EET and ICAR genes based on the correlations
# Pearson correlation between every column of the EET table and every column
# of the ICAR table (EET columns become rows of the result).
pearson_correlation_EET <- cor(
  fpkm_eet_day_18_AI_a,
  fpkm_endo_day_18_IC_AI_a,
  use = "pairwise.complete.obs",
  method = "pearson"
)

# EET tree: Euclidean distance between the correlation profiles, then
# average-linkage clustering. The distance matrix is dropped once the tree is
# built.
dissimilarity <- as.matrix(
  parDist(pearson_correlation_EET, method = "euclidean", diag = TRUE, upper = TRUE)
)
dimnames(dissimilarity) <- rep(list(colnames(fpkm_eet_day_18_AI_a)), 2)
geneTree_eet <- flashClust(as.dist(dissimilarity), method = "average")
rm(dissimilarity)

# ICAR tree: same procedure on the correlation computed with the tables
# swapped; that correlation matrix is dropped together with the distances.
pearson_correlation_EDO <- cor(
  fpkm_endo_day_18_IC_AI_a,
  fpkm_eet_day_18_AI_a,
  use = "pairwise.complete.obs",
  method = "pearson"
)
dissimilarity <- as.matrix(
  parDist(pearson_correlation_EDO, method = "euclidean", diag = TRUE, upper = TRUE)
)
dimnames(dissimilarity) <- rep(list(colnames(fpkm_endo_day_18_IC_AI_a)), 2)
geneTree_endo <- flashClust(as.dist(dissimilarity), method = "average")
rm(dissimilarity, pearson_correlation_EDO)

# Heatmap of the correlations with rows and columns in dendrogram order; the
# heatmap itself does no clustering.
pearson_correlation_EET_heatmap <- pearson_correlation_EET[geneTree_eet$order, geneTree_endo$order]
Heatmap(
  pearson_correlation_EET_heatmap,
  name = "correlation",
  col = colorRamp2(c(-1, 0, 1), c("blue", "white", "red")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  show_column_names = FALSE
)
The final output of the next section “go.results3_EET” was used to produce Table S5
# GO (biological process) enrichment of the EET gene clusters obtained by
# cutting geneTree_eet at height 39. Result: go.results3_EET, one row per
# (GO term, cluster gene) pair with BY-adjusted P value below 0.2.

# Annotation tables: gene symbols, transcript lengths, GO terms.
annotation.ensembl.symbol <- read.table("2017_12_20_annotation.ensembl.symbol.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")
annotation.genelength.biomart <- read.table("2017_12_20_annotation.genelength.biomart.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")
annotation.GO.biomart <- read.table("2017_12_20_annotation.GO.biomart.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")

# Background: every gene of the EET expression table.
all_genes <- data.frame(gene = rownames(fpkm_eet_day_18_AI), stringsAsFactors = FALSE)
rownames(all_genes) <- all_genes$gene

# One length per gene (the longest transcript comes first after sorting and is
# the one kept), restricted to the background.
annotation.genelength.biomart <- annotation.genelength.biomart[with(annotation.genelength.biomart, order(ensembl_gene_id, -transcript_length)), ]
annotation.genelength.biomart <- annotation.genelength.biomart[!duplicated(annotation.genelength.biomart$ensembl_gene_id), ]
annotation.genelength.biomart <- annotation.genelength.biomart[annotation.genelength.biomart$ensembl_gene_id %in% rownames(all_genes), ]
annotation.genelength.biomart_vector <- annotation.genelength.biomart[, 2]
names(annotation.genelength.biomart_vector) <- annotation.genelength.biomart[, 1]
# NOTE(review): this vector is sorted by gene id, while the gene vector given
# to nullp() below follows the row order of the expression table. As far as I
# can tell nullp() pairs the two by position, not by name - confirm that both
# orders agree.

# Gene-to-term map, biological process only, restricted to the background.
annotation.GO.BP.biomart <- annotation.GO.biomart[annotation.GO.biomart$namespace_1003 == "biological_process", c(1:2)]
annotation.GO.BP.biomart <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% rownames(all_genes), ]

# Cluster membership at cut heights 1..50 (columns X1..X50).
summary(geneTree_eet$height)
clusters.cut <- seq(1, 50, 1)
genes_at_heigh <- cutree(geneTree_eet, h = clusters.cut)
genes_at_heigh <- data.frame(genes_at_heigh)
head(genes_at_heigh, n = 20)

# Test every cluster at height 39. Per-cluster tables are collected in a list
# and bound once after the loop instead of growing a data frame by rbind().
go_per_cluster <- list()
for (j in seq_len(max(genes_at_heigh$X39))) {
  genes_at_heigh_a <- genes_at_heigh[genes_at_heigh$X39 == j, ]
  test.genes <- data.frame(a = rownames(genes_at_heigh_a), stringsAsFactors = FALSE)
  # Clusters with 3 genes or fewer are not tested.
  if (length(test.genes$a) <= 3) next
  annotation.GO.BP.biomart_testgenes <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% test.genes$a, ]
  # At least 3 genes of the cluster must carry a GO annotation.
  if (length(unique(annotation.GO.BP.biomart_testgenes$ensembl_gene_id)) < 3) next

  # 1 = gene is in the cluster, 0 = background; named by gene id.
  all_genes_numeric <- as.integer(all_genes$gene %in% test.genes$a)
  names(all_genes_numeric) <- all_genes$gene

  # First try the biomart lengths; if that fails, let goseq look the lengths
  # up itself (bosTau4 / ensGene). If both fail the cluster is skipped.
  pwf <- try(nullp(all_genes_numeric, bias.data = annotation.genelength.biomart_vector, plot.fit = FALSE), silent = TRUE)
  if (inherits(pwf, 'try-error')) {
    pwf <- try(nullp(all_genes_numeric, 'bosTau4', 'ensGene', plot.fit = FALSE))
  }
  if (inherits(pwf, 'try-error')) next

  GO_BP_Cats <- goseq(pwf, gene2cat = annotation.GO.BP.biomart, method = "Wallenius", use_genes_without_cat = FALSE)
  # Only terms hit by more than 2 cluster genes enter the multiple-test adjustment.
  GO_BP_Cats <- GO_BP_Cats[GO_BP_Cats$numDEInCat > 2, ]
  if (nrow(GO_BP_Cats) == 0) next
  GO_BP_Cats$BY_FDR <- p.adjust(GO_BP_Cats$over_represented_pvalue, method = "BY")
  GO_BP_Cats$height <- "X39"
  GO_BP_Cats$cluster <- j
  # Attach the cluster genes annotated to each term.
  go_per_cluster[[length(go_per_cluster) + 1]] <- merge(GO_BP_Cats, annotation.GO.BP.biomart_testgenes, by.x = "category", by.y = "go_id", all.x = TRUE, all.y = FALSE)
}
go.results2 <- if (length(go_per_cluster) > 0) do.call(rbind, go_per_cluster) else data.frame()

# Keep terms with adjusted P < 0.2, add gene symbols, sort by adjusted P.
go.results3 <- go.results2[go.results2$BY_FDR < 0.2, ]
go.results3 <- go.results3[with(go.results3, order(BY_FDR)), ]
go.results3 <- merge(go.results3, annotation.ensembl.symbol, by.x = "ensembl_gene_id", by.y = "ensembl_gene_id", all.x = TRUE, all.y = FALSE)
go.results3_EET <- go.results3[with(go.results3, order(BY_FDR)), ]
The final output of the next section “go.results3_ICAR” was used to produce Table S6
# GO (biological process) enrichment of the ICAR gene clusters obtained by
# cutting geneTree_endo at height 44. Result: go.results3_ICAR, one row per
# (GO term, cluster gene) pair with BY-adjusted P value below 0.2.

# Annotation tables: gene symbols, transcript lengths, GO terms.
annotation.ensembl.symbol <- read.table("2017_12_20_annotation.ensembl.symbol.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")
annotation.genelength.biomart <- read.table("2017_12_20_annotation.genelength.biomart.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")
annotation.GO.biomart <- read.table("2017_12_20_annotation.GO.biomart.txt.bz2", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")

# Background: every gene of the ICAR expression table.
all_genes <- data.frame(gene = rownames(fpkm_endo_day_18_IC_AI), stringsAsFactors = FALSE)
rownames(all_genes) <- all_genes$gene

# One length per gene (the longest transcript comes first after sorting and is
# the one kept), restricted to the background.
annotation.genelength.biomart <- annotation.genelength.biomart[with(annotation.genelength.biomart, order(ensembl_gene_id, -transcript_length)), ]
annotation.genelength.biomart <- annotation.genelength.biomart[!duplicated(annotation.genelength.biomart$ensembl_gene_id), ]
annotation.genelength.biomart <- annotation.genelength.biomart[annotation.genelength.biomart$ensembl_gene_id %in% rownames(all_genes), ]
annotation.genelength.biomart_vector <- annotation.genelength.biomart[, 2]
names(annotation.genelength.biomart_vector) <- annotation.genelength.biomart[, 1]
# NOTE(review): this vector is sorted by gene id, while the gene vector given
# to nullp() below follows the row order of the expression table. As far as I
# can tell nullp() pairs the two by position, not by name - confirm that both
# orders agree.

# Gene-to-term map, biological process only, restricted to the background.
annotation.GO.BP.biomart <- annotation.GO.biomart[annotation.GO.biomart$namespace_1003 == "biological_process", c(1:2)]
annotation.GO.BP.biomart <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% rownames(all_genes), ]

# Cluster membership at every cut height in clusters.cut (columns X<height>).
genes_at_heigh <- cutree(geneTree_endo, h = clusters.cut)
genes_at_heigh <- data.frame(genes_at_heigh)

# Test every cluster at height 44. Per-cluster tables are collected in a list
# and bound once after the loop instead of growing a data frame by rbind().
go_per_cluster <- list()
for (j in seq_len(max(genes_at_heigh$X44))) {
  genes_at_heigh_a <- genes_at_heigh[genes_at_heigh$X44 == j, ]
  test.genes <- data.frame(a = rownames(genes_at_heigh_a), stringsAsFactors = FALSE)
  # Clusters with 3 genes or fewer are not tested.
  if (length(test.genes$a) <= 3) next
  annotation.GO.BP.biomart_testgenes <- annotation.GO.BP.biomart[annotation.GO.BP.biomart$ensembl_gene_id %in% test.genes$a, ]
  # At least 3 genes of the cluster must carry a GO annotation.
  if (length(unique(annotation.GO.BP.biomart_testgenes$ensembl_gene_id)) < 3) next

  # 1 = gene is in the cluster, 0 = background; named by gene id.
  all_genes_numeric <- as.integer(all_genes$gene %in% test.genes$a)
  names(all_genes_numeric) <- all_genes$gene

  # First try the biomart lengths; if that fails, let goseq look the lengths
  # up itself (bosTau4 / ensGene). If both fail the cluster is skipped.
  pwf <- try(nullp(all_genes_numeric, bias.data = annotation.genelength.biomart_vector, plot.fit = FALSE), silent = TRUE)
  if (inherits(pwf, 'try-error')) {
    pwf <- try(nullp(all_genes_numeric, 'bosTau4', 'ensGene', plot.fit = FALSE))
  }
  if (inherits(pwf, 'try-error')) next

  GO_BP_Cats <- goseq(pwf, gene2cat = annotation.GO.BP.biomart, method = "Wallenius", use_genes_without_cat = FALSE)
  # Only terms hit by more than 2 cluster genes enter the multiple-test adjustment.
  GO_BP_Cats <- GO_BP_Cats[GO_BP_Cats$numDEInCat > 2, ]
  if (nrow(GO_BP_Cats) == 0) next
  GO_BP_Cats$BY_FDR <- p.adjust(GO_BP_Cats$over_represented_pvalue, method = "BY")
  GO_BP_Cats$height <- "X44"
  GO_BP_Cats$cluster <- j
  # Attach the cluster genes annotated to each term.
  go_per_cluster[[length(go_per_cluster) + 1]] <- merge(GO_BP_Cats, annotation.GO.BP.biomart_testgenes, by.x = "category", by.y = "go_id", all.x = TRUE, all.y = FALSE)
}
go.results2 <- if (length(go_per_cluster) > 0) do.call(rbind, go_per_cluster) else data.frame()

# Keep terms with adjusted P < 0.2, add gene symbols, sort by adjusted P.
go.results3 <- go.results2[go.results2$BY_FDR < 0.2, ]
go.results3 <- go.results3[with(go.results3, order(BY_FDR)), ]
go.results3 <- merge(go.results3, annotation.ensembl.symbol, by.x = "ensembl_gene_id", by.y = "ensembl_gene_id", all.x = TRUE, all.y = FALSE)
go.results3_ICAR <- go.results3[with(go.results3, order(BY_FDR)), ]
# Row annotation for the EET x ICAR heatmap: every EET gene is labelled with
# its cluster at cut height 39; clusters that do not appear in go.results3_EET
# are set to 0.
genes_at_heigh <- data.frame(cutree(geneTree_eet, h = clusters.cut))
genes_at_heigh_color_EET <- genes_at_heigh[geneTree_eet$order, ]  # dendrogram order
genes_at_heigh_color_EET$X39[!(genes_at_heigh_color_EET$X39 %in% go.results3_EET$cluster)] <- 0
data_frame_annotation_EET <- data.frame(cluster = genes_at_heigh_color_EET$X39)

# One colour per cluster id; "0" (white) is the label given to the clusters
# that were zeroed above.
colors_row_BP <- list(cluster = c(
  "0" = "#FFFFFF", "2" = "#4b3376", "3" = "#71d14b", "4" = "#924ed0",
  "5" = "#c8c54d", "7" = "#cb5592", "8" = "#78cb8f", "9" = "#ca4e37",
  "13" = "#93c2c3", "17" = "#593338", "20" = "#c09061", "21" = "#948fc2",
  "26" = "#4b633c"
))

font_size <- 6
row_annotation <- HeatmapAnnotation(
  df = data_frame_annotation_EET,
  col = colors_row_BP,
  which = "row",
  width = unit(0.4, "cm"),
  gap = unit(0, "mm"),
  annotation_legend_param = list(
    title = "EET clusters",
    legend_direction = "horizontal",
    nrow = 2,
    title_gp = gpar(fontsize = font_size),
    labels_gp = gpar(fontsize = font_size),
    grid_height = unit(2, "mm"),
    grid_width = unit(2, "mm")
  )
)
# Column annotation for the EET x ICAR heatmap: every ICAR gene is labelled
# with its cluster at cut height 44; clusters that do not appear in
# go.results3_ICAR are set to 0.
clusters.cut <- seq(1, 50, 1)
genes_at_heigh <- data.frame(cutree(geneTree_endo, h = clusters.cut))
genes_at_heigh_color_ICAR <- genes_at_heigh[geneTree_endo$order, ]  # dendrogram order
genes_at_heigh_color_ICAR$X44[!(genes_at_heigh_color_ICAR$X44 %in% go.results3_ICAR$cluster)] <- 0
data_frame_annotation_ICAR <- data.frame(cluster = genes_at_heigh_color_ICAR$X44)

# One colour per cluster id; "0" (white) is the label given to the clusters
# that were zeroed above.
colors_column_BP <- list(cluster = c(
  "0" = "#FFFFFF", "1" = "#4dad98", "2" = "#7aa444", "3" = "#71d14b",
  "4" = "#cb547b", "6" = "#c5793e", "7" = "#cb5592", "8" = "#78cb8f"
))

column_annotation <- HeatmapAnnotation(
  df = data_frame_annotation_ICAR,
  col = colors_column_BP,
  which = "column",
  height = unit(0.4, "cm"),
  width = unit(0.1, "cm"),
  gap = unit(0, "mm"),
  annotation_legend_param = list(
    title = "ICAR clusters",
    title_position = "topcenter",
    legend_position = "center",
    legend_direction = "horizontal",
    nrow = 2,
    title_gp = gpar(fontsize = font_size),
    labels_gp = gpar(fontsize = font_size),
    grid_height = unit(2, "mm"),
    grid_width = unit(2, "mm")
  )
)

# Correlations in dendrogram order; everything within [-0.95, 0.95] is set to
# 0 so that only the strongest correlations are coloured.
pearson_correlation_EET_heatmap <- pearson_correlation_EET[geneTree_eet$order, geneTree_endo$order]
pearson_correlation_EET_heatmap[abs(pearson_correlation_EET_heatmap) <= 0.95] <- 0

heatmap_pearson_eet_ICAR <- Heatmap(
  pearson_correlation_EET_heatmap,
  name = "correlation",
  col = colorRamp2(c(-1, 0, 1), c("blue", "white", "red")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  show_column_names = FALSE,
  bottom_annotation = column_annotation
)
draw(
  heatmap_pearson_eet_ICAR + row_annotation,
  annotation_legend_side = "bottom",
  heatmap_legend_side = "bottom"
)
# Print the R version, platform and package versions of this session.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 grid parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.20 vegan_2.5-3 lattice_0.20-38 permute_0.9-4 ggpubr_0.1.8 magrittr_1.5 org.Bt.eg.db_3.7.0 AnnotationDbi_1.44.0 IRanges_2.16.0 S4Vectors_0.20.0 scales_1.0.0 parallelDist_0.2.2 edgeR_3.24.0 limma_3.38.2 dendextend_1.9.0 ggrepel_0.8.0 circlize_0.4.4 ComplexHeatmap_1.20.0 Rtsne_0.13 VennDiagram_1.6.20 futile.logger_1.4.3 biomaRt_2.38.0 gtools_3.8.1 bigmemory_4.5.33 doParallel_1.0.14 iterators_1.0.10 foreach_1.4.4 gplots_3.0.1 flashClust_1.01-2 goseq_1.34.0 geneLenDataBase_1.18.0 BiasedUrn_1.07 multtest_2.38.0 Biobase_2.42.0 BiocGenerics_0.28.0 reshape_0.8.8 ggplot2_3.1.0 WGCNA_1.66 fastcluster_1.1.25 dynamicTreeCut_1.63-1 rmarkdown_1.10
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.2 Hmisc_4.1-1 plyr_1.8.4 lazyeval_0.2.1 splines_3.5.1 BiocParallel_1.16.0 GenomeInfoDb_1.18.0 robust_0.4-18 digest_0.6.18 htmltools_0.3.6 viridis_0.5.1 GO.db_3.7.0 gdata_2.18.0 checkmate_1.8.5 memoise_1.1.0 fit.models_0.5-14 cluster_2.0.7-1 Biostrings_2.50.1 RcppParallel_4.4.1 matrixStats_0.54.0 prettyunits_1.0.2 colorspace_1.3-2 blob_1.1.1 rrcov_1.4-4 dplyr_0.7.7 crayon_1.3.4 RCurl_1.95-4.11 bigmemory.sri_0.1.3 bindr_0.1.1 impute_1.56.0 survival_2.43-1 glue_1.3.0 gtable_0.2.0 zlibbioc_1.28.0 XVector_0.22.0 GetoptLong_0.1.7 DelayedArray_0.8.0 kernlab_0.9-27 shape_1.4.4 prabclus_2.2-6 DEoptimR_1.0-8 futile.options_1.0.1 mvtnorm_1.0-8 DBI_1.0.0 Rcpp_1.0.0 viridisLite_0.3.0 progress_1.2.0 htmlTable_1.12 foreign_0.8-71 bit_1.1-14 mclust_5.4.1 preprocessCore_1.44.0 Formula_1.2-3 htmlwidgets_1.3 httr_1.3.1 RColorBrewer_1.1-2 fpc_2.1-11.1 acepack_1.4.1 modeltools_0.2-22 pkgconfig_2.0.2 XML_3.98-1.16 flexmix_2.3-14 nnet_7.3-12 locfit_1.5-9.1 labeling_0.3 tidyselect_0.2.5 rlang_0.3.0.1 munsell_0.5.0 tools_3.5.1 RSQLite_2.1.1 evaluate_0.12 stringr_1.3.1 yaml_2.2.0 bit64_0.9-7 robustbase_0.93-3 caTools_1.17.1.1 purrr_0.2.5 bindrcpp_0.2.2 nlme_3.1-137 whisker_0.3-2 formatR_1.5 compiler_3.5.1 rstudioapi_0.8 tibble_1.4.2 pcaPP_1.9-73 stringi_1.2.4 GenomicFeatures_1.34.1 trimcluster_0.1-2.1 Matrix_1.2-15 pillar_1.3.0 GlobalOptions_0.1.0 cowplot_0.9.3 data.table_1.11.8 bitops_1.0-6 rtracklayer_1.42.0 GenomicRanges_1.34.0 R6_2.3.0 latticeExtra_0.6-28 KernSmooth_2.23-15 gridExtra_2.3 codetools_0.2-15 lambda.r_1.2.3 MASS_7.3-51.1 assertthat_0.2.0 SummarizedExperiment_1.12.0 rprojroot_1.3-2 rjson_0.2.20 withr_2.1.2 GenomicAlignments_1.18.0 Rsamtools_1.34.0 GenomeInfoDbData_1.2.0 diptest_0.75-7 mgcv_1.8-25 hms_0.4.2 rpart_4.1-13 class_7.3-14 base64enc_0.1-3