This report is easier to read with the code hidden (use the drop-down list at the top right).
sink(file="RsessionInfoDESeq2.txt")
library('DESeq2')
library("ggplot2")
library(reshape2)
####library(tidyverse)
####library(splitstackshape)
####library(data.table)
library("RColorBrewer")
library("gplots")
####library('ggdendro')
library('ggrepel')
library("dplyr")
library("ComplexHeatmap")
library("clusterProfiler")
library(VennDiagram) ######
library(UpSetR)
library(gridExtra)
library(cluster)
library(circlize)
library(factoextra)
library(NbClust)
library("biomaRt")
library("org.Hs.eg.db")####human
library("org.Mm.eg.db")####mouse
library(venn)
library(enrichR)
library(ReactomePA)
Error in library(ReactomePA) : there is no package called ‘ReactomePA’
# ---- Colour scales, Venn palettes and input table ----
# Heatmap colour mapping: blue/cyan below zero, grey at zero, orange/red above.
col_fun <- colorRamp2(
  breaks = c(-1, -0.2, 0, 0.2, 1),
  colors = c("blue", "cyan", "grey90", "orange", "red")
)
# Green-black-red mappings with limits of +/-1.5 and +/-2.
col_funGR <- colorRamp2(breaks = c(-1.5, 0, 1.5), colors = c("green", "black", "red"))
col_funGR2 <- colorRamp2(breaks = c(-2, 0, 2), colors = c("green", "black", "red"))

# Fill colours for Venn diagrams with 2, 3, 4 and 5 sets.
colorsV2 <- c("mediumorchid1", "chartreuse3")
colorsV3 <- c("cornflowerblue", "brown1", "orange2")
colorsV4 <- c("cornflowerblue", "orange2", "green3", "red")
colorsV5 <- c("cornflowerblue", "orange2", "green3", "purple", "red")
# col_fun(seq(-3, 3))

# Label used in the names of the files written by this run.
groupsName <- "R1_R4_kmeans_q0.05"

# Tab-separated results table; the first column becomes the row names and
# column names are kept exactly as they appear in the file.
countsTable <- read.delim(
  file = "RNAseq2019July_5.txt",
  header = TRUE,
  sep = "\t",
  check.names = FALSE,
  row.names = 1
)
head(countsTable)

# All gene symbols in the table (passed later as the enrichment universe).
AllGeneNames <- countsTable$Gene_Symbol
# head(AllGeneNames)

# Working copy used for filtering.
tempA <- countsTable
# ---- Venn diagrams: Var37 (R1) time-course contrasts ----
# Selection rule for every contrast below: the test statistic is not NA and is
# < 0.05, and at least one of the two compared time points has a `*_mean`
# value > 10. The p-value variant additionally requires |log2FoldChange| > 1.

# -- adjusted p-value lists --
topDEgenes <- which(
  !is.na(tempA$padj_R1_Var37_Hours_6h_vs_0h) &
    tempA$padj_R1_Var37_Hours_6h_vs_0h < 0.05 &
    (tempA$Var37TNF_0h_mean > 10 | tempA$Var37TNF_6h_mean > 10)
)
listA <- tempA$Gene_Symbol[topDEgenes]

topDEgenes <- which(
  !is.na(tempA$padj_R1_Var37_Hours_20h_vs_0h) &
    tempA$padj_R1_Var37_Hours_20h_vs_0h < 0.05 &
    (tempA$Var37TNF_0h_mean > 10 | tempA$Var37TNF_20h_mean > 10)
)
listB <- tempA$Gene_Symbol[topDEgenes]

topDEgenes <- which(
  !is.na(tempA$padj_R1_Var37_Hours_20h_vs_6h) &
    tempA$padj_R1_Var37_Hours_20h_vs_6h < 0.05 &
    (tempA$Var37TNF_20h_mean > 10 | tempA$Var37TNF_6h_mean > 10)
)
listC <- tempA$Gene_Symbol[topDEgenes]

# filename = NULL returns the diagram as grid objects instead of writing a file.
vennq <- venn.diagram(
  x = list(listA, listB, listC),
  category.names = c("Var37_6hv0h", "Var37_20hv0h", "Var37_20hv6h"),
  main = "padj<0.05",
  filename = NULL,
  scaled = FALSE,
  fill = colorsV3,
  cat.col = colorsV3,
  cat.cex = 1,
  cat.dist = 0.1,
  margin = 0.3
)

# -- raw p-value + fold-change lists --
topDEgenes <- which(
  !is.na(tempA$pvalue_R1_Var37_Hours_6h_vs_0h) &
    tempA$pvalue_R1_Var37_Hours_6h_vs_0h < 0.05 &
    abs(tempA$log2FoldChange_R1_Var37_Hours_6h_vs_0h) > 1 &
    (tempA$Var37TNF_6h_mean > 10 | tempA$Var37TNF_0h_mean > 10)
)
listA <- tempA$Gene_Symbol[topDEgenes]

topDEgenes <- which(
  !is.na(tempA$pvalue_R1_Var37_Hours_20h_vs_0h) &
    tempA$pvalue_R1_Var37_Hours_20h_vs_0h < 0.05 &
    abs(tempA$log2FoldChange_R1_Var37_Hours_20h_vs_0h) > 1 &
    (tempA$Var37TNF_0h_mean > 10 | tempA$Var37TNF_20h_mean > 10)
)
listB <- tempA$Gene_Symbol[topDEgenes]

topDEgenes <- which(
  !is.na(tempA$pvalue_R1_Var37_Hours_20h_vs_6h) &
    tempA$pvalue_R1_Var37_Hours_20h_vs_6h < 0.05 &
    abs(tempA$log2FoldChange_R1_Var37_Hours_20h_vs_6h) > 1 &
    (tempA$Var37TNF_6h_mean > 10 | tempA$Var37TNF_20h_mean > 10)
)
listC <- tempA$Gene_Symbol[topDEgenes]

vennp <- venn.diagram(
  x = list(listA, listB, listC),
  category.names = c("Var37_6hv0h", "Var37_20hv0h", "Var37_20hv6h"),
  main = "pvalue<0.05&fold change>2",
  filename = NULL,
  scaled = FALSE,
  fill = colorsV3,
  cat.col = colorsV3,
  cat.cex = 1,
  cat.dist = 0.1,
  margin = 0.3
)
# ---- Venn diagrams: Var14 (R4) time-course contrasts ----
# Selection rule for every contrast below: the test statistic is not NA and is
# < 0.05, and at least one of the two compared time points has a `*_mean`
# value > 10. The p-value variant additionally requires |log2FoldChange| > 1.

# -- adjusted p-value lists --
topDEgenes <- which(
  !is.na(tempA$padj_R4Var14TNF_Hours_6h_vs_0h) &
    tempA$padj_R4Var14TNF_Hours_6h_vs_0h < 0.05 &
    (tempA$Var14TNF_0h_mean > 10 | tempA$Var14TNF_6h_mean > 10)
)
listA <- tempA$Gene_Symbol[topDEgenes]

topDEgenes <- which(
  !is.na(tempA$padj_R4Var14TNF_Hours_20h_vs_0h) &
    tempA$padj_R4Var14TNF_Hours_20h_vs_0h < 0.05 &
    (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_0h_mean > 10)
)
listB <- tempA$Gene_Symbol[topDEgenes]

# Fixed: this set is labelled "Var14_20hv6h" and uses the Var14 means, so it
# must be filtered on the Var14 adjusted p-value column. The previous code
# filtered on padj_R1_Var37_Hours_20h_vs_6h (the Var37 contrast).
topDEgenes <- which(
  !is.na(tempA$padj_R4Var14TNF_Hours_20h_vs_6h) &
    tempA$padj_R4Var14TNF_Hours_20h_vs_6h < 0.05 &
    (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_6h_mean > 10)
)
listC <- tempA$Gene_Symbol[topDEgenes]

# filename = NULL returns the diagram as grid objects instead of writing a file.
vennq2 <- venn.diagram(
  x = list(listA, listB, listC),
  category.names = c("Var14_6hv0h", "Var14_20hv0h", "Var14_20hv6h"),
  main = "padj<0.05",
  filename = NULL,
  scaled = FALSE,
  fill = colorsV3,
  cat.col = colorsV3,
  cat.cex = 1,
  cat.dist = 0.1,
  margin = 0.3
)

# -- raw p-value + fold-change lists --
topDEgenes <- which(
  !is.na(tempA$pvalue_R4Var14TNF_Hours_6h_vs_0h) &
    tempA$pvalue_R4Var14TNF_Hours_6h_vs_0h < 0.05 &
    abs(tempA$log2FoldChange_R4Var14TNF_Hours_6h_vs_0h) > 1 &
    (tempA$Var14TNF_0h_mean > 10 | tempA$Var14TNF_6h_mean > 10)
)
listA <- tempA$Gene_Symbol[topDEgenes]

topDEgenes <- which(
  !is.na(tempA$pvalue_R4Var14TNF_Hours_20h_vs_0h) &
    tempA$pvalue_R4Var14TNF_Hours_20h_vs_0h < 0.05 &
    abs(tempA$log2FoldChange_R4Var14TNF_Hours_20h_vs_0h) > 1 &
    (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_0h_mean > 10)
)
listB <- tempA$Gene_Symbol[topDEgenes]

topDEgenes <- which(
  !is.na(tempA$pvalue_R4Var14TNF_Hours_20h_vs_6h) &
    tempA$pvalue_R4Var14TNF_Hours_20h_vs_6h < 0.05 &
    abs(tempA$log2FoldChange_R4Var14TNF_Hours_20h_vs_6h) > 1 &
    (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_6h_mean > 10)
)
listC <- tempA$Gene_Symbol[topDEgenes]

vennp2 <- venn.diagram(
  x = list(listA, listB, listC),
  category.names = c("Var14_6hv0h", "Var14_20hv0h", "Var14_20hv6h"),
  main = "pvalue<0.05&fold change>2",
  filename = NULL,
  scaled = FALSE,
  fill = colorsV3,
  cat.col = colorsV3,
  cat.cex = 1,
  cat.dist = 0.1,
  margin = 0.3
)
# ---- Union gene lists per strain and criterion, and the Venn figures ----
# Each list below is the union over the three contrasts (6h vs 0h, 20h vs 0h,
# 20h vs 6h) of genes passing the same rule as the per-contrast lists: test
# statistic not NA and < 0.05, at least one compared `*_mean` > 10, and for
# the p-value variant |log2FoldChange| > 1.

# Var37, adjusted p-value.
topDEgenes <- which(
  (tempA$padj_R1_Var37_Hours_6h_vs_0h < 0.05 &
     (tempA$Var37TNF_6h_mean > 10 | tempA$Var37TNF_0h_mean > 10) &
     !is.na(tempA$padj_R1_Var37_Hours_6h_vs_0h)) |
    (tempA$padj_R1_Var37_Hours_20h_vs_0h < 0.05 &
       (tempA$Var37TNF_0h_mean > 10 | tempA$Var37TNF_20h_mean > 10) &
       !is.na(tempA$padj_R1_Var37_Hours_20h_vs_0h)) |
    (tempA$padj_R1_Var37_Hours_20h_vs_6h < 0.05 &
       (tempA$Var37TNF_6h_mean > 10 | tempA$Var37TNF_20h_mean > 10) &
       !is.na(tempA$padj_R1_Var37_Hours_20h_vs_6h))
)
listA <- tempA$Gene_Symbol[topDEgenes]

# Var37, raw p-value + fold change.
topDEgenes <- which(
  (tempA$pvalue_R1_Var37_Hours_6h_vs_0h < 0.05 &
     abs(tempA$log2FoldChange_R1_Var37_Hours_6h_vs_0h) > 1 &
     (tempA$Var37TNF_6h_mean > 10 | tempA$Var37TNF_0h_mean > 10) &
     !is.na(tempA$pvalue_R1_Var37_Hours_6h_vs_0h)) |
    (tempA$pvalue_R1_Var37_Hours_20h_vs_0h < 0.05 &
       abs(tempA$log2FoldChange_R1_Var37_Hours_20h_vs_0h) > 1 &
       (tempA$Var37TNF_0h_mean > 10 | tempA$Var37TNF_20h_mean > 10) &
       !is.na(tempA$pvalue_R1_Var37_Hours_20h_vs_0h)) |
    (tempA$pvalue_R1_Var37_Hours_20h_vs_6h < 0.05 &
       abs(tempA$log2FoldChange_R1_Var37_Hours_20h_vs_6h) > 1 &
       (tempA$Var37TNF_6h_mean > 10 | tempA$Var37TNF_20h_mean > 10) &
       !is.na(tempA$pvalue_R1_Var37_Hours_20h_vs_6h))
)
listB <- tempA$Gene_Symbol[topDEgenes]

# Var14, adjusted p-value.
topDEgenes <- which(
  (tempA$padj_R4Var14TNF_Hours_6h_vs_0h < 0.05 &
     (tempA$Var14TNF_0h_mean > 10 | tempA$Var14TNF_6h_mean > 10) &
     !is.na(tempA$padj_R4Var14TNF_Hours_6h_vs_0h)) |
    (tempA$padj_R4Var14TNF_Hours_20h_vs_0h < 0.05 &
       (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_0h_mean > 10) &
       !is.na(tempA$padj_R4Var14TNF_Hours_20h_vs_0h)) |
    (tempA$padj_R4Var14TNF_Hours_20h_vs_6h < 0.05 &
       (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_6h_mean > 10) &
       !is.na(tempA$padj_R4Var14TNF_Hours_20h_vs_6h))
)
listA2 <- tempA$Gene_Symbol[topDEgenes]

# Var14, raw p-value + fold change.
topDEgenes <- which(
  (tempA$pvalue_R4Var14TNF_Hours_6h_vs_0h < 0.05 &
     abs(tempA$log2FoldChange_R4Var14TNF_Hours_6h_vs_0h) > 1 &
     (tempA$Var14TNF_0h_mean > 10 | tempA$Var14TNF_6h_mean > 10) &
     !is.na(tempA$pvalue_R4Var14TNF_Hours_6h_vs_0h)) |
    (tempA$pvalue_R4Var14TNF_Hours_20h_vs_0h < 0.05 &
       abs(tempA$log2FoldChange_R4Var14TNF_Hours_20h_vs_0h) > 1 &
       (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_0h_mean > 10) &
       !is.na(tempA$pvalue_R4Var14TNF_Hours_20h_vs_0h)) |
    (tempA$pvalue_R4Var14TNF_Hours_20h_vs_6h < 0.05 &
       abs(tempA$log2FoldChange_R4Var14TNF_Hours_20h_vs_6h) > 1 &
       (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_6h_mean > 10) &
       !is.na(tempA$pvalue_R4Var14TNF_Hours_20h_vs_6h))
)
listB2 <- tempA$Gene_Symbol[topDEgenes]

# Four-way comparison of the two criteria across both strains.
vennpq <- venn.diagram(
  x = list(listA, listB, listA2, listB2),
  category.names = c("V37padj<0.05", "V37p<0.05&fc>2", "V14padj<0.05", "V14p<0.05&fc>2"),
  main = "padj compared to pvalue",
  filename = NULL,
  scaled = FALSE,
  fill = colorsV4,
  cat.col = colorsV4,
  cat.cex = 1,
  cat.dist = 0.3,
  margin = 0.15
)

# venn.diagram(filename = NULL) returns a list of grobs; wrap each in a gTree
# so grid.arrange() can place it.
grid.arrange(gTree(children = vennq), gTree(children = vennp), ncol = 2, top = "R1 Var37 TNF")
grid.arrange(gTree(children = vennq2), gTree(children = vennp2), ncol = 2, top = "R4 Var14 TNF")
# Fixed title: this diagram contains both the Var37 and the Var14 sets, but was
# previously headed "R4 Var14 TNF" (copied from the figure above).
grid.arrange(gTree(children = vennpq), ncol = 1, top = "R1 Var37 TNF and R4 Var14 TNF")
# ---- Flag genes passing the padj filter in any Var37 or Var14 contrast ----
# tempA <- resAll[-c(10:30)]
tempA <- countsTable
# Drop the row names; rows are addressed by position from here on.
rownames(tempA) <- NULL

# One logical vector per contrast: padj present and < 0.05, and at least one of
# the two compared `*_mean` values > 10.
include_tests <- list(
  tempA$padj_R1_Var37_Hours_6h_vs_0h < 0.05 &
    (tempA$Var37TNF_6h_mean > 10 | tempA$Var37TNF_0h_mean > 10) &
    !is.na(tempA$padj_R1_Var37_Hours_6h_vs_0h),
  tempA$padj_R1_Var37_Hours_20h_vs_0h < 0.05 &
    (tempA$Var37TNF_0h_mean > 10 | tempA$Var37TNF_20h_mean > 10) &
    !is.na(tempA$padj_R1_Var37_Hours_20h_vs_0h),
  tempA$padj_R1_Var37_Hours_20h_vs_6h < 0.05 &
    (tempA$Var37TNF_6h_mean > 10 | tempA$Var37TNF_20h_mean > 10) &
    !is.na(tempA$padj_R1_Var37_Hours_20h_vs_6h),
  tempA$padj_R4Var14TNF_Hours_6h_vs_0h < 0.05 &
    (tempA$Var14TNF_0h_mean > 10 | tempA$Var14TNF_6h_mean > 10) &
    !is.na(tempA$padj_R4Var14TNF_Hours_6h_vs_0h),
  tempA$padj_R4Var14TNF_Hours_20h_vs_0h < 0.05 &
    (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_0h_mean > 10) &
    !is.na(tempA$padj_R4Var14TNF_Hours_20h_vs_0h),
  tempA$padj_R4Var14TNF_Hours_20h_vs_6h < 0.05 &
    (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_6h_mean > 10) &
    !is.na(tempA$padj_R4Var14TNF_Hours_20h_vs_6h)
)

# Fold the tests into the same result as a nested ifelse() chain: starting from
# "out", each test (applied last-to-first so the first test ends up outermost)
# overrides the running value with "in" where it is TRUE.
include_flag <- Reduce(
  function(otherwise, test) ifelse(test, "in", otherwise),
  rev(include_tests),
  "out"
)
tempA <- mutate(tempA, Include = include_flag)
# tempA

# Number of genes per Include value.
tempA %>%
  group_by(Include) %>%
  tally()

# Row positions of the included genes.
topDEgenes <- which(tempA$Include == "in")
# ---- Build the per-strain scaled mean matrix and the "vs 0h" matrix ----
head(countsTable)

# Select the six mean columns by name rather than by position, so the script
# does not silently pick the wrong columns if the table layout changes.
mean_cols <- c(
  "Var37TNF_0h_mean", "Var37TNF_6h_mean", "Var37TNF_20h_mean",
  "Var14TNF_0h_mean", "Var14TNF_6h_mean", "Var14TNF_20h_mean"
)
missing_cols <- setdiff(mean_cols, colnames(countsTable))
if (length(missing_cols) > 0) {
  stop("countsTable is missing column(s): ", paste(missing_cols, collapse = ", "),
       call. = FALSE)
}
baseMeansHm <- countsTable[, mean_cols]
head(baseMeansHm)
tail(baseMeansHm[topDEgenes, ])

# Drop the "_mean" suffix: Var37TNF_0h, Var37TNF_6h, ..., Var14TNF_20h.
colnames(baseMeansHm) <- sub("_mean$", "", mean_cols)

# Replace low values (< 10) with 0, then log2(x + 1) transform.
baseMeansHm[] <- lapply(baseMeansHm, function(x) {
  x[x < 10] <- 0
  x
})
tail(baseMeansHm)
baseMeansHm <- log2(baseMeansHm + 1)
tail(baseMeansHm)

topDEgenes <- which(tempA$Include == "in") # row positions of included genes

# Row-wise centring and scaling: scale() works on columns, hence the two
# transposes.
scale_rows <- function(x) t(scale(t(as.matrix(x))))

# Scale Var37 and Var14 separately, so each gene is standardised within a strain.
var14mn <- scale_rows(baseMeansHm[, c("Var14TNF_0h", "Var14TNF_6h", "Var14TNF_20h")])
baseMeansHm2 <- scale_rows(baseMeansHm[, c("Var37TNF_0h", "Var37TNF_6h", "Var37TNF_20h")])
baseMeansHm2 <- as.data.frame(cbind(baseMeansHm2, var14mn))
# Any NA produced by the scaling is set to 0.
baseMeansHm2[is.na(baseMeansHm2)] <- 0

# "_lfc" columns: difference between each time point and the same strain's 0h
# value. NOTE: these are differences of the scaled values computed above, not
# log2 fold changes of the means themselves.
for (strain in c("Var37TNF", "Var14TNF")) {
  baseline <- baseMeansHm2[[paste0(strain, "_0h")]]
  for (timepoint in c("0h", "6h", "20h")) {
    baseMeansHm2[[paste0(strain, "_", timepoint, "_lfc")]] <-
      baseMeansHm2[[paste0(strain, "_", timepoint)]] - baseline
  }
}

# Split into the difference matrix (columns 7-12) and the scaled means (1-6).
baseMeansHm3 <- baseMeansHm2[, 7:12]
head(baseMeansHm3)
baseMeansHm2 <- baseMeansHm2[, 1:6]
head(baseMeansHm2)

# Scaled means of the included genes only.
dataHMm2 <- as.matrix(baseMeansHm2[topDEgenes, ])
tail(dataHMm2)
Var37TNF_0h Var37TNF_6h Var37TNF_20h Var14TNF_0h Var14TNF_6h Var14TNF_20h
ENSG00000120805.13 -0.5782822 -0.5764178 1.154700 -1.0855261 0.2018455 0.8836806
ENSG00000184307.14 0.5771031 0.5775974 -1.154701 0.1224345 0.9331455 -1.0555800
ENSG00000109920.12 -0.5767641 -0.5779363 1.154700 -1.0632535 0.1416024 0.9216511
ENSG00000088280.18 -0.5815036 -0.5731870 1.154691 0.8191921 -1.1143607 0.2951686
ENSG00000007171.16 0.0000000 0.0000000 0.000000 -0.5773503 -0.5773503 1.1547005
ENSG00000132185.16 0.0000000 0.0000000 0.000000 -0.5773503 1.1547005 -0.5773503
# ---- Heatmaps of the scaled means and of the differences vs 0h ----

# All heatmaps below share the same layout; only the matrix, legend name,
# title, colour mapping, body width and column clustering differ.
build_heatmap <- function(mat, name, title, colours, width_mm, cluster_columns) {
  Heatmap(
    mat,
    name = name,
    row_labels = paste0(rownames(mat), " ", tempA$Gene_Symbol[topDEgenes]),
    column_title = title,
    col = colours,
    column_title_gp = gpar(fontsize = 14, fontface = "bold"),
    width = unit(width_mm, "mm"),
    cluster_columns = cluster_columns,
    show_row_names = FALSE
  )
}

# Scaled means of the included genes, split by strain (columns 1-3 and 4-6).
dataHMm2 <- as.matrix(baseMeansHm2[topDEgenes, ])
dataHMm2_37 <- dataHMm2[, 1:3]
dataHMm2_14 <- dataHMm2[, 4:6]

# Per-strain heatmaps; the "b" variants also cluster the columns.
hmap_hier_factors37 <- build_heatmap(dataHMm2_37, "mean37", "MeansV37", col_funGR, 25, FALSE)
hmap_hier_factors37b <- build_heatmap(dataHMm2_37, "mean37b", "MeansV37", col_funGR, 25, TRUE)
hmap_hier_factors14 <- build_heatmap(dataHMm2_14, "mean14", "MeansV14", col_funGR, 25, FALSE)
hmap_hier_factors14b <- build_heatmap(dataHMm2_14, "mean14b", "MeansV14", col_funGR, 25, TRUE)
# write.table(dataHMm2,"dataHMm2.txt", sep = "\t")

# Both strains together: original column order, time points interleaved across
# strains, and with clustered columns.
hmap_hier_factors4 <- build_heatmap(dataHMm2, "mean1", "Means", col_funGR, 50, FALSE)
dataHMm2b <- dataHMm2[, c(1, 4, 2, 5, 3, 6)]
hmap_hier_factors4a <- build_heatmap(dataHMm2b, "mean2", "Means Rearranged", col_funGR, 50, FALSE)
hmap_hier_factors4b <- build_heatmap(dataHMm2, "mean3", "Means Clustered", col_funGR, 50, TRUE)

# Differences vs 0h for the included genes; also written to disk.
dataHMm3 <- as.matrix(baseMeansHm3[topDEgenes, ])
write.table(dataHMm3, "dataHMm3.txt", sep = "\t")
# baseMeansHm2<-as.matrix(baseMeansHm2)
hmap_hier_factors6 <- build_heatmap(dataHMm3, "logfc1", "vs 0h", col_funGR2, 50, FALSE)
dataHMm3b <- dataHMm3[, c(1, 4, 2, 5, 3, 6)]
hmap_hier_factors6b <- build_heatmap(dataHMm3b, "logfc2", "vs 0h Rearranged", col_funGR2, 50, FALSE)
hmap_hier_factors6c <- build_heatmap(dataHMm3, "logfc3", "vs 0h", col_funGR2, 50, TRUE)

# Combine the heatmaps horizontally and draw the three figures.
hmlist1 <- hmap_hier_factors37 + hmap_hier_factors14 + hmap_hier_factors37b + hmap_hier_factors14b
draw(
  hmlist1,
  column_title = "Heatmaps on Means (scaled per strain). Genelists combined from VAR37 and VAR14 timecourses padj<0.05",
  column_title_gp = gpar(fontsize = 22)
)

hmlist2 <- hmap_hier_factors4 + hmap_hier_factors4a + hmap_hier_factors4b
draw(
  hmlist2,
  column_title = "Heatmaps on Means (scaled per strain)",
  column_title_gp = gpar(fontsize = 22)
)

hmlist3 <- hmap_hier_factors4 + hmap_hier_factors4a + hmap_hier_factors4b +
  hmap_hier_factors6 + hmap_hier_factors6b + hmap_hier_factors6c
draw(
  hmlist3,
  column_title = "Heatmaps on Means (scaled per strain) and logfc Means vs Strain 0h",
  column_title_gp = gpar(fontsize = 22)
)
# ---- Choose the number of clusters, run k-means, draw the clustered heatmap ----
n_clusters <- 4

# kmeans() starts from random centres; fix the seed so the diagnostics and the
# clustering are reproducible between runs.
set.seed(123)

# fviz_nbclust() returns ggplot objects, which par(mfrow = ...) does not affect,
# so the two diagnostics are placed side by side with grid.arrange() instead.
#### Silhouette method
nbclust_silhouette <- fviz_nbclust(dataHMm3, kmeans, method = "silhouette", k.max = 16) +
  labs(subtitle = "Silhouette method")
#### Elbow method
nbclust_elbow <- fviz_nbclust(dataHMm3, kmeans, method = "wss", k.max = 16) +
  labs(subtitle = "Elbow method")
grid.arrange(nbclust_silhouette, nbclust_elbow, ncol = 2)
#### gap stat slow!!!
#### set.seed(123)
#### fviz_nbclust(dataHMm, kmeans, nstart = 25, method = "gap_stat", nboot = 100,k.max = 16)+
####   labs(subtitle = "Gap statistic method")

# The clustering call had been commented out, so the script only ran when a
# `kclust7` object was already in the workspace. Reuse such an object when it
# matches the current matrix and cluster count; otherwise compute it here.
if (!exists("kclust7") ||
    length(kclust7$cluster) != nrow(dataHMm3) ||
    nrow(kclust7$centers) != n_clusters) {
  kclust7 <- kmeans(dataHMm3, centers = n_clusters, nstart = 25)
}

# Silhouette plot of the clustering, using daisy() dissimilarities between genes.
distK <- daisy(dataHMm3)
plot(silhouette(kclust7$cluster, distK), col = seq_len(n_clusters), border = NA)

# Heatmap split into one row slice per cluster; slices keep their label order.
split <- paste0("Cluster\n", kclust7$cluster)
# split <- factor(paste0("Cluster\n", kclust3$cluster), levels=c("Cluster\n3","Cluster\n1","Cluster\n4","Cluster\n5","Cluster\n2","Cluster\n6"))
hmap_k <- Heatmap(
  dataHMm3,
  split = split,
  cluster_row_slices = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  name = "Means (scaled per strain)", # closing parenthesis was missing
  col = col_funGR2,
  width = unit(50, "mm"),
  column_title = "Means",
  column_title_gp = gpar(fontsize = 16, fontface = "bold")
)
hmap_k # +hmap_hier_factors6+hmap_hier_factors5
Mean profiles of clusters
# ---- Mean profile of each k-means cluster ----
# Cluster sizes and centres taken from the fitted k-means object.
clustercount <- data.frame(kclust7$cluster)
clustersizes <- table(clustercount$kclust7.cluster)
clusterMeans <- data.frame(kclust7$centers)

# Transpose so each cluster is a column (X1, X2, ...) and each row is one
# column of the clustered matrix; the row labels go into a `Sample` column.
clusterMeans1 <- data.frame(t(clusterMeans))
clusterMeans1 <- cbind(Sample = rownames(clusterMeans1), clusterMeans1)
rownames(clusterMeans1) <- NULL
#### clusterMeans1

# x-axis order (set manually).
orderN <- c(
  "Var37TNF_0h_lfc", "Var37TNF_6h_lfc", "Var37TNF_20h_lfc",
  "Var14TNF_0h_lfc", "Var14TNF_6h_lfc", "Var14TNF_20h_lfc"
)
# Strain of each row of clusterMeans1; used to draw one line per strain.
Strain <- factor(c(rep("VAR37", 3), rep("VAR14", 3))) #### note names

# Profile plot for cluster `k`: points for all six samples, lines joined within
# each strain, shared y-range so the panels are comparable.
cluster_profile_plot <- function(k) {
  cluster_col <- paste0("X", k)
  ggplot(data = clusterMeans1, aes(x = Sample, y = .data[[cluster_col]], group = 1)) +
    geom_line(aes(group = Strain)) +
    geom_point() +
    ggtitle(paste(paste0("Cluster X", k, " Profile "), clustersizes[k], " genes")) +
    scale_x_discrete(limits = orderN) +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      axis.title.y = element_blank()
    ) +
    ylim(-1.8, 1.8)
}

pX1 <- cluster_profile_plot(1)
pX2 <- cluster_profile_plot(2)
pX3 <- cluster_profile_plot(3)
pX4 <- cluster_profile_plot(4)

# multiplot() is not provided by any package loaded in this script, so the
# panels are arranged with gridExtra::grid.arrange(). The plot order keeps the
# column-wise layout of multiplot(cols = 2): X1 above X2 on the left, X3 above
# X4 on the right.
grid.arrange(pX1, pX3, pX2, pX4, ncol = 2)
# ---- Export cluster centres, clustered genes and IPA input tables ----
# Rows and gene symbols of the included ("in") genes.
topDEgenes <- which(tempA$Include == "in")
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]

#### export the gene expression data for the clusters
write.table(clusterMeans, paste0("ClusterMeansKm_", groupsName, ".txt"), sep = "\t")

# One row per included gene: cluster id, gene symbol, then the clustered values.
ClusteredGenes <- data.frame(kclust7$cluster, SymbolsKm, dataHMm3)
write.table(ClusteredGenes, paste0("ScaledDataInClustersKm_", groupsName, ".txt"), sep = "\t")
# head(ClusteredGenes)

# Gene symbols of the excluded ("out") genes.
bottomDEgenes <- which(tempA$Include == "out")
bottomG <- tempA[bottomDEgenes, ][["Gene_Symbol"]]
write.table(bottomG, paste0("ipaBottomKmeans_", groupsName, ".txt"), sep = "\t")

# IPA table: cluster id, symbol, row name, and one "1"/"0" membership column
# per cluster.
# countsTable <-countsTable[,c(1:15)]####if samples need removing
ipaKmeans <- ClusteredGenes[, 1:2]
ipaKmeans$name2 <- rownames(ipaKmeans)
ipaKmeans <- mutate(
  ipaKmeans,
  x1 = ifelse(kclust7.cluster == 1, "1", "0"),
  x2 = ifelse(kclust7.cluster == 2, "1", "0"),
  x3 = ifelse(kclust7.cluster == 3, "1", "0"),
  x4 = ifelse(kclust7.cluster == 4, "1", "0")
)
# Add x5, x6, ... in the same way when more clusters are used.
write.table(ipaKmeans, paste0("ipaKmeans_", groupsName, ".txt"), sep = "\t")
# head(ipaKmeans)
# ---- Gene symbols per cluster (input for compareCluster) ----
ClusteredGenes2 <- ClusteredGenes[c(1)]
# ClusteredGenes2

# Fixed: the previous code used subset(ClusteredGenes$SymbolsKm, ClusteredGenes == i),
# which compares EVERY column of the data frame with i. Any value equal to i in
# another column then indexed past the end of the symbol vector and added NA
# entries to the gene list. Compare the cluster column only.
cluster_ids <- 1:4
listAll <- lapply(cluster_ids, function(i) {
  ClusteredGenes$SymbolsKm[which(ClusteredGenes$kclust7.cluster == i)]
})

# Name the vectors in the list: X1 ... X4.
names(listAll) <- paste0("X", cluster_ids)
# if you want to rearrange the order
# listAll<-listAll[c("x3", "x7", "x8", "x2", "x6", "x5", "x4", "x1")]

# First few symbols of each cluster.
lapply(listAll, head)
$X1
[1] "SOD2" "RIPK2" "CCL20" "SLC12A2" "SELE" "NFKB1"
$X2
[1] "CYP1A1" "PLXNA4" "NPTX1" "EMCN" "PALMD" "CTGF"
$X3
[1] "CSF2" "SLC41A1" "GPRC5B" "CXCL3" "SDC4" "PLAU"
$X4
[1] "FRY" "ELMOD1" "SLC1A1" "SLC7A8" "GALNT15" "MS4A6A"
# Gene symbols assigned to cluster 1. Fixed to compare the cluster column only;
# `ClusteredGenes == 1` compared every column of the data frame and could add
# NA entries to the result.
ClusteredGenes$SymbolsKm[which(ClusteredGenes$kclust7.cluster == 1)]

setEnrichrSite("Enrichr") # Human genes
Connection changed to https://maayanlab.cloud/Enrichr/
Connection is Live!
# Fetch the list of Enrichr libraries; a NULL result is treated as the site
# being unavailable, and later Enrichr steps are skipped.
dbs <- listEnrichrDbs()
websiteLive <- !is.null(dbs)
if (websiteLive) head(dbs)
The simplify() function was used to reduce redundancy among the enriched GO terms.
# ---- GO Cellular Component enrichment per cluster ----
# str(AllGeneNames)
# Every gene symbol in the input table is used as the background universe.
cgoCC <- compareCluster(
  geneCluster = listAll,
  fun = "enrichGO",
  universe = AllGeneNames,
  OrgDb = org.Hs.eg.db, # human; use org.Mm.eg.db for mouse
  keyType = "SYMBOL",
  ont = "CC",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
# Remove redundant GO terms (cutoff 0.7), keeping the term with the smallest
# adjusted p-value.
cgoCC2 <- simplify(cgoCC, cutoff = 0.7, by = "p.adjust", select_fun = min)

#### write as spreadsheet
write.csv(as.data.frame(cgoCC2), paste0("GO_CC_", groupsName, ".csv"))

# Dot plot of up to 30 terms per cluster, with vertical x-axis labels.
dotplot(
  cgoCC2,
  showCategory = 30,
  title = paste0("GO Cellular Compartment ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Plots and GO data were written to files
# Save the GO Cellular Component dot plot as a PNG file.
png(paste0("GO_CC_", groupsName, ".png"), width = 1224, height = 824)
# Print explicitly: a ggplot object is only drawn when it is printed, and
# auto-printing does not happen when the script is run through source() or
# from inside a function or loop, which would leave the PNG empty.
print(
  dotplot(
    cgoCC2,
    showCategory = 30,
    title = paste0("GO Cellular Compartment ", groupsName)
  ) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
null device
1
GO BP
# ---- GO Biological Process enrichment per cluster ----
#### BP
# Every gene symbol in the input table is used as the background universe.
cgoBP <- compareCluster(
  geneCluster = listAll,
  fun = "enrichGO",
  universe = AllGeneNames,
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
# Remove redundant GO terms (cutoff 0.7), keeping the term with the smallest
# adjusted p-value.
cgoBP2 <- simplify(cgoBP, cutoff = 0.7, by = "p.adjust", select_fun = min)

#### write as spreadsheet
write.csv(as.data.frame(cgoBP2), paste0("GO_BP_", groupsName, ".csv"))

# Dot plot of up to 30 terms per cluster, with vertical x-axis labels.
dotplot(
  cgoBP2,
  showCategory = 30,
  title = paste0("GO Biological Process ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Save the GO Biological Process dot plot as a PNG file.
png(paste0("GO_BP_", groupsName, ".png"), width = 1024, height = 1224)
# Print explicitly: a ggplot object is only drawn when it is printed, and
# auto-printing does not happen when the script is run through source() or
# from inside a function or loop, which would leave the PNG empty.
print(
  dotplot(
    cgoBP2,
    showCategory = 30,
    title = paste0("GO Biological Process ", groupsName)
  ) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
null device
1
GO MF
# ---- GO Molecular Function enrichment per cluster ----
#### MF
# Every gene symbol in the input table is used as the background universe.
cgoMF <- compareCluster(
  geneCluster = listAll,
  fun = "enrichGO",
  universe = AllGeneNames,
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "MF",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
# Remove redundant GO terms (cutoff 0.7), keeping the term with the smallest
# adjusted p-value.
cgoMF2 <- simplify(cgoMF, cutoff = 0.7, by = "p.adjust", select_fun = min)

#### write as spreadsheet
write.csv(as.data.frame(cgoMF2), paste0("GO_MF_", groupsName, ".csv"))

# Dot plot of up to 30 terms per cluster, with vertical x-axis labels.
dotplot(
  cgoMF2,
  showCategory = 30,
  title = paste0("GO Molecular Function ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# ---- Enrichr pathway enrichment for each cluster ----
# dbs <- c("GO_Molecular_Function_2018", "GO_Cellular_Component_2018", "GO_Biological_Process_2018")
# NOTE(review): the second library was "WikiPathways_2019_Mouse", although the
# rest of this analysis is human (org.Hs.eg.db, Enrichr human site). Switched
# to the human library; revert if the mouse set was intended.
dbs <- c("Reactome_2016", "WikiPathways_2019_Human")

# Gene symbols of one cluster. Compares the cluster column only; the previous
# `ClusteredGenes == k` compared every column of the data frame and could add
# NA entries to the gene list sent to Enrichr.
cluster_symbols <- function(k) {
  ClusteredGenes$SymbolsKm[which(ClusteredGenes$kclust7.cluster == k)]
}

# Query Enrichr per cluster and plot the first library (Reactome). Each step
# runs only when the Enrichr site responded earlier.
if (websiteLive) { enriched1 <- enrichr(cluster_symbols(1), dbs) }
if (websiteLive) plotEnrich(enriched1[[1]], showTerms = 30, numChar = 80, y = "Count", orderBy = "P.value", title = "Reactome Enrichment Analysis Cluster 1")
if (websiteLive) { enriched2 <- enrichr(cluster_symbols(2), dbs) }
if (websiteLive) plotEnrich(enriched2[[1]], showTerms = 30, numChar = 80, y = "Count", orderBy = "P.value", title = "Reactome Enrichment Analysis Cluster 2")
if (websiteLive) { enriched3 <- enrichr(cluster_symbols(3), dbs) }
if (websiteLive) plotEnrich(enriched3[[1]], showTerms = 30, numChar = 80, y = "Count", orderBy = "P.value", title = "Reactome Enrichment Analysis Cluster 3")
if (websiteLive) { enriched4 <- enrichr(cluster_symbols(4), dbs) }
if (websiteLive) plotEnrich(enriched4[[1]], showTerms = 30, numChar = 80, y = "Count", orderBy = "P.value", title = "Reactome Enrichment Analysis Cluster 4")

# Second library (WikiPathways) for the same four result sets.
if (websiteLive) plotEnrich(enriched1[[2]], showTerms = 30, numChar = 80, y = "Count", orderBy = "P.value", title = "WikiPathways Enrichment Analysis Cluster 1")
if (websiteLive) plotEnrich(enriched2[[2]], showTerms = 30, numChar = 80, y = "Count", orderBy = "P.value", title = "WikiPathways Enrichment Analysis Cluster 2")
if (websiteLive) plotEnrich(enriched3[[2]], showTerms = 30, numChar = 80, y = "Count", orderBy = "P.value", title = "WikiPathways Enrichment Analysis Cluster 3")
if (websiteLive) plotEnrich(enriched4[[2]], showTerms = 30, numChar = 80, y = "Count", orderBy = "P.value", title = "WikiPathways Enrichment Analysis Cluster 4")
# Save the GO Molecular Function dot plot as a PNG file.
png(paste0("GO_MF_", groupsName, ".png"), width = 1424, height = 824)
# Print explicitly: a ggplot object is only drawn when it is printed, and
# auto-printing does not happen when the script is run through source() or
# from inside a function or loop, which would leave the PNG empty.
print(
  dotplot(
    cgoMF2,
    showCategory = 30,
    title = paste0("GO Molecular Function ", groupsName)
  ) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
null device
1
# ---- Second run: genes selected by raw p-value and fold change ----
# Label used in the names of the files written by this run.
groupsName <- "R1_R4_kmeans_p0.05lfc1"

# Re-read the results table; the first column becomes the row names and column
# names are kept exactly as they appear in the file.
countsTable <- read.delim(
  file = "RNAseq2019July_5.txt",
  header = TRUE,
  sep = "\t",
  check.names = FALSE,
  row.names = 1
)
head(countsTable)
AllGeneNames <- countsTable$Gene_Symbol
# head(AllGeneNames)

# tempA <- resAll[-c(10:30)]
tempA <- countsTable
# Drop the row names; rows are addressed by position from here on.
rownames(tempA) <- NULL

# One logical vector per contrast: p-value present and < 0.05,
# |log2FoldChange| > 1, and at least one of the two compared `*_mean` values > 10.
include_tests <- list(
  tempA$pvalue_R1_Var37_Hours_6h_vs_0h < 0.05 &
    abs(tempA$log2FoldChange_R1_Var37_Hours_6h_vs_0h) > 1 &
    (tempA$Var37TNF_0h_mean > 10 | tempA$Var37TNF_6h_mean > 10) &
    !is.na(tempA$pvalue_R1_Var37_Hours_6h_vs_0h),
  tempA$pvalue_R1_Var37_Hours_20h_vs_0h < 0.05 &
    abs(tempA$log2FoldChange_R1_Var37_Hours_20h_vs_0h) > 1 &
    (tempA$Var37TNF_0h_mean > 10 | tempA$Var37TNF_20h_mean > 10) &
    !is.na(tempA$pvalue_R1_Var37_Hours_20h_vs_0h),
  tempA$pvalue_R1_Var37_Hours_20h_vs_6h < 0.05 &
    abs(tempA$log2FoldChange_R1_Var37_Hours_20h_vs_6h) > 1 &
    (tempA$Var37TNF_6h_mean > 10 | tempA$Var37TNF_20h_mean > 10) &
    !is.na(tempA$pvalue_R1_Var37_Hours_20h_vs_6h),
  tempA$pvalue_R4Var14TNF_Hours_6h_vs_0h < 0.05 &
    abs(tempA$log2FoldChange_R4Var14TNF_Hours_6h_vs_0h) > 1 &
    (tempA$Var14TNF_0h_mean > 10 | tempA$Var14TNF_6h_mean > 10) &
    !is.na(tempA$pvalue_R4Var14TNF_Hours_6h_vs_0h),
  tempA$pvalue_R4Var14TNF_Hours_20h_vs_0h < 0.05 &
    abs(tempA$log2FoldChange_R4Var14TNF_Hours_20h_vs_0h) > 1 &
    (tempA$Var14TNF_0h_mean > 10 | tempA$Var14TNF_20h_mean > 10) &
    !is.na(tempA$pvalue_R4Var14TNF_Hours_20h_vs_0h),
  tempA$pvalue_R4Var14TNF_Hours_20h_vs_6h < 0.05 &
    abs(tempA$log2FoldChange_R4Var14TNF_Hours_20h_vs_6h) > 1 &
    (tempA$Var14TNF_20h_mean > 10 | tempA$Var14TNF_6h_mean > 10) &
    !is.na(tempA$pvalue_R4Var14TNF_Hours_20h_vs_6h)
)

# Fold the tests into the same result as a nested ifelse() chain: starting from
# "out", each test (applied last-to-first so the first test ends up outermost)
# overrides the running value with "in" where it is TRUE.
include_flag <- Reduce(
  function(otherwise, test) ifelse(test, "in", otherwise),
  rev(include_tests),
  "out"
)
tempA <- mutate(tempA, Include = include_flag)
# tempA

# Number of genes per Include value.
tempA %>%
  group_by(Include) %>%
  tally()

# Row positions of the included genes.
topDEgenes <- which(tempA$Include == "in")
# ---- Build the per-strain scaled mean matrix and the "vs 0h" matrix ----
head(countsTable)

# Select the six mean columns by name rather than by position, so the script
# does not silently pick the wrong columns if the table layout changes.
mean_cols <- c(
  "Var37TNF_0h_mean", "Var37TNF_6h_mean", "Var37TNF_20h_mean",
  "Var14TNF_0h_mean", "Var14TNF_6h_mean", "Var14TNF_20h_mean"
)
missing_cols <- setdiff(mean_cols, colnames(countsTable))
if (length(missing_cols) > 0) {
  stop("countsTable is missing column(s): ", paste(missing_cols, collapse = ", "),
       call. = FALSE)
}
baseMeansHm <- countsTable[, mean_cols]
head(baseMeansHm)
tail(baseMeansHm[topDEgenes, ])

# Drop the "_mean" suffix: Var37TNF_0h, Var37TNF_6h, ..., Var14TNF_20h.
colnames(baseMeansHm) <- sub("_mean$", "", mean_cols)

# Replace low values (< 10) with 0, then log2(x + 1) transform.
baseMeansHm[] <- lapply(baseMeansHm, function(x) {
  x[x < 10] <- 0
  x
})
tail(baseMeansHm)
baseMeansHm <- log2(baseMeansHm + 1)
tail(baseMeansHm)

topDEgenes <- which(tempA$Include == "in") # row positions of included genes

# Row-wise centring and scaling: scale() works on columns, hence the two
# transposes.
scale_rows <- function(x) t(scale(t(as.matrix(x))))

# Scale Var37 and Var14 separately, so each gene is standardised within a strain.
var14mn <- scale_rows(baseMeansHm[, c("Var14TNF_0h", "Var14TNF_6h", "Var14TNF_20h")])
baseMeansHm2 <- scale_rows(baseMeansHm[, c("Var37TNF_0h", "Var37TNF_6h", "Var37TNF_20h")])
baseMeansHm2 <- as.data.frame(cbind(baseMeansHm2, var14mn))
# Any NA produced by the scaling is set to 0.
baseMeansHm2[is.na(baseMeansHm2)] <- 0

# "_lfc" columns: difference between each time point and the same strain's 0h
# value. NOTE: these are differences of the scaled values computed above, not
# log2 fold changes of the means themselves.
for (strain in c("Var37TNF", "Var14TNF")) {
  baseline <- baseMeansHm2[[paste0(strain, "_0h")]]
  for (timepoint in c("0h", "6h", "20h")) {
    baseMeansHm2[[paste0(strain, "_", timepoint, "_lfc")]] <-
      baseMeansHm2[[paste0(strain, "_", timepoint)]] - baseline
  }
}

# Split into the difference matrix (columns 7-12) and the scaled means (1-6).
baseMeansHm3 <- baseMeansHm2[, 7:12]
head(baseMeansHm3)
baseMeansHm2 <- baseMeansHm2[, 1:6]
head(baseMeansHm2)

# Scaled means of the included genes only.
dataHMm2 <- as.matrix(baseMeansHm2[topDEgenes, ])
tail(dataHMm2)
# ---- Heatmaps of the scaled means (p-value / fold-change gene set) ----

# All heatmaps below share the same layout; only the matrix, legend name,
# title, colour mapping, body width and column clustering differ.
build_heatmap <- function(mat, name, title, colours, width_mm, cluster_columns) {
  Heatmap(
    mat,
    name = name,
    row_labels = paste0(rownames(mat), " ", tempA$Gene_Symbol[topDEgenes]),
    column_title = title,
    col = colours,
    column_title_gp = gpar(fontsize = 14, fontface = "bold"),
    width = unit(width_mm, "mm"),
    cluster_columns = cluster_columns,
    show_row_names = FALSE
  )
}

# Scaled means of the included genes, split by strain (columns 1-3 and 4-6).
dataHMm2 <- as.matrix(baseMeansHm2[topDEgenes, ])
dataHMm2_37 <- dataHMm2[, 1:3]
dataHMm2_14 <- dataHMm2[, 4:6]

# Per-strain heatmaps; the "b" variants also cluster the columns.
hmap_hier_factors37 <- build_heatmap(dataHMm2_37, "mean37", "MeansV37", col_funGR, 25, FALSE)
hmap_hier_factors37b <- build_heatmap(dataHMm2_37, "mean37b", "MeansV37", col_funGR, 25, TRUE)
hmap_hier_factors14 <- build_heatmap(dataHMm2_14, "mean14", "MeansV14", col_funGR, 25, FALSE)
hmap_hier_factors14b <- build_heatmap(dataHMm2_14, "mean14b", "MeansV14", col_funGR, 25, TRUE)
# write.table(dataHMm2,"dataHMm2.txt", sep = "\t")

# Both strains together: original column order, then with the time points
# interleaved across strains.
hmap_hier_factors4 <- build_heatmap(dataHMm2, "mean1", "Means", col_funGR, 50, FALSE)
dataHMm2b <- dataHMm2[, c(1, 4, 2, 5, 3, 6)]
hmap_hier_factors4a <- build_heatmap(dataHMm2b, "mean2", "Means Rearranged", col_funGR, 50, FALSE)
hmap_hier_factors4b <- Heatmap(
dataHMm2, name = "mean3",
row_labels = paste0(rownames(dataHMm2)," ",(tempA[ topDEgenes, ])$Gene_Symbol),
column_title = paste0("Means Clustered"),
col = col_funGR,
column_title_gp = gpar(fontsize = 14, fontface = "bold"),
width = unit(50, "mm"),
#cluster_columns = FALSE,
#cluster_rows = FALSE,
show_row_names = FALSE)
# Differences vs 0h for the included genes; also written to disk.
dataHMm3 <- as.matrix(baseMeansHm3[topDEgenes, ])
write.table(dataHMm3, "dataHMm3.txt", sep = "\t")
#baseMeansHm2<-as.matrix(baseMeansHm2)
# Same data with the two strains interleaved per time point.
dataHMm3b <- dataHMm3[, c(1, 4, 2, 5, 3, 6)]

# Build one "vs 0h" heatmap; only the matrix, legend name, column title and
# column clustering vary between the three heatmaps below.
buildLfcHeatmap <- function(mat, legendName, columnTitle, clusterColumns) {
  Heatmap(
    mat,
    name = legendName,
    row_labels = paste0(rownames(mat), " ", (tempA[topDEgenes, ])$Gene_Symbol),
    column_title = columnTitle,
    col = col_funGR2,
    column_title_gp = gpar(fontsize = 14, fontface = "bold"),
    width = unit(50, "mm"),
    cluster_columns = clusterColumns,
    show_row_names = FALSE
  )
}

hmap_hier_factors6 <- buildLfcHeatmap(dataHMm3, "logfc1", "vs 0h", FALSE)
hmap_hier_factors6b <- buildLfcHeatmap(dataHMm3b, "logfc2", "vs 0h Rearranged", FALSE)
# Same matrix as hmap_hier_factors6, but with the columns clustered.
hmap_hier_factors6c <- buildLfcHeatmap(dataHMm3, "logfc3", "vs 0h", TRUE)
# Combine the individual heatmaps into horizontal lists and draw each list
# under a shared, large column title.
listTitleGp <- gpar(fontsize = 22)

# Per-strain heatmaps (unclustered and clustered columns).
hmlist1 <- hmap_hier_factors37 +
  hmap_hier_factors14 +
  hmap_hier_factors37b +
  hmap_hier_factors14b
draw(
  hmlist1,
  column_title = "Heatmaps on Means (scaled per strain). Genelists combined from VAR37 and VAR14 timecourses p<0.05 lfc1",
  column_title_gp = listTitleGp
)

# Both strains together.
hmlist2 <- hmap_hier_factors4 +
  hmap_hier_factors4a +
  hmap_hier_factors4b
draw(
  hmlist2,
  column_title = "Heatmaps on Means (scaled per strain)",
  column_title_gp = listTitleGp
)

# Scaled means followed by the differences vs 0h.
hmlist3 <- hmap_hier_factors4 +
  hmap_hier_factors4a +
  hmap_hier_factors4b +
  hmap_hier_factors6 +
  hmap_hier_factors6b +
  hmap_hier_factors6c
draw(
  hmlist3,
  column_title = "Heatmaps on Means (scaled per strain) and logfc Means vs Strain 0h",
  column_title_gp = listTitleGp
)
#### Choosing the number of clusters
# The fviz_nbclust() results are ggplot objects (they are combined with
# labs() via `+`), so they are drawn with grid and ignore par(mfrow = ...).
# Arrange them side by side explicitly instead; this also stops a leftover
# mfrow setting from squeezing the base-graphics silhouette plot below.
#### Silhouette method
silhouettePlot <- fviz_nbclust(dataHMm3, kmeans, method = "silhouette", k.max = 16) +
  labs(subtitle = "Silhouette method")
#### Elbow method
elbowPlot <- fviz_nbclust(dataHMm3, kmeans, method = "wss", k.max = 16) +
  labs(subtitle = "Elbow method")
gridExtra::grid.arrange(silhouettePlot, elbowPlot, ncol = 2)
####gap stat slow!!!
####set.seed(123)
####fviz_nbclust(dataHMm, kmeans, nstart = 25, method = "gap_stat", nboot = 100,k.max = 16)+
#### labs(subtitle = "Gap statistic method")

#### k-means clustering
# Re-use a clustering restored from a saved workspace when there is one, so
# that cluster numbering stays stable between sessions. Otherwise compute it
# here with a fixed seed; previously the kmeans() call was commented out and
# the code below failed on a fresh session.
if (!exists("kclust8")) {
  set.seed(123)
  kclust8 <- kmeans(dataHMm3, centers = 4, nstart = 25)
}
# Guard against a stale clustering that belongs to a different gene list.
if (length(kclust8$cluster) != nrow(dataHMm3)) {
  stop("kclust8 does not match dataHMm3: ", length(kclust8$cluster),
       " cluster labels for ", nrow(dataHMm3), " rows", call. = FALSE)
}
nClusters <- nrow(kclust8$centers)

#silhouette plot
distK <- daisy(dataHMm3)
plot(silhouette(kclust8$cluster, distK), col = seq_len(nClusters), border = NA)

# One row slice per cluster. The label vector is not called `split`, to avoid
# shadowing base::split().
clusterSplit <- paste0("Cluster\n", kclust8$cluster)
#clusterSplit <- factor(paste0("Cluster\n", kclust3$cluster), levels=c("Cluster\n3","Cluster\n1","Cluster\n4","Cluster\n5","Cluster\n2","Cluster\n6"))
# NOTE(review): the matrix plotted here is dataHMm3 (differences vs 0h), while
# the legend and column title say "Means" -- confirm the intended labels.
hmap_k <- Heatmap(
  dataHMm3,
  split = clusterSplit,
  cluster_row_slices = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  name = "Means (scaled per strain)",
  col = col_funGR2,
  width = unit(50, "mm"),
  column_title = "Means",
  column_title_gp = gpar(fontsize = 16, fontface = "bold")
)
hmap_k#+hmap_hier_factors6+hmap_hier_factors5
Mean profiles of clusters
# Cluster sizes and cluster centres taken from the k-means result.
clustercount <- data.frame(kclust8$cluster)
clustersizes <- table(clustercount$kclust8.cluster)
clusterMeans <- data.frame(kclust8$centers)

# Transpose the centres so that each row is a sample and each column (X1, X2,
# ...) a cluster, then expose the sample names as a regular `Sample` column.
clusterMeans1 <- data.frame(t(clusterMeans))
clusterMeans1 <- data.frame(Sample = rownames(clusterMeans1), clusterMeans1)
rownames(clusterMeans1) <- NULL
####clusterMeans1

# Order of the samples along the x axis (maintained by hand).
orderN <- c("Var37TNF_0h_lfc", "Var37TNF_6h_lfc", "Var37TNF_20h_lfc",
            "Var14TNF_0h_lfc", "Var14TNF_6h_lfc", "Var14TNF_20h_lfc")#### manual
# Strain of each row of clusterMeans1; used to draw one line per strain.
Strain <- factor(c(rep("VAR37", 3), rep("VAR14", 3)))####note names

# Layers shared by every cluster profile plot.
profileLayers <- list(
  geom_line(aes(group = Strain)),
  geom_point(),
  scale_x_discrete(limits = orderN),
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.y = element_blank()
  ),
  ylim(-1.8, 1.8)
)

pX1 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X1, group = 1)) +
  profileLayers +
  ggtitle(paste("Cluster X1 Profile ", clustersizes[1], " genes"))
pX2 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X2, group = 1)) +
  profileLayers +
  ggtitle(paste("Cluster X2 Profile ", clustersizes[2], " genes"))
pX3 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X3, group = 1)) +
  profileLayers +
  ggtitle(paste("Cluster X3 Profile ", clustersizes[3], " genes"))
pX4 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X4, group = 1)) +
  profileLayers +
  ggtitle(paste("Cluster X4 Profile ", clustersizes[4], " genes"))
# For more clusters, add pX5, pX6, ... in the same way, e.g.:
#pX5 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X5, group = 1)) +
#  profileLayers +
#  ggtitle(paste("Cluster X5 Profile ", clustersizes[5], " genes"))

#plot
multiplot(pX1, pX2, pX4, pX3, cols = 2)
# Gene symbols of the clustered ("in") genes, in the same row order as the
# matrix that was clustered.
topDEgenes <- which(tempA$Include == "in")####find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- dplyr::pull(tempAkm, Gene_Symbol)

#### export the gene expression data for the clusters
write.table(clusterMeans, paste0("ClusterMeansKm_", groupsName, ".txt"), sep = "\t")
# One row per clustered gene: cluster id, gene symbol, then the clustered
# values. The first column is named "kclust8.cluster" by data.frame().
ClusteredGenes <- data.frame(kclust8$cluster, SymbolsKm, dataHMm3)
write.table(ClusteredGenes, paste0("ScaledDataInClustersKm_", groupsName, ".txt"), sep = "\t")
#head(ClusteredGenes)

# Symbols of the genes that were left out of the clustering.
bottomDEgenes <- which(tempA$Include == "out")####find indexes
bottomG <- tempA[bottomDEgenes, ]
bottomG <- dplyr::pull(bottomG, Gene_Symbol)
write.table(bottomG, paste0("ipaBottomKmeans_", groupsName, ".txt"), sep = "\t")

# Cluster ids actually present (1..k for a k-means result).
clusterIds <- sort(unique(ClusteredGenes$kclust8.cluster))

# Membership table: cluster id, symbol, row name, then one "1"/"0" indicator
# column (x1, x2, ...) per cluster.
ipaKmeans <- ClusteredGenes[, c(1:2)]
ipaKmeans$name2 <- rownames(ipaKmeans)
for (clusterId in clusterIds) {
  ipaKmeans[[paste0("x", clusterId)]] <-
    ifelse(ipaKmeans$kclust8.cluster == clusterId, "1", "0")
}
write.table(ipaKmeans, paste0("ipaKmeans_", groupsName, ".txt"), sep = "\t")
#head(ipaKmeans)

ClusteredGenes2 <- ClusteredGenes[c(1)]
#ClusteredGenes2

# Named list of gene symbols per cluster (X1, X2, ...), used by
# compareCluster() below. The membership test is made on the cluster column
# only: comparing the whole data frame to the cluster id (as was done before)
# also matches any other cell that happens to equal the id and then injects
# NA symbols into the list.
listAll <- lapply(clusterIds, function(clusterId) {
  ClusteredGenes$SymbolsKm[ClusteredGenes$kclust8.cluster == clusterId]
})
names(listAll) <- paste0("X", clusterIds)
#if you want to rearrange the order
#listAll<-listAll[c("X3", "X1", "X4", "X2")]
#lapply(listAll, head)
The clusterProfiler `simplify()` function is used to reduce redundancy among the enriched GO terms.
#str(AllGeneNames)
# NOTE(review): `xread` is not assigned anywhere in the visible code, so
# evaluating it raises "object 'xread' not found" and halts a top-to-bottom
# run at this point. Presumably a deliberate stop before the GO enrichment
# steps -- confirm, then remove it or replace it with an explicit stop().
xread
Error: object 'xread' not found
####CC
# GO Cellular Component enrichment compared across the k-means clusters in
# listAll, with all gene symbols of the count table as the background.
cgoCC <- compareCluster(
  geneClusters = listAll,  # full argument name; `geneCluster` only worked via partial matching
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  ####OrgDb=org.Mm.eg.db,
  keyType = "SYMBOL",
  ont = "CC",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
# Namespace-qualified so that a simplify() from another attached package
# cannot mask the clusterProfiler method.
cgoCC2 <- clusterProfiler::simplify(cgoCC, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoCC2), paste0("GO_CC_", groupsName, ".csv"))
# The GO ontology is "Cellular Component" (the title used to read "Compartment").
goCcDotplot <- dotplot(cgoCC2, showCategory = 30,
                       title = paste0("GO Cellular Component ", groupsName)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goCcDotplot)
Plots and GO data were written to files
# Write the GO CC dotplot to a PNG file. The plot is print()ed explicitly:
# ggplot objects are only auto-printed at top level, so without print() the
# file comes out empty when this code is run via source() or inside a
# function or loop.
png(paste0("GO_CC_", groupsName, ".png"), width = 1224, height = 824)
print(
  dotplot(cgoCC2, showCategory = 30,
          # The GO ontology is "Cellular Component" (was "Compartment").
          title = paste0("GO Cellular Component ", groupsName)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
null device
1
GO BP
####BP
# GO Biological Process enrichment compared across the k-means clusters in
# listAll, with all gene symbols of the count table as the background.
cgoBP <- compareCluster(
  geneClusters = listAll,  # full argument name; `geneCluster` only worked via partial matching
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
# Namespace-qualified so that a simplify() from another attached package
# cannot mask the clusterProfiler method.
cgoBP2 <- clusterProfiler::simplify(cgoBP, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoBP2), paste0("GO_BP_", groupsName, ".csv"))

# Build the dotplot once; show it in the notebook and write it to a PNG.
goBpDotplot <- dotplot(cgoBP2, showCategory = 30,
                       title = paste0("GO Biological Process ", groupsName)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goBpDotplot)
# print() is explicit inside the device: ggplot objects are only auto-printed
# at top level, so the PNG would be empty under source() or inside a function.
png(paste0("GO_BP_", groupsName, ".png"), width = 1024, height = 1224)
print(goBpDotplot)
dev.off()
null device
1
GO MF
####MF
# GO Molecular Function enrichment compared across the k-means clusters in
# listAll, with all gene symbols of the count table as the background.
cgoMF <- compareCluster(
  geneClusters = listAll,  # full argument name; `geneCluster` only worked via partial matching
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "MF",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
# Namespace-qualified so that a simplify() from another attached package
# cannot mask the clusterProfiler method.
cgoMF2 <- clusterProfiler::simplify(cgoMF, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoMF2), paste0("GO_MF_", groupsName, ".csv"))

# Build the dotplot once; show it in the notebook and write it to a PNG.
goMfDotplot <- dotplot(cgoMF2, showCategory = 30,
                       title = paste0("GO Molecular Function ", groupsName)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goMfDotplot)
# print() is explicit inside the device: ggplot objects are only auto-printed
# at top level, so the PNG would be empty under source() or inside a function.
png(paste0("GO_MF_", groupsName, ".png"), width = 1424, height = 824)
print(goMfDotplot)
dev.off()
null device
1
#dbs <- c("GO_Molecular_Function_2018", "GO_Cellular_Component_2018", "GO_Biological_Process_2018")
# NOTE(review): the GO analysis in this file uses org.Hs.eg.db (human) while
# the second library requested here is the *Mouse* WikiPathways set --
# confirm which organism is intended.
dbs <- c("Reactome_2016", "WikiPathways_2019_Mouse")

# Enrichr is a web service; only query it when it has been flagged as
# reachable. isTRUE() also copes with a NULL/NA flag instead of erroring.
if (isTRUE(websiteLive)) {
  # Gene symbols of one cluster. The test is made on the cluster column only;
  # comparing the whole data frame to the id (as before) also matches any
  # other cell that happens to equal the id and adds NA symbols to the query.
  enrichCluster <- function(clusterId) {
    enrichr(ClusteredGenes$SymbolsKm[ClusteredGenes$kclust8.cluster == clusterId], dbs)
  }
  enriched1 <- enrichCluster(1)
  enriched2 <- enrichCluster(2)
  enriched3 <- enrichCluster(3)
  enriched4 <- enrichCluster(4)
  enrichedByCluster <- list(enriched1, enriched2, enriched3, enriched4)

  # Plot order as before: all clusters for Reactome, then all clusters for
  # WikiPathways. Each result is a list with one table per entry of `dbs`.
  dbLabels <- c("Reactome", "WikiPathways")
  for (dbIndex in seq_along(dbs)) {
    for (clusterId in seq_along(enrichedByCluster)) {
      # Explicit print(): plots are not auto-printed inside a loop.
      print(plotEnrich(
        enrichedByCluster[[clusterId]][[dbIndex]],
        showTerms = 30, numChar = 80, y = "Count", orderBy = "P.value",
        title = paste0(dbLabels[dbIndex], " Enrichment Analysis Cluster ", clusterId)
      ))
    }
  }
}
Save: once you are happy with the clustering, save the workspace so that it can be reloaded later.
# Persist every object of the session (including the k-means result) to disk.
save.image(file = "KmFeb2021.RData")
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.