#### Viewing is better if the code is hidden (use the drop-down list at the top right)
sink(file="RsessionInfoDESeq2.txt")
library('DESeq2')
library("ggplot2")
library(reshape2)
####library(tidyverse)
####library(splitstackshape)
####library(data.table)
library("RColorBrewer")
library("gplots")
####library('ggdendro')
library('ggrepel')
library("dplyr")
library("ComplexHeatmap")
library("clusterProfiler")
library(VennDiagram) ######
library(UpSetR)
library(gridExtra)
library(cluster)
library(circlize)
library(factoextra)
library(NbClust)
library("biomaRt")
library("org.Hs.eg.db")####human
library("org.Mm.eg.db")####mouse
library(venn)
####library(org.At.tair.db)####arabidopsis
sessionInfo()
sink()
#########################################
####multiplot
#########################################
#### Multiple plot function
####
#### Draws several ggplot objects on a single page using a grid layout.
####
#### - ...:      ggplot objects to draw
#### - plotlist: optional list of further ggplot objects, appended after ...
#### - file:     unused; kept so that existing calls passing it keep working
#### - cols:     number of columns in the automatic layout
#### - layout:   a matrix specifying the layout. If present, 'cols' is ignored.
####             If the layout is something like
####             matrix(c(1,2,3,3), nrow=2, byrow=TRUE), then plot 1 will go in
####             the upper left, 2 will go in the upper right, and 3 will go
####             all the way across the bottom.
####
#### Called for its drawing side effect. With no plots at all nothing is drawn
#### and NULL is returned invisibly.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  library(grid)

  #### Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)

  #### Nothing to draw: 1:numPlots would otherwise iterate over c(1, 0) and
  #### fail on plots[[1]].
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  #### If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    #### ncol: number of columns of plots
    #### nrow: number of rows needed, calculated from the number of columns
    numRows <- ceiling(numPlots / cols)
    layout <- matrix(seq_len(cols * numRows), ncol = cols, nrow = numRows)
  }

  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    #### Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    #### Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      #### Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(
        layout.pos.row = matchidx$row,
        layout.pos.col = matchidx$col
      ))
    }
  }
}
####function my code edit of plotPCA
####################################
#### PCA scatter plot of the samples in x for two selectable principal
#### components.
####
#### - x:          object for which assay(x) gives a features-by-samples matrix
####               and colData(x) gives the per-sample annotation
#### - intgroup:   names of the colData(x) columns used to group/colour samples
#### - ntop:       number of highest-variance rows used for the PCA
#### - returnData: if TRUE, return the plotting data frame instead of a plot;
####               its "percentVar" attribute holds the variance fractions of
####               the two selected components, in the order c(PCx, PCy)
#### - PCx, PCy:   indices of the components shown on the x and y axes
plotPCALeo <- function(x, intgroup = "Treatment", ntop = 500, returnData = FALSE, PCx = 1, PCy = 2) {
  ####rv <- rowVars(assay(x))
  rv <- apply(assay(x), 1, var)
  select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
  pca <- prcomp(t(assay(x)[select, ]))
  percentVar <- pca$sdev^2 / sum(pca$sdev^2)

  if (!all(intgroup %in% names(colData(x)))) {
    stop("the argument 'intgroup' should specify columns of colData(dds)")
  }
  intgroup.df <- as.data.frame(colData(x)[, intgroup, drop = FALSE])
  group <- factor(apply(intgroup.df, 1, paste, collapse = " : "))
  d <- data.frame(
    PCX = pca$x[, PCx], PCY = pca$x[, PCy], group = group,
    intgroup.df, names = colnames(x)
  )

  if (returnData) {
    #### c(PCx, PCy), not PCx:PCy: the latter is a range and returns the wrong
    #### entries whenever the two components are not adjacent and ascending.
    attr(d, "percentVar") <- percentVar[c(PCx, PCy)]
    return(d)
  }

  #### Axis labels report the variance of the components actually plotted.
  ggplot(data = d, aes(x = PCX, y = PCY, color = group)) +
    geom_point(size = 3) +
    xlab(paste0("PC", PCx, ": ", round(percentVar[PCx] * 100), "% variance")) +
    ylab(paste0("PC", PCy, ": ", round(percentVar[PCy] * 100), "% variance"))
}
#### Colour ramps used by the heatmaps (value breaks mapped to colours)
col_fun <- colorRamp2(
  breaks = c(-1, -0.2, 0, 0.2, 1),
  colors = c("blue", "cyan", "grey90", "orange", "red")
)
col_funGR <- colorRamp2(
  breaks = c(-1.5, 0, 1.5),
  colors = c("green", "black", "red")
)
col_funGR2 <- colorRamp2(
  breaks = c(-2, 0, 2),
  colors = c("green", "black", "red")
)
#### Fill colours for the Venn diagrams; the suffix is the number of sets
colorsV2 <- c("mediumorchid1", "chartreuse3")
colorsV3 <- c("cornflowerblue", "brown1", "orange2")
colorsV4 <- c("cornflowerblue", "orange2", "green3", "red")
colorsV5 <- c("cornflowerblue", "orange2", "green3", "purple", "red")
#col_fun(seq(-3, 3))
#### Label used in every output file name of this section
groupsName <- "R3_VAR14_kmeans_q0.05"
#### Tab-separated results table; the first column becomes the row names
countsTable <- read.delim(
  "RNAseq2019July_5.txt",
  header = TRUE,
  sep = "\t",
  check.names = FALSE,
  row.names = 1
)
head(countsTable)
#### All gene symbols in the table; used later as the enrichment universe
AllGeneNames <- countsTable$Gene_Symbol
#head(AllGeneNames)
tempA <- countsTable
#### Venn diagrams of the genes called differentially expressed in the
#### R3noTNF_var14_vs_RBC comparisons of tempA, under three different cutoffs.
vennTimepoints <- c("0h", "2h", "6h", "20h")
vennCategoryNames <- paste0("Var14noTNF_", vennTimepoints)

#### Logical vector, one entry per row of tbl: TRUE where the statistic `stat`
#### ("padj" or "pvalue") of the given time point is below `cutoff` and, when
#### minAbsLfc is given, |log2FoldChange| is above it. NA never passes.
passesCutoff <- function(tbl, stat, timepoint, cutoff, minAbsLfc = NULL) {
  statCol <- paste0(stat, "_R3noTNF_var14_vs_RBC_", timepoint)
  #### Fail loudly on a missing column instead of silently selecting no genes
  stopifnot(statCol %in% names(tbl))
  keep <- !is.na(tbl[[statCol]]) & tbl[[statCol]] < cutoff
  if (!is.null(minAbsLfc)) {
    lfcCol <- paste0("log2FoldChange_R3noTNF_var14_vs_RBC_", timepoint)
    stopifnot(lfcCol %in% names(tbl))
    keep <- keep & !is.na(tbl[[lfcCol]]) & abs(tbl[[lfcCol]]) > minAbsLfc
  }
  keep
}

#### Unnamed list with one vector of gene symbols per time point
genesPerTimepoint <- function(tbl, stat, cutoff, minAbsLfc = NULL) {
  lapply(vennTimepoints, function(tp) {
    tbl$Gene_Symbol[passesCutoff(tbl, stat, tp, cutoff, minAbsLfc)]
  })
}

#### Gene symbols passing the cutoff at one or more time points
genesAnyTimepoint <- function(tbl, stat, cutoff, minAbsLfc = NULL) {
  hits <- Reduce(`|`, lapply(vennTimepoints, function(tp) {
    passesCutoff(tbl, stat, tp, cutoff, minAbsLfc)
  }))
  tbl$Gene_Symbol[hits]
}

#### Four-set Venn diagram (one set per time point), returned as a grob list
timepointVenn <- function(geneLists, title) {
  venn.diagram(
    x = geneLists,
    category.names = vennCategoryNames,
    main = title,
    filename = NULL, scaled = FALSE, fill = colorsV4, cat.col = colorsV4,
    cat.cex = 1, cat.dist = 0.3, margin = 0.3
  )
}

#### log2 fold change > 1 is what the titles call "fold change>2"
vennq <- timepointVenn(genesPerTimepoint(tempA, "padj", 0.05), "padj<0.05")
vennp <- timepointVenn(
  genesPerTimepoint(tempA, "pvalue", 0.05, minAbsLfc = 1),
  "pvalue<0.05&fold change>2"
)
vennq2 <- timepointVenn(
  genesPerTimepoint(tempA, "padj", 0.1, minAbsLfc = 1),
  "padj<0.1&fold change>2"
)

#### Union over the time points for each cutoff, compared against each other
listA <- genesAnyTimepoint(tempA, "padj", 0.05)
listB <- genesAnyTimepoint(tempA, "padj", 0.1, minAbsLfc = 1)
listC <- genesAnyTimepoint(tempA, "pvalue", 0.05, minAbsLfc = 1)
vennpq <- venn.diagram(
  x = list(listA, listB, listC),
  category.names = c("padj<0.05", "padj<0.1&fc>2", "p<0.05&fc>2"),
  main = "padj compared to pvalue",
  filename = NULL, scaled = FALSE, fill = colorsV3, cat.col = colorsV3,
  cat.cex = 1, cat.dist = 0.1, margin = 0.15
)
grid.arrange(gTree(children = vennq), gTree(children = vennpq), ncol = 2, top = "R3 Var14 no TNF")
#tempA<-resAll[-c(10:30) ]
#### Work on a copy of the table without row names
tempA <- countsTable
rownames(tempA) <- NULL
#### Include is "in" for genes with a non-missing padj < 0.05 at any of the
#### four time points, "out" otherwise.
tempA <- mutate(
  tempA,
  Include = ifelse(
    (!is.na(padj_R3noTNF_var14_vs_RBC_0h) & padj_R3noTNF_var14_vs_RBC_0h < 0.05) |
      (!is.na(padj_R3noTNF_var14_vs_RBC_2h) & padj_R3noTNF_var14_vs_RBC_2h < 0.05) |
      (!is.na(padj_R3noTNF_var14_vs_RBC_6h) & padj_R3noTNF_var14_vs_RBC_6h < 0.05) |
      (!is.na(padj_R3noTNF_var14_vs_RBC_20h) & padj_R3noTNF_var14_vs_RBC_20h < 0.05),
    "in",
    "out"
  )
)
#tempA
####library(dplyr)
#### Show how many genes fall in each group
tally(group_by(tempA, Include))
#### Row indexes of the selected genes
topDEgenes <- which(tempA$Include == "in")
head(countsTable)
#baseMeansHm <-countsTable[,c(60:63)]
#### Columns are picked by position; the subtractions below expect them to be
#### the Var14noTNF_*_mean and RBCnoTNF_*_mean columns.
baseMeansHm <- countsTable[, c(60:63, 79:82)]
head(baseMeansHm)
baseMeansHm2 <- log2(baseMeansHm + 1)
#### Difference of the log2 means (Var14 minus RBC) at each time point
baseMeansHm2$Var14_RBC_0h <- with(baseMeansHm2, Var14noTNF_0h_mean - RBCnoTNF_0h_mean)
baseMeansHm2$Var14_RBC_2h <- with(baseMeansHm2, Var14noTNF_2h_mean - RBCnoTNF_2h_mean)
baseMeansHm2$Var14_RBC_6h <- with(baseMeansHm2, Var14noTNF_6h_mean - RBCnoTNF_6h_mean)
baseMeansHm2$Var14_RBC_20h <- with(baseMeansHm2, Var14noTNF_20h_mean - RBCnoTNF_20h_mean)
#### Columns 9-12: the four differences; columns 1-8: the log2 means themselves
baseMeansHm <- baseMeansHm2[, 9:12]
head(baseMeansHm)
baseMeansHmM <- baseMeansHm2[, 1:8]
head(baseMeansHmM)
#### Row indexes of the selected genes
topDEgenes <- which(tempA$Include == "in")
####mean logfc
#### Scale each selected gene (row) across the four log fold-change columns
dataHMm <- t(scale(t(as.matrix(baseMeansHm[topDEgenes, ]))))
#dataHMm <- log2(dataHMm+1)
hmap_hier_factors4 <- Heatmap(
  dataHMm,
  name = "logfc",
  col = col_funGR,
  row_labels = paste0(rownames(dataHMm), " ", tempA[topDEgenes, ]$Gene_Symbol),
  show_row_names = FALSE,
  cluster_columns = FALSE,
  column_title = "Means logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  width = unit(50, "mm")
)
#### The same scaled values with the 0h column subtracted from every column,
#### so the 0h column itself becomes zero.
dataHMmPlot <- as.matrix(as.data.frame(dataHMm))
dataHMmPlot <- dataHMmPlot - unname(dataHMmPlot[, "Var14_RBC_0h"])
hmap_hier_factors6 <- Heatmap(
  dataHMmPlot,
  name = "Normalised logfc",
  col = col_funGR2,
  row_labels = paste0(rownames(dataHMm), " ", tempA[topDEgenes, ]$Gene_Symbol),
  show_row_names = FALSE,
  cluster_columns = FALSE,
  column_title = "0h Normalised logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  width = unit(50, "mm")
)
####means
#### Row-scaled log2 means, with columns 5-8 interleaved with columns 1-4
dataHMm3 <- t(scale(t(as.matrix(baseMeansHmM[topDEgenes, ]))))
#dataHMm <- log2(dataHMm+1)
dataHMm3 <- dataHMm3[, c(5, 1, 6, 2, 7, 3, 8, 4)]
hmap_hier_factors5 <- Heatmap(
  dataHMm3,
  name = "Expression",
  col = col_fun,
  row_labels = paste0(rownames(dataHMm), " ", tempA[topDEgenes, ]$Gene_Symbol),
  show_row_names = FALSE,
  cluster_columns = FALSE,
  column_title = "Means",
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  width = unit(100, "mm")
)
#### Draw the three heatmaps side by side
hmap_hier_factors4 + hmap_hier_factors6 + hmap_hier_factors5
#### NOTE(review): par(mfrow) only affects base graphics, i.e. the silhouette
#### plot further down, not the ggplot-based fviz_nbclust plots - confirm intent.
par(mfrow = c(1, 2))
#### Silhouette method
fviz_nbclust(dataHMm, kmeans, method = "silhouette", k.max = 16) +
  labs(subtitle = "Silhouette method")
#### Elbow method
fviz_nbclust(dataHMm, kmeans, method = "wss", k.max = 16) +
  labs(subtitle = "Elbow method")
####gap stat slow!!!
####set.seed(123)
####fviz_nbclust(dataHMm, kmeans, nstart = 25, method = "gap_stat", nboot = 100,k.max = 16)+
#### labs(subtitle = "Gap statistic method")
#### k-means starts from random centres. Fix the seed so that the cluster
#### numbering, which the plot titles, exported tables and GO results below all
#### refer to, is the same on every run.
set.seed(123)
kclust3 <- kmeans(dataHMm, 7)
#silhouette plot
distK <- daisy(dataHMm)
plot(silhouette(kclust3$cluster, distK), col = 1:7, border = NA)
#### One heatmap row slice per cluster
split <- paste0("Cluster\n", kclust3$cluster)
#split <- factor(paste0("Cluster\n", kclust3$cluster), levels=c("Cluster\n3","Cluster\n1","Cluster\n4","Cluster\n5","Cluster\n2","Cluster\n6"))
hmap_k <- Heatmap(
  dataHMm,
  split = split,
  cluster_row_slices = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  name = "logfc",
  col = col_funGR,
  width = unit(50, "mm"),
  column_title = "means logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold")
)
hmap_k + hmap_hier_factors6 + hmap_hier_factors5
#### Mean profiles of clusters
#### Number of genes per cluster and the cluster centres from kclust3
clustercount <- data.frame(kclust3$cluster)
clustersizes <- table(clustercount$kclust3.cluster)
clusterMeans <- data.frame(kclust3$centers)
#### Transposed centres: one row per sample column, one column (X1, X2, ...)
#### per cluster, plus a Sample column holding the former row names.
clusterMeans1 <- data.frame(t(clusterMeans))
clusterMeans1 <- cbind(Sample = rownames(clusterMeans1), clusterMeans1)
rownames(clusterMeans1) <- NULL
orderN <- c("Var14_RBC_0h", "Var14_RBC_2h", "Var14_RBC_6h", "Var14_RBC_20h") #### manual
####clusterMeans1

#### Line plot of the centre of cluster k across the samples in orderN. The
#### title reports the size of that same cluster (the X7 plot used to show the
#### size of cluster 5).
clusterProfilePlot <- function(k) {
  clusterCol <- paste0("X", k)
  ggplot(data = clusterMeans1, aes(x = Sample, y = .data[[clusterCol]], group = 1)) +
    geom_line() +
    geom_point() +
    ggtitle(paste(
      paste0("Cluster ", clusterCol, " Profile "),
      clustersizes[[as.character(k)]],
      " genes"
    )) +
    scale_x_discrete(limits = orderN) +
    theme(axis.title.x = element_blank(), axis.title.y = element_blank())
}

#plot
#### One profile plot per cluster, drawn in two columns
clusterProfilePlots <- lapply(seq_len(nrow(kclust3$centers)), clusterProfilePlot)
multiplot(plotlist = clusterProfilePlots, cols = 2)
#### Selected genes and their symbols
topDEgenes <- which(tempA$Include == "in") ####find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]

#### export the gene expression data for the clusters
write.table(clusterMeans, paste0("ClusterMeansKm_", groupsName, ".txt"), sep = "\t")
#### Cluster id, gene symbol and scaled values for every selected gene
ClusteredGenes <- data.frame(kclust3$cluster, SymbolsKm, dataHMm)
write.table(ClusteredGenes, paste0("ScaledDataInClustersKm_", groupsName, ".txt"), sep = "\t")
#head(ClusteredGenes)

#### Symbols of the genes that were not selected
bottomDEgenes <- which(tempA$Include == "out") ####find indexes
bottomG <- tempA[["Gene_Symbol"]][bottomDEgenes]
write.table(bottomG, paste0("ipaBottomKmeans_", groupsName, ".txt"), sep = "\t")

#### Cluster membership table: cluster id, symbol, row name and one "1"/"0"
#### indicator column per cluster
#countsTable <-countsTable[,c(1:15)]####if samples need removing
ipaKmeans <- ClusteredGenes[, 1:2]
ipaKmeans$name2 <- rownames(ipaKmeans)
#ipaKmeans%>% rownames_to_column(var = "rowname")
#rowid_to_column(ipaKmeans)
ipaKmeans <- mutate(
  ipaKmeans,
  x1 = ifelse(kclust3.cluster == 1, "1", "0"),
  x2 = ifelse(kclust3.cluster == 2, "1", "0"),
  x3 = ifelse(kclust3.cluster == 3, "1", "0"),
  x4 = ifelse(kclust3.cluster == 4, "1", "0"),
  x5 = ifelse(kclust3.cluster == 5, "1", "0"),
  x6 = ifelse(kclust3.cluster == 6, "1", "0"),
  x7 = ifelse(kclust3.cluster == 7, "1", "0")
)
#ipaKmeans
write.table(ipaKmeans, paste0("ipaKmeans_", groupsName, ".txt"), sep = "\t")
#head(ipaKmeans)
ClusteredGenes2 <- ClusteredGenes[c(1)]
#ClusteredGenes2
#### One vector of gene symbols per k-means cluster (7 clusters here).
#### Group on the cluster column only: comparing the whole data frame with the
#### cluster number also tests the symbol and expression columns, which can add
#### spurious NA entries.
listAll <- base::split(
  ClusteredGenes$SymbolsKm,
  factor(ClusteredGenes$kclust3.cluster, levels = 1:7)
)
#### Name the vectors in the list after their cluster
names(listAll) <- paste0("X", 1:7)
#if you want to rearrange the order
#listAll<-listAll[c("x3", "x7", "x8", "x2", "x6", "x5", "x4", "x1")]
#lapply(listAll, head)
#### The simplify function is used to cut down on redundancy among the enriched GO terms
#str(AllGeneNames)
####CC
#### GO cellular-component enrichment of every cluster in listAll, tested
#### against all gene symbols of the table
cgoCC <- compareCluster(
  geneCluster = listAll,
  fun = "enrichGO",
  universe = AllGeneNames,
  OrgDb = org.Hs.eg.db,
  ####OrgDb=org.Mm.eg.db,
  keyType = "SYMBOL",
  ont = "CC",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
cgoCC2 <- clusterProfiler::simplify(cgoCC, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoCC2), paste0("GO_CC_", groupsName, ".csv"))
dotplot(
  cgoCC2,
  showCategory = 30,
  title = paste0("GO Cellular Compartment ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#### Plots and GO data are written to files
#### Write the GO CC dot plot to a PNG file
png(paste0("GO_CC_", groupsName, ".png"), width = 1224, height = 824)
#### print() explicitly: a ggplot object is only drawn by auto-printing at top
#### level, so without it the PNG is empty when this script is source()d.
print(
  dotplot(
    cgoCC2,
    showCategory = 30,
    title = paste0("GO Cellular Compartment ", groupsName)
  ) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
#### (console output of dev.off()) null device
####                               1
#### GO BP
####BP
#### GO biological-process enrichment of every cluster in listAll, tested
#### against all gene symbols of the table
cgoBP <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### Namespace-qualified so another attached package cannot mask simplify()
cgoBP2 <- clusterProfiler::simplify(cgoBP, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoBP2), paste0("GO_BP_", groupsName, ".csv"))
#### Build the dot plot once, then draw it on screen and into the PNG.
#### print() explicitly: ggplot objects are only drawn by auto-printing at top
#### level, so without it the PNG is empty when this script is source()d.
goBPdotplot <- dotplot(
  cgoBP2,
  showCategory = 30,
  title = paste0("GO Biological Process ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goBPdotplot)
png(paste0("GO_BP_", groupsName, ".png"), width = 1024, height = 1224)
print(goBPdotplot)
dev.off()
#### (console output of dev.off()) null device
####                               1
#### GO MF
####MF
#### GO molecular-function enrichment of every cluster in listAll, tested
#### against all gene symbols of the table
cgoMF <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "MF",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### Namespace-qualified so another attached package cannot mask simplify()
cgoMF2 <- clusterProfiler::simplify(cgoMF, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoMF2), paste0("GO_MF_", groupsName, ".csv"))
#### Build the dot plot once, then draw it on screen and into the PNG.
#### print() explicitly: ggplot objects are only drawn by auto-printing at top
#### level, so without it the PNG is empty when this script is source()d.
goMFdotplot <- dotplot(
  cgoMF2,
  showCategory = 30,
  title = paste0("GO Molecular Function ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goMFdotplot)
png(paste0("GO_MF_", groupsName, ".png"), width = 1424, height = 824)
print(goMFdotplot)
dev.off()
#### (console output of dev.off()) null device
####                               1
#### Label used in every output file name of this section
groupsName <- "R3_VAR14_kmeans_p0.05fc2"
#### Re-read the tab-separated results table; first column becomes row names
countsTable <- read.delim(
  "RNAseq2019July_5.txt",
  header = TRUE,
  sep = "\t",
  check.names = FALSE,
  row.names = 1
)
head(countsTable)
#### All gene symbols in the table; used later as the enrichment universe
AllGeneNames <- countsTable$Gene_Symbol
#head(AllGeneNames)
#### Show the p-value Venn diagram next to the cutoff comparison
grid.arrange(
  gTree(children = vennp),
  gTree(children = vennpq),
  ncol = 2,
  top = "R3 Var14 no TNF"
)
#tempA<-resAll[-c(10:30) ]
#### Work on a copy of the table without row names
tempA <- countsTable
#rownames(tempA)
rownames(tempA) <- NULL

#### TRUE where, at the given time point, pvalue < 0.05 and |log2FoldChange| > 1.
#### A missing pvalue or fold change counts as "does not pass" rather than NA,
#### so a gene passing at a later time point is still flagged "in". This matches
#### the which()-based gene list used for the Venn diagram.
passesPvalueFc <- function(tbl, timepoint) {
  p <- tbl[[paste0("pvalue_R3noTNF_var14_vs_RBC_", timepoint)]]
  lfc <- tbl[[paste0("log2FoldChange_R3noTNF_var14_vs_RBC_", timepoint)]]
  stopifnot(!is.null(p), !is.null(lfc))
  !is.na(p) & !is.na(lfc) & p < 0.05 & abs(lfc) > 1
}

#### Include is "in" when any of the four time points passes, "out" otherwise
includeGene <- Reduce(`|`, lapply(
  c("0h", "2h", "6h", "20h"),
  function(tp) passesPvalueFc(tempA, tp)
))
tempA <- mutate(tempA, Include = ifelse(includeGene, "in", "out"))
#tempA
####library(dplyr)
#### Show how many genes fall in each group
tempA %>%
  group_by(Include) %>%
  tally()
topDEgenes <- which(tempA$Include == "in") ####find indexes
#### Columns are picked by position; the subtractions below expect them to be
#### the Var14noTNF_*_mean and RBCnoTNF_*_mean columns.
baseMeansHm <- countsTable[, c(60:63, 79:82)]
head(baseMeansHm)
baseMeansHm2 <- log2(baseMeansHm + 1)
#### Difference of the log2 means (Var14 minus RBC) at each time point
baseMeansHm2$Var14_RBC_0h <- baseMeansHm2$Var14noTNF_0h_mean - baseMeansHm2$RBCnoTNF_0h_mean
baseMeansHm2$Var14_RBC_2h <- baseMeansHm2$Var14noTNF_2h_mean - baseMeansHm2$RBCnoTNF_2h_mean
baseMeansHm2$Var14_RBC_6h <- baseMeansHm2$Var14noTNF_6h_mean - baseMeansHm2$RBCnoTNF_6h_mean
baseMeansHm2$Var14_RBC_20h <- baseMeansHm2$Var14noTNF_20h_mean - baseMeansHm2$RBCnoTNF_20h_mean
#### Columns 9-12: the four differences
baseMeansHm <- baseMeansHm2[, c(9:12)]
head(baseMeansHm)
#### Columns 1-8: the log2 means. The "means" heatmap of this section reads
#### baseMeansHmM, so define it here as the other sections do instead of
#### relying on the object left over from an earlier section.
baseMeansHmM <- baseMeansHm2[, c(1:8)]
topDEgenes <- which(tempA$Include == "in") ####find indexes
####mean logfc
#### Scale each selected gene (row) across the four log fold-change columns
dataHMm <- t(scale(t(as.matrix(baseMeansHm[topDEgenes, ]))))
#dataHMm <- log2(dataHMm+1)
hmap_hier_factors4 <- Heatmap(
  dataHMm,
  name = "logfc",
  col = col_funGR,
  row_labels = paste0(rownames(dataHMm), " ", tempA[topDEgenes, ]$Gene_Symbol),
  show_row_names = FALSE,
  cluster_columns = FALSE,
  column_title = "Means logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  width = unit(50, "mm")
)
#### The same scaled values with the 0h column subtracted from every column,
#### so the 0h column itself becomes zero.
dataHMmPlot <- as.matrix(as.data.frame(dataHMm))
dataHMmPlot <- dataHMmPlot - unname(dataHMmPlot[, "Var14_RBC_0h"])
hmap_hier_factors6 <- Heatmap(
  dataHMmPlot,
  name = "Normalised logfc",
  col = col_funGR2,
  row_labels = paste0(rownames(dataHMm), " ", tempA[topDEgenes, ]$Gene_Symbol),
  show_row_names = FALSE,
  cluster_columns = FALSE,
  column_title = "0h Normalised logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  width = unit(50, "mm")
)
####means
#### Row-scaled log2 means, with columns 5-8 interleaved with columns 1-4
dataHMm3 <- t(scale(t(as.matrix(baseMeansHmM[topDEgenes, ]))))
#dataHMm <- log2(dataHMm+1)
dataHMm3 <- dataHMm3[, c(5, 1, 6, 2, 7, 3, 8, 4)]
hmap_hier_factors5 <- Heatmap(
  dataHMm3,
  name = "Expression",
  col = col_fun,
  row_labels = paste0(rownames(dataHMm), " ", tempA[topDEgenes, ]$Gene_Symbol),
  show_row_names = FALSE,
  cluster_columns = FALSE,
  column_title = "Means",
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  width = unit(100, "mm")
)
#### Draw the three heatmaps side by side
hmap_hier_factors4 + hmap_hier_factors6 + hmap_hier_factors5
#### NOTE(review): par(mfrow) only affects base graphics, i.e. the silhouette
#### plot further down, not the ggplot-based fviz_nbclust plots - confirm intent.
par(mfrow = c(1, 2))
#### Silhouette method
fviz_nbclust(dataHMm, kmeans, method = "silhouette", k.max = 16) +
  labs(subtitle = "Silhouette method")
#### Elbow method
fviz_nbclust(dataHMm, kmeans, method = "wss", k.max = 16) +
  labs(subtitle = "Elbow method")
####gap stat slow!!!
####set.seed(123)
####fviz_nbclust(dataHMm, kmeans, nstart = 25, method = "gap_stat", nboot = 100,k.max = 16)+
#### labs(subtitle = "Gap statistic method")
#### kclust4 is used below but its definition was commented out, so a fresh
#### session stopped with "object 'kclust4' not found". Compute it when it is
#### missing or does not match the current data; a matching clustering already
#### in the workspace is kept so that its cluster numbering stays unchanged.
if (!exists("kclust4") || length(kclust4$cluster) != nrow(dataHMm)) {
  set.seed(123)
  kclust4 <- kmeans(dataHMm, 5)
}
#silhouette plot
distK <- daisy(dataHMm)
plot(silhouette(kclust4$cluster, distK), col = 1:5, border = NA)
#### One heatmap row slice per cluster
split <- paste0("Cluster\n", kclust4$cluster)
#split <- factor(paste0("Cluster\n", kclust3$cluster), levels=c("Cluster\n3","Cluster\n1","Cluster\n4","Cluster\n5","Cluster\n2","Cluster\n6"))
hmap_k <- Heatmap(
  dataHMm,
  split = split,
  cluster_row_slices = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  name = "logfc",
  col = col_funGR,
  width = unit(50, "mm"),
  column_title = "means logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold")
)
hmap_k + hmap_hier_factors6 + hmap_hier_factors5
#### Mean profiles of clusters
#### Number of genes per cluster and the cluster centres from kclust4
clustercount <- data.frame(kclust4$cluster)
clustersizes <- table(clustercount$kclust4.cluster)
clusterMeans <- data.frame(kclust4$centers)
#### Transposed centres: one row per sample column, one column (X1, X2, ...)
#### per cluster, plus a Sample column holding the former row names.
clusterMeans1 <- data.frame(t(clusterMeans))
clusterMeans1 <- cbind(Sample = rownames(clusterMeans1), clusterMeans1)
rownames(clusterMeans1) <- NULL
orderN <- c("Var14_RBC_0h", "Var14_RBC_2h", "Var14_RBC_6h", "Var14_RBC_20h") #### manual
####clusterMeans1

#### Line plot of one cluster centre across the samples in orderN;
#### clusterCol is the column of clusterMeans1 to draw, nGenes the cluster size
#### shown in the title.
makeProfilePlot <- function(clusterCol, nGenes) {
  ggplot(data = clusterMeans1, aes(x = Sample, y = .data[[clusterCol]], group = 1)) +
    geom_line() +
    geom_point() +
    ggtitle(paste(paste0("Cluster ", clusterCol, " Profile "), nGenes, " genes")) +
    scale_x_discrete(limits = orderN) +
    theme(axis.title.x = element_blank(), axis.title.y = element_blank())
}
pX1 <- makeProfilePlot("X1", clustersizes[1])
pX2 <- makeProfilePlot("X2", clustersizes[2])
pX3 <- makeProfilePlot("X3", clustersizes[3])
pX4 <- makeProfilePlot("X4", clustersizes[4])
pX5 <- makeProfilePlot("X5", clustersizes[5])
#pX6 <- makeProfilePlot("X6", clustersizes[6])
#plot
multiplot(pX1, pX2, pX3, pX4, pX5, cols = 2)
#### Selected genes and their symbols
topDEgenes <- which(tempA$Include == "in") ####find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]

#### export the gene expression data for the clusters
write.table(clusterMeans, paste0("ClusterMeansKm_", groupsName, ".txt"), sep = "\t")
#### Cluster id, gene symbol and scaled values for every selected gene
ClusteredGenes <- data.frame(kclust4$cluster, SymbolsKm, dataHMm)
write.table(ClusteredGenes, paste0("ScaledDataInClustersKm_", groupsName, ".txt"), sep = "\t")
#head(ClusteredGenes)

#### Symbols of the genes that were not selected
bottomDEgenes <- which(tempA$Include == "out") ####find indexes
bottomG <- tempA[["Gene_Symbol"]][bottomDEgenes]
write.table(bottomG, paste0("ipaBottomKmeans_", groupsName, ".txt"), sep = "\t")

#### Cluster membership table: cluster id, symbol, row name and one "1"/"0"
#### indicator column per cluster
#countsTable <-countsTable[,c(1:15)]####if samples need removing
ipaKmeans <- ClusteredGenes[, 1:2]
ipaKmeans$name2 <- rownames(ipaKmeans)
#ipaKmeans%>% rownames_to_column(var = "rowname")
#rowid_to_column(ipaKmeans)
ipaKmeans <- mutate(
  ipaKmeans,
  x1 = ifelse(kclust4.cluster == 1, "1", "0"),
  x2 = ifelse(kclust4.cluster == 2, "1", "0"),
  x3 = ifelse(kclust4.cluster == 3, "1", "0"),
  x4 = ifelse(kclust4.cluster == 4, "1", "0"),
  x5 = ifelse(kclust4.cluster == 5, "1", "0")
)
#ipaKmeans = mutate(ipaKmeans, x6= ifelse(ipaKmeans$kclust4.cluster==6, "1", "0"))
#ipaKmeans
write.table(ipaKmeans, paste0("ipaKmeans_", groupsName, ".txt"), sep = "\t")
#head(ipaKmeans)
ClusteredGenes2 <- ClusteredGenes[c(1)]
#ClusteredGenes2
#### One vector of gene symbols per k-means cluster (5 clusters here).
#### Group on the cluster column only: comparing the whole data frame with the
#### cluster number also tests the symbol and expression columns, which can add
#### spurious NA entries.
listAll <- base::split(
  ClusteredGenes$SymbolsKm,
  factor(ClusteredGenes$kclust4.cluster, levels = 1:5)
)
#### Name the vectors in the list after their cluster
names(listAll) <- paste0("X", 1:5)
#if you want to rearrange the order
#listAll<-listAll[c("x3", "x7", "x8", "x2", "x6", "x5", "x4", "x1")]
lapply(listAll, head)
#### Console output of lapply(listAll, head) from one run:
#### $X1
#### [1] "CDH11" "KCTD12" "DNAJB4" "NFKBIZ" "SAMD9" "PMAIP1"
#### $X2
#### [1] "ABCB1" "CLEC10A" "PCDH17" "RPS7P11" "FTH1P23" "EEF1A1P12"
#### $X3
#### [1] "PSMD6-AS2" "RGS7BP" "SNHG26" "NCR3LG1" "ANGPTL4" "KRT7"
#### $X4
#### [1] "CYP1A1" "TXNIP" "TTC39A" "CCDC68" "KLF4" "FOSB"
#### $X5
#### [1] "F2RL3" "FCN3" "B9D2" "RAB11FIP1" "AC139530.1" "PRR29"
#### The simplify function is used to cut down on redundancy among the enriched GO terms
#str(AllGeneNames)
####CC
#### GO cellular-component enrichment of every cluster in listAll, tested
#### against all gene symbols of the table
cgoCC <- compareCluster(
  geneCluster = listAll,
  fun = "enrichGO",
  universe = AllGeneNames,
  OrgDb = org.Hs.eg.db,
  ####OrgDb=org.Mm.eg.db,
  keyType = "SYMBOL",
  ont = "CC",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
cgoCC2 <- clusterProfiler::simplify(cgoCC, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoCC2), paste0("GO_CC_", groupsName, ".csv"))
dotplot(
  cgoCC2,
  showCategory = 30,
  title = paste0("GO Cellular Compartment ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#### Plots and GO data are written to files
#### Write the GO CC dot plot to a PNG file
png(paste0("GO_CC_", groupsName, ".png"), width = 1224, height = 824)
#### print() explicitly: a ggplot object is only drawn by auto-printing at top
#### level, so without it the PNG is empty when this script is source()d.
print(
  dotplot(
    cgoCC2,
    showCategory = 30,
    title = paste0("GO Cellular Compartment ", groupsName)
  ) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
#### GO BP
####BP
#### GO biological-process enrichment of every cluster in listAll, tested
#### against all gene symbols of the table
cgoBP <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### Namespace-qualified so another attached package cannot mask simplify()
cgoBP2 <- clusterProfiler::simplify(cgoBP, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoBP2), paste0("GO_BP_", groupsName, ".csv"))
#### Build the dot plot once, then draw it on screen and into the PNG.
#### print() explicitly: ggplot objects are only drawn by auto-printing at top
#### level, so without it the PNG is empty when this script is source()d.
goBPdotplot <- dotplot(
  cgoBP2,
  showCategory = 30,
  title = paste0("GO Biological Process ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goBPdotplot)
png(paste0("GO_BP_", groupsName, ".png"), width = 1024, height = 1224)
print(goBPdotplot)
dev.off()
#### (console output of dev.off()) null device
####                               1
#### GO MF
####MF
#### GO molecular-function enrichment of every cluster in listAll, tested
#### against all gene symbols of the table
cgoMF <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "MF",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### Namespace-qualified so another attached package cannot mask simplify()
cgoMF2 <- clusterProfiler::simplify(cgoMF, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoMF2), paste0("GO_MF_", groupsName, ".csv"))
#### Build the dot plot once, then draw it on screen and into the PNG.
#### print() explicitly: ggplot objects are only drawn by auto-printing at top
#### level, so without it the PNG is empty when this script is source()d.
goMFdotplot <- dotplot(
  cgoMF2,
  showCategory = 30,
  title = paste0("GO Molecular Function ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goMFdotplot)
png(paste0("GO_MF_", groupsName, ".png"), width = 1424, height = 824)
print(goMFdotplot)
dev.off()
#### Label used in every output file name of this section
groupsName <- "R3_VAR14_kmeans_padj0.1fc2"
#### Re-read the tab-separated results table; first column becomes row names
countsTable <- read.delim("RNAseq2019July_5.txt", header = TRUE, sep = "\t", check.names = FALSE, row.names = 1)
head(countsTable)
#### All gene symbols in the table; used later as the enrichment universe
AllGeneNames <- countsTable$Gene_Symbol
#head(AllGeneNames)
#### This section selects genes by padj < 0.1 and fold change, so show the
#### matching Venn diagram (vennq2) rather than the p-value one (vennp) that
#### was copied over from the previous section.
grid.arrange(gTree(children = vennq2), gTree(children = vennpq), ncol = 2, top = "R3 Var14 no TNF")
#tempA<-resAll[-c(10:30) ]
tempA<-countsTable
#rownames(tempA)
rownames(tempA) <- NULL
#### Flag each gene as "in" when it passes padj < 0.1 and |log2 fold change| > 1
#### at any of the four time points, otherwise "out".
#### Missing values count as "not passing", so Include is never NA: with the
#### previous nested ifelse() a gene with a passing padj but a missing log2
#### fold change got Include = NA and silently dropped out of both the "in"
#### and the "out" gene sets.
r3TimePoints <- c("0h", "2h", "6h", "20h")
r3PadjCols <- paste0("padj_R3noTNF_var14_vs_RBC_", r3TimePoints)
r3LfcCols <- paste0("log2FoldChange_R3noTNF_var14_vs_RBC_", r3TimePoints)
#### Fail loudly if a column is absent instead of producing NA flags
stopifnot(all(c(r3PadjCols, r3LfcCols) %in% names(tempA)))
r3IsDE <- Reduce(`|`, Map(function(padjCol, lfcCol) {
  padj <- tempA[[padjCol]]
  lfc <- tempA[[lfcCol]]
  !is.na(padj) & !is.na(lfc) & padj < 0.1 & abs(lfc) > 1
}, r3PadjCols, r3LfcCols))
tempA <- mutate(tempA, Include = ifelse(r3IsDE, "in", "out"))
#### Number of genes in each category
tempA %>%
  group_by(Include) %>%
  tally()
topDEgenes <- which(tempA$Include == "in") #### find indexes
#### log2(x + 1) of eight columns of countsTable, picked by position
baseMeansHm <- countsTable[, c(60:63, 79:82)]
baseMeansHm2 <- log2(baseMeansHm + 1)
#### Var14 minus RBC difference of the log2 means at each time point
baseMeansHm2$Var14_RBC_0h <-
  baseMeansHm2$Var14noTNF_0h_mean - baseMeansHm2$RBCnoTNF_0h_mean
baseMeansHm2$Var14_RBC_2h <-
  baseMeansHm2$Var14noTNF_2h_mean - baseMeansHm2$RBCnoTNF_2h_mean
baseMeansHm2$Var14_RBC_6h <-
  baseMeansHm2$Var14noTNF_6h_mean - baseMeansHm2$RBCnoTNF_6h_mean
baseMeansHm2$Var14_RBC_20h <-
  baseMeansHm2$Var14noTNF_20h_mean - baseMeansHm2$RBCnoTNF_20h_mean
#### Columns 9-12 are the four differences just added
baseMeansHm <- baseMeansHm2[, 9:12]
head(baseMeansHm)
#### Columns 1-8 are the log2 means themselves
baseMeansHmM <- baseMeansHm2[, 1:8]
head(baseMeansHmM)
topDEgenes <- which(tempA$Include == "in") #### find indexes
#### z-score every row across its columns
zscoreRows <- function(tbl) t(scale(t(as.matrix(tbl))))
#### Heatmap() settings shared by the three panels; only the matrix, legend
#### name, column title, colour mapping and body width differ between them
deHeatmap <- function(mat, legendName, title, colours, widthMm, labels) {
  Heatmap(
    mat,
    name = legendName,
    row_labels = labels,
    column_title = title,
    col = colours,
    column_title_gp = gpar(fontsize = 16, fontface = "bold"),
    width = unit(widthMm, "mm"),
    cluster_columns = FALSE,
    show_row_names = FALSE
  )
}
#### mean logfc
dataHMm <- zscoreRows(baseMeansHm[topDEgenes, ])
#### Row label = row name of the matrix plus the gene symbol
geneLabels <- paste0(rownames(dataHMm), " ", tempA$Gene_Symbol[topDEgenes])
hmap_hier_factors4 <- deHeatmap(dataHMm, "logfc", "Means logfc",
                                col_funGR, 50, geneLabels)
#### logfc relative to 0h: the 0h column is subtracted from every column
dataHMmPlot <- as.data.frame(dataHMm)
zeroHour <- dataHMmPlot$Var14_RBC_0h
dataHMmPlot[] <- lapply(dataHMmPlot, function(timePoint) timePoint - zeroHour)
dataHMmPlot <- as.matrix(dataHMmPlot)
hmap_hier_factors6 <- deHeatmap(dataHMmPlot, "Normalised logfc",
                                "0h Normalised logfc", col_funGR2, 50,
                                geneLabels)
#### means, with the columns interleaved (5,1,6,2,7,3,8,4)
dataHMm3 <- zscoreRows(baseMeansHmM[topDEgenes, ])
dataHMm3 <- dataHMm3[, c(5, 1, 6, 2, 7, 3, 8, 4)]
hmap_hier_factors5 <- deHeatmap(dataHMm3, "Expression", "Means",
                                col_fun, 100, geneLabels)
#### Draw the three heatmaps side by side
hmap_hier_factors4 + hmap_hier_factors6 + hmap_hier_factors5
#### Diagnostics for the number of clusters, then k-means on the row-scaled
#### logfc matrix.
#### k-means starts from randomly chosen centres, so the seed is fixed here:
#### without it the cluster numbering and membership, and every cluster file
#### and GO table derived from them, change from one run to the next.
set.seed(123)
par(mfrow = c(1, 2))
#### Silhouette method
fviz_nbclust(dataHMm, kmeans, method = "silhouette", k.max = 16) +
  labs(subtitle = "Silhouette method")
#### Elbow method
fviz_nbclust(dataHMm, kmeans, method = "wss", k.max = 16) +
  labs(subtitle = "Elbow method")
####gap stat slow!!!
####fviz_nbclust(dataHMm, kmeans, nstart = 25, method = "gap_stat", nboot = 100,k.max = 16)+
#### labs(subtitle = "Gap statistic method")
#### Re-seed directly before the final clustering so that its result does not
#### depend on whether the diagnostic plots above were run.
set.seed(123)
kclust4b <- kmeans(dataHMm, 3)
#### silhouette plot of the chosen clustering
distK <- daisy(dataHMm)
plot(silhouette(kclust4b$cluster, distK), col = 1:3, border = NA)
#### One heatmap slice per k-means cluster, labelled "Cluster\n<id>".
#### To impose a slice order, turn split into a factor with explicit levels.
split <- paste0("Cluster\n", kclust4b$cluster)
hmap_k <- Heatmap(
  dataHMm,
  name = "logfc",
  col = col_funGR,
  split = split,
  cluster_row_slices = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  width = unit(50, "mm"),
  column_title = "means logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold")
)
#### Clustered logfc next to the 0h-normalised and the means heatmaps
hmap_k + hmap_hier_factors6 + hmap_hier_factors5
Mean profiles of clusters
#### Cluster sizes and cluster centres
clustercount <- data.frame(kclust4b$cluster)
clustersizes <- table(clustercount$kclust4b.cluster)
clusterMeans <- data.frame(kclust4b$centers)
#### Transposed centres: one row per sample, one column (X1, X2, ...) per cluster
clusterMeans1 <- data.frame(t(clusterMeans))
clusterMeans1 <- data.frame(Sample = rownames(clusterMeans1), clusterMeans1)
rownames(clusterMeans1) <- NULL
orderN <- c("Var14_RBC_0h", "Var14_RBC_2h", "Var14_RBC_6h", "Var14_RBC_20h") #### manual
#### Line plot of the centre of cluster k across the samples, in orderN order
clusterProfilePlot <- function(k) {
  clusterCol <- paste0("X", k)
  ggplot(data = clusterMeans1,
         aes(x = Sample, y = .data[[clusterCol]], group = 1)) +
    geom_line() +
    geom_point() +
    ggtitle(paste(paste0("Cluster X", k, " Profile "), clustersizes[k], " genes")) +
    scale_x_discrete(limits = orderN) +
    theme(axis.title.x = element_blank(), axis.title.y = element_blank())
}
pX1 <- clusterProfilePlot(1)
pX2 <- clusterProfilePlot(2)
pX3 <- clusterProfilePlot(3)
#### plot
multiplot(pX1, pX2, pX3, cols = 2)
#### Gene symbols of the rows flagged "in"
topDEgenes <- which(tempA$Include == "in") #### find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]
#### export the gene expression data for the clusters
write.table(clusterMeans,
            paste0("ClusterMeansKm_", groupsName, ".txt"),
            sep = "\t")
#### Cluster id, gene symbol and scaled values, one row per clustered gene
ClusteredGenes <- data.frame(kclust4b$cluster, SymbolsKm, dataHMm)
write.table(ClusteredGenes,
            paste0("ScaledDataInClustersKm_", groupsName, ".txt"),
            sep = "\t")
#### Gene symbols of the rows flagged "out"
bottomDEgenes <- which(tempA$Include == "out") #### find indexes
bottomG <- tempA[["Gene_Symbol"]][bottomDEgenes]
write.table(bottomG,
            paste0("ipaBottomKmeans_", groupsName, ".txt"),
            sep = "\t")
topDEgenes <- which(tempA$Include == "in") #### find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]
#### Cluster id and gene symbol only, plus the row name as a column
ipaKmeans <- ClusteredGenes[, 1:2]
ipaKmeans$name2 <- rownames(ipaKmeans)
#### One "1"/"0" membership column per cluster
ipaKmeans <- mutate(
  ipaKmeans,
  x1 = ifelse(kclust4b.cluster == 1, "1", "0"),
  x2 = ifelse(kclust4b.cluster == 2, "1", "0"),
  x3 = ifelse(kclust4b.cluster == 3, "1", "0")
)
write.table(ipaKmeans, paste0("ipaKmeans_", groupsName, ".txt"), sep = "\t")
ClusteredGenes2 <- ClusteredGenes[c(1)]
#### One vector of gene symbols per k-means cluster, named X1, X2, ...
#### Membership is decided on the cluster column only. The previous
#### subset(ClusteredGenes$SymbolsKm, ClusteredGenes == i) compared every
#### column of the data frame with i, so any scaled value that happened to
#### equal a cluster id added spurious NA entries to that cluster's gene list.
#### The number of clusters and their names are taken from the data instead
#### of being hard-coded. base::split is called explicitly because this
#### script also assigns a variable named split.
listAll <- base::split(ClusteredGenes$SymbolsKm, ClusteredGenes$kclust4b.cluster)
names(listAll) <- paste0("X", names(listAll))
#### if you want to rearrange the order, index listAll by name, e.g.
#### listAll <- listAll[c("X3", "X1", "X2")]
lapply(listAll, head)
$X1
[1] "CYP1A1" "KCTD12" "TXNIP" "DNAJB4" "PMAIP1" "USP53"
$X2
[1] "PRR29" "CLDN5" "AMH" "TRMT61A" "DUS3L" "EEF1A1P4"
$X3
[1] "ANGPTL4" "KRT7" "STARD4-AS1" "ZNF770" "CEP295"
The simplify function has been used to cut down on GO redundancy
#### CC: GO cellular-component enrichment compared across the gene clusters.
#### The argument is spelled geneClusters in full instead of relying on
#### partial matching of "geneCluster".
cgoCC <- compareCluster(geneClusters = listAll,
                        universe = AllGeneNames,
                        fun = "enrichGO",
                        OrgDb = org.Hs.eg.db,
                        ####OrgDb=org.Mm.eg.db,
                        keyType = "SYMBOL",
                        ont = "CC",
                        pvalueCutoff = 0.05,
                        qvalueCutoff = 0.10)
#### simplify() is used to cut down on GO redundancy
cgoCC2 <- simplify(cgoCC, cutoff = 0.7, by = "p.adjust", select_fun = min)
#### write as spreadsheet
write.csv(as.data.frame(cgoCC2), paste0("GO_CC_", groupsName, ".csv"))
#### print() is explicit so the plot is also drawn when this script is run
#### through source(), where top-level results are not auto-printed
print(
  dotplot(cgoCC2, showCategory = 30,
          title = paste0("GO Cellular Compartment ", groupsName)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
Plots and GO data were written to files
#### Write the GO cellular-component dotplot to a PNG file.
#### print() is explicit because a ggplot object is otherwise only drawn by
#### top-level auto-printing; without it the PNG is left blank when this
#### script is run through source() or from inside a function or loop.
png(paste0("GO_CC_", groupsName, ".png"), width = 1224, height = 824)
print(
  dotplot(cgoCC2, showCategory = 30,
          title = paste0("GO Cellular Compartment ", groupsName)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
null device
1
GO BP
#### BP: GO biological-process enrichment compared across the gene clusters.
#### The argument is spelled geneClusters in full instead of relying on
#### partial matching of "geneCluster".
cgoBP <- compareCluster(geneClusters = listAll,
                        universe = AllGeneNames,
                        fun = "enrichGO",
                        OrgDb = org.Hs.eg.db,
                        keyType = "SYMBOL",
                        ont = "BP",
                        pvalueCutoff = 0.05,
                        qvalueCutoff = 0.10)
#### simplify() is used to cut down on GO redundancy
cgoBP2 <- simplify(cgoBP, cutoff = 0.7, by = "p.adjust", select_fun = min)
#### write as spreadsheet
write.csv(as.data.frame(cgoBP2), paste0("GO_BP_", groupsName, ".csv"))
#### The plot is built once and print()ed explicitly, first on the active
#### device and then into the PNG. A ggplot is otherwise only drawn by
#### top-level auto-printing, so the PNG would be blank when this script is
#### run through source() or from inside a function or loop.
bpDotplot <- dotplot(cgoBP2, showCategory = 30,
                     title = paste0("GO Biological Process ", groupsName)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(bpDotplot)
png(paste0("GO_BP_", groupsName, ".png"), width = 1024, height = 1224)
print(bpDotplot)
dev.off()
null device
1
GO MF
#### MF: GO molecular-function enrichment compared across the gene clusters.
#### The argument is spelled geneClusters in full instead of relying on
#### partial matching of "geneCluster".
cgoMF <- compareCluster(geneClusters = listAll,
                        universe = AllGeneNames,
                        fun = "enrichGO",
                        OrgDb = org.Hs.eg.db,
                        keyType = "SYMBOL",
                        ont = "MF",
                        pvalueCutoff = 0.05,
                        qvalueCutoff = 0.10)
#### simplify() is used to cut down on GO redundancy
cgoMF2 <- simplify(cgoMF, cutoff = 0.7, by = "p.adjust", select_fun = min)
#### write as spreadsheet
write.csv(as.data.frame(cgoMF2), paste0("GO_MF_", groupsName, ".csv"))
#### The plot is built once and print()ed explicitly, first on the active
#### device and then into the PNG. A ggplot is otherwise only drawn by
#### top-level auto-printing, so the PNG would be blank when this script is
#### run through source() or from inside a function or loop.
mfDotplot <- dotplot(cgoMF2, showCategory = 30,
                     title = paste0("GO Molecular Function ", groupsName)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(mfDotplot)
png(paste0("GO_MF_", groupsName, ".png"), width = 1424, height = 824)
print(mfDotplot)
dev.off()
null device
1
#### Settings for this analysis; groupsName is pasted into the names of the
#### output files written further down
groupsName <- "R5_VAR14_RBC_TNF_kmeans_q0.05"
countsTable <- read.delim(
  "RNAseq2019July_5.txt",
  header = TRUE,
  sep = "\t",
  check.names = FALSE,
  row.names = 1
)
head(countsTable)
#### Gene symbol of every row of the table
AllGeneNames <- countsTable$Gene_Symbol
tempA <- countsTable
#### Row indexes with a non-missing padj below 0.05 at the given time point
r5PadjHits <- function(timePoint) {
  padj <- tempA[[paste0("padj_R5_TNF_var14_vs_RBC_", timePoint)]]
  which(padj < 0.05 & !is.na(padj))
}
#### Gene symbols passing at 0h, 2h, 6h and 20h
listA <- tempA$Gene_Symbol[r5PadjHits("0h")]
listB <- tempA$Gene_Symbol[r5PadjHits("2h")]
listC <- tempA$Gene_Symbol[r5PadjHits("6h")]
topDEgenes <- r5PadjHits("20h") #### find indexes
listD <- tempA$Gene_Symbol[topDEgenes]
#### Four-set Venn diagram of the padj < 0.05 gene lists, one set per time
#### point. filename = NULL keeps the diagram as a grob list, which is later
#### wrapped in gTree() for grid.arrange().
#### The first category label read "Var14nNF_0h"; it is corrected to
#### "Var14TNF_0h" to match the other Venn diagrams of this comparison.
vennq <- venn.diagram(
  x = list(listA, listB, listC, listD),
  category.names = c("Var14TNF_0h", "Var14TNF_2h", "Var14TNF_6h", "Var14TNF_20h"),
  main = "padj<0.05",
  filename = NULL, scaled = FALSE, fill = colorsV4, cat.col = colorsV4,
  cat.cex = 1, cat.dist = 0.3, margin = 0.3)
#### Row indexes with a non-missing pvalue below 0.05 and |log2 fold change|
#### above 1 at the given time point
r5PvalueFcHits <- function(timePoint) {
  pvalue <- tempA[[paste0("pvalue_R5_TNF_var14_vs_RBC_", timePoint)]]
  lfc <- tempA[[paste0("log2FoldChange_R5_TNF_var14_vs_RBC_", timePoint)]]
  which(pvalue < 0.05 & abs(lfc) > 1 & !is.na(pvalue))
}
#### Gene symbols passing at 0h, 2h, 6h and 20h
listA <- tempA$Gene_Symbol[r5PvalueFcHits("0h")]
listB <- tempA$Gene_Symbol[r5PvalueFcHits("2h")]
listC <- tempA$Gene_Symbol[r5PvalueFcHits("6h")]
topDEgenes <- r5PvalueFcHits("20h") #### find indexes
listD <- tempA$Gene_Symbol[topDEgenes]
#### Four-set Venn diagram, kept as a grob list (filename = NULL)
vennp <- venn.diagram(
  x = list(listA, listB, listC, listD),
  category.names = c("Var14TNF_0h", "Var14TNF_2h", "Var14TNF_6h", "Var14TNF_20h"),
  main = "pvalue<0.05&fold change>2",
  filename = NULL,
  scaled = FALSE,
  fill = colorsV4,
  cat.col = colorsV4,
  cat.cex = 1,
  cat.dist = 0.3,
  margin = 0.3
)
#### Row indexes with a non-missing padj below 0.1 and |log2 fold change|
#### above 1 at the given time point
r5PadjFcHits <- function(timePoint) {
  padj <- tempA[[paste0("padj_R5_TNF_var14_vs_RBC_", timePoint)]]
  lfc <- tempA[[paste0("log2FoldChange_R5_TNF_var14_vs_RBC_", timePoint)]]
  which(padj < 0.1 & abs(lfc) > 1 & !is.na(padj))
}
#### Gene symbols passing at 0h, 2h, 6h and 20h
listA <- tempA$Gene_Symbol[r5PadjFcHits("0h")]
listB <- tempA$Gene_Symbol[r5PadjFcHits("2h")]
listC <- tempA$Gene_Symbol[r5PadjFcHits("6h")]
topDEgenes <- r5PadjFcHits("20h") #### find indexes
listD <- tempA$Gene_Symbol[topDEgenes]
#### Four-set Venn diagram, kept as a grob list (filename = NULL)
#### NOTE(review): the title has no "<" after padj, unlike the other Venn
#### titles - confirm whether "padj<0.1" was intended
vennq2 <- venn.diagram(
  x = list(listA, listB, listC, listD),
  category.names = c("Var14TNF_0h", "Var14TNF_2h", "Var14TNF_6h", "Var14TNF_20h"),
  main = "padj0.1&fold change>2",
  filename = NULL,
  scaled = FALSE,
  fill = colorsV4,
  cat.col = colorsV4,
  cat.cex = 1,
  cat.dist = 0.3,
  margin = 0.3
)
#### Column of tempA holding one statistic of the R5 TNF Var14-vs-RBC
#### comparison at one time point
r5Stat <- function(stat, timePoint) {
  tempA[[paste0(stat, "_R5_TNF_var14_vs_RBC_", timePoint)]]
}
#### Row indexes for which passes() is TRUE at one or more time points
r5AnyTimePoint <- function(passes) {
  which(Reduce(`|`, lapply(c("0h", "2h", "6h", "20h"), passes)))
}
#### padj < 0.05 at any time point
topDEgenes <- r5AnyTimePoint(function(timePoint) {
  padj <- r5Stat("padj", timePoint)
  padj < 0.05 & !is.na(padj)
})
listA <- tempA$Gene_Symbol[topDEgenes]
#### pvalue < 0.05 and |log2 fold change| > 1 at any time point
topDEgenes <- r5AnyTimePoint(function(timePoint) {
  pvalue <- r5Stat("pvalue", timePoint)
  lfc <- r5Stat("log2FoldChange", timePoint)
  pvalue < 0.05 & abs(lfc) > 1 & !is.na(pvalue)
}) #### find indexes
listC <- tempA$Gene_Symbol[topDEgenes]
#### padj < 0.1 and |log2 fold change| > 1 at any time point
topDEgenes <- r5AnyTimePoint(function(timePoint) {
  padj <- r5Stat("padj", timePoint)
  lfc <- r5Stat("log2FoldChange", timePoint)
  padj < 0.1 & abs(lfc) > 1 & !is.na(padj)
}) #### find indexes
listB <- tempA$Gene_Symbol[topDEgenes]
#### Three-set Venn diagram comparing the gene lists obtained with the three
#### cut-offs (listA: padj<0.05, listB: padj<0.1&fc>2, listC: p<0.05&fc>2),
#### kept as a grob list (filename = NULL)
vennpq <- venn.diagram(
  x = list(listA, listB, listC),
  category.names = c("padj<0.05", "padj<0.1&fc>2", "p<0.05&fc>2"),
  main = "padj compared to pvalue",
  filename = NULL, scaled = FALSE, fill = colorsV3, cat.col = colorsV3,
  cat.cex = 1, cat.dist = 0.1, margin = 0.15)
#### Both diagrams side by side. The page title said "R2 Var14 TNF" although
#### every list in this section comes from the R5 columns; it now reads
#### "R5 Var14 TNF", as in the other R5 section of this script.
grid.arrange(gTree(children = vennq), gTree(children = vennpq),
             ncol = 2, top = "R5 Var14 TNF")
#### Working copy of the table with the row names reset
tempA <- countsTable
rownames(tempA) <- NULL
#### TRUE where the padj of the R5 TNF Var14-vs-RBC comparison is non-missing
#### and below 0.05 at one or more of the four time points
r5AnySig <- Reduce(`|`, lapply(c("0h", "2h", "6h", "20h"), function(timePoint) {
  padj <- tempA[[paste0("padj_R5_TNF_var14_vs_RBC_", timePoint)]]
  padj < 0.05 & !is.na(padj)
}))
tempA <- mutate(tempA, Include = ifelse(r5AnySig, "in", "out"))
#### Number of genes in each category
tempA %>%
  group_by(Include) %>%
  tally()
topDEgenes <- which(tempA$Include == "in") #### find indexes
head(countsTable)
#### log2(x + 1) of eight columns of countsTable, picked by position
baseMeansHm <- countsTable[, c(110:113, 129:132)]
head(baseMeansHm)
baseMeansHm2 <- log2(baseMeansHm + 1)
#### Var14 minus RBC difference of the log2 means at each time point
baseMeansHm2$Var14_RBC_0h <-
  baseMeansHm2$Var14TNF_0h_mean - baseMeansHm2$RBC_TNF_0h_mean
baseMeansHm2$Var14_RBC_2h <-
  baseMeansHm2$Var14TNF_2h_mean - baseMeansHm2$RBC_TNF_2h_mean
baseMeansHm2$Var14_RBC_6h <-
  baseMeansHm2$Var14TNF_6h_mean - baseMeansHm2$RBC_TNF_6h_mean
baseMeansHm2$Var14_RBC_20h <-
  baseMeansHm2$Var14TNF_20h_mean - baseMeansHm2$RBC_TNF_20h_mean
#### Columns 9-12 are the four differences just added
baseMeansHm <- baseMeansHm2[, 9:12]
head(baseMeansHm)
#### Columns 1-8 are the log2 means themselves
baseMeansHmM <- baseMeansHm2[, 1:8]
head(baseMeansHmM)
topDEgenes <- which(tempA$Include == "in") #### find indexes
#### z-score every row across its columns
zscoreRows <- function(tbl) t(scale(t(as.matrix(tbl))))
#### Heatmap() settings shared by the three panels; only the matrix, legend
#### name, column title, colour mapping and body width differ between them
deHeatmap <- function(mat, legendName, title, colours, widthMm, labels) {
  Heatmap(
    mat,
    name = legendName,
    row_labels = labels,
    column_title = title,
    col = colours,
    column_title_gp = gpar(fontsize = 16, fontface = "bold"),
    width = unit(widthMm, "mm"),
    cluster_columns = FALSE,
    show_row_names = FALSE
  )
}
#### mean logfc
dataHMm <- zscoreRows(baseMeansHm[topDEgenes, ])
#### Row label = row name of the matrix plus the gene symbol
geneLabels <- paste0(rownames(dataHMm), " ", tempA$Gene_Symbol[topDEgenes])
hmap_hier_factors4 <- deHeatmap(dataHMm, "logfc", "Means logfc",
                                col_funGR, 50, geneLabels)
#### logfc relative to 0h: the 0h column is subtracted from every column
dataHMmPlot <- as.data.frame(dataHMm)
zeroHour <- dataHMmPlot$Var14_RBC_0h
dataHMmPlot[] <- lapply(dataHMmPlot, function(timePoint) timePoint - zeroHour)
dataHMmPlot <- as.matrix(dataHMmPlot)
hmap_hier_factors6 <- deHeatmap(dataHMmPlot, "Normalised logfc",
                                "0h Normalised logfc", col_funGR2, 50,
                                geneLabels)
#### means, with the columns interleaved (5,1,6,2,7,3,8,4)
dataHMm3 <- zscoreRows(baseMeansHmM[topDEgenes, ])
dataHMm3 <- dataHMm3[, c(5, 1, 6, 2, 7, 3, 8, 4)]
hmap_hier_factors5 <- deHeatmap(dataHMm3, "Expression", "Means",
                                col_fun, 100, geneLabels)
#### Draw the three heatmaps side by side
hmap_hier_factors4 + hmap_hier_factors6 + hmap_hier_factors5
#### Diagnostics for the number of clusters, then k-means on the row-scaled
#### logfc matrix.
#### k-means starts from randomly chosen centres, so the seed is fixed here:
#### without it the cluster numbering and membership, and every cluster file
#### and GO table derived from them, change from one run to the next.
set.seed(123)
par(mfrow = c(1, 2))
#### Silhouette method
fviz_nbclust(dataHMm, kmeans, method = "silhouette", k.max = 16) +
  labs(subtitle = "Silhouette method")
#### Elbow method
fviz_nbclust(dataHMm, kmeans, method = "wss", k.max = 16) +
  labs(subtitle = "Elbow method")
####gap stat slow!!!
####fviz_nbclust(dataHMm, kmeans, nstart = 25, method = "gap_stat", nboot = 100,k.max = 16)+
#### labs(subtitle = "Gap statistic method")
#### Re-seed directly before the final clustering so that its result does not
#### depend on whether the diagnostic plots above were run.
set.seed(123)
kclust7 <- kmeans(dataHMm, 6)
#### silhouette plot of the chosen clustering
distK <- daisy(dataHMm)
plot(silhouette(kclust7$cluster, distK), col = 1:6, border = NA)
#### One heatmap slice per k-means cluster, labelled "Cluster\n<id>".
#### To impose a slice order, turn split into a factor with explicit levels.
split <- paste0("Cluster\n", kclust7$cluster)
hmap_k <- Heatmap(
  dataHMm,
  name = "logfc",
  col = col_funGR,
  split = split,
  cluster_row_slices = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  width = unit(50, "mm"),
  column_title = "means logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold")
)
#### Clustered logfc next to the 0h-normalised and the means heatmaps
hmap_k + hmap_hier_factors6 + hmap_hier_factors5
Mean profiles of clusters
#### Cluster sizes and cluster centres
clustercount <- data.frame(kclust7$cluster)
clustersizes <- table(clustercount$kclust7.cluster)
clusterMeans <- data.frame(kclust7$centers)
#### Transposed centres: one row per sample, one column (X1, X2, ...) per cluster
clusterMeans1 <- data.frame(t(clusterMeans))
clusterMeans1 <- data.frame(Sample = rownames(clusterMeans1), clusterMeans1)
rownames(clusterMeans1) <- NULL
orderN <- c("Var14_RBC_0h", "Var14_RBC_2h", "Var14_RBC_6h", "Var14_RBC_20h") #### manual
#### Line plot of the centre of cluster k across the samples, in orderN order
clusterProfilePlot <- function(k) {
  clusterCol <- paste0("X", k)
  ggplot(data = clusterMeans1,
         aes(x = Sample, y = .data[[clusterCol]], group = 1)) +
    geom_line() +
    geom_point() +
    ggtitle(paste(paste0("Cluster X", k, " Profile "), clustersizes[k], " genes")) +
    scale_x_discrete(limits = orderN) +
    theme(axis.title.x = element_blank(), axis.title.y = element_blank())
}
pX1 <- clusterProfilePlot(1)
pX2 <- clusterProfilePlot(2)
pX3 <- clusterProfilePlot(3)
pX4 <- clusterProfilePlot(4)
pX5 <- clusterProfilePlot(5)
pX6 <- clusterProfilePlot(6)
#### plot
multiplot(pX1, pX2, pX3, pX4, pX5, pX6, cols = 2)
#### Gene symbols of the rows flagged "in"
topDEgenes <- which(tempA$Include == "in") #### find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]
#### export the gene expression data for the clusters
write.table(clusterMeans,
            paste0("ClusterMeansKm_", groupsName, ".txt"),
            sep = "\t")
#### Cluster id, gene symbol and scaled values, one row per clustered gene
ClusteredGenes <- data.frame(kclust7$cluster, SymbolsKm, dataHMm)
write.table(ClusteredGenes,
            paste0("ScaledDataInClustersKm_", groupsName, ".txt"),
            sep = "\t")
#### Gene symbols of the rows flagged "out"
bottomDEgenes <- which(tempA$Include == "out") #### find indexes
bottomG <- tempA[["Gene_Symbol"]][bottomDEgenes]
write.table(bottomG,
            paste0("ipaBottomKmeans_", groupsName, ".txt"),
            sep = "\t")
topDEgenes <- which(tempA$Include == "in") #### find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]
#### Cluster id and gene symbol only, plus the row name as a column
ipaKmeans <- ClusteredGenes[, 1:2]
ipaKmeans$name2 <- rownames(ipaKmeans)
#### One "1"/"0" membership column per cluster
ipaKmeans <- mutate(
  ipaKmeans,
  x1 = ifelse(kclust7.cluster == 1, "1", "0"),
  x2 = ifelse(kclust7.cluster == 2, "1", "0"),
  x3 = ifelse(kclust7.cluster == 3, "1", "0"),
  x4 = ifelse(kclust7.cluster == 4, "1", "0"),
  x5 = ifelse(kclust7.cluster == 5, "1", "0"),
  x6 = ifelse(kclust7.cluster == 6, "1", "0")
)
write.table(ipaKmeans, paste0("ipaKmeans_", groupsName, ".txt"), sep = "\t")
ClusteredGenes2 <- ClusteredGenes[c(1)]
#### One vector of gene symbols per k-means cluster, named X1, X2, ...
#### Membership is decided on the cluster column only. The previous
#### subset(ClusteredGenes$SymbolsKm, ClusteredGenes == i) compared every
#### column of the data frame with i, so any scaled value that happened to
#### equal a cluster id added spurious NA entries to that cluster's gene list.
#### The number of clusters and their names are taken from the data instead
#### of being hard-coded. base::split is called explicitly because this
#### script also assigns a variable named split.
listAll <- base::split(ClusteredGenes$SymbolsKm, ClusteredGenes$kclust7.cluster)
names(listAll) <- paste0("X", names(listAll))
#### if you want to rearrange the order, index listAll by name, e.g.
#### listAll <- listAll[c("X3", "X1", "X2", "X6", "X5", "X4")]
#lapply(listAll, head)
The simplify function has been used to cut down on GO redundancy
#### CC: GO cellular-component enrichment compared across the gene clusters.
#### The argument is spelled geneClusters in full instead of relying on
#### partial matching of "geneCluster".
cgoCC <- compareCluster(geneClusters = listAll,
                        universe = AllGeneNames,
                        fun = "enrichGO",
                        OrgDb = org.Hs.eg.db,
                        ####OrgDb=org.Mm.eg.db,
                        keyType = "SYMBOL",
                        ont = "CC",
                        pvalueCutoff = 0.05,
                        qvalueCutoff = 0.10)
#### simplify() is used to cut down on GO redundancy
cgoCC2 <- simplify(cgoCC, cutoff = 0.7, by = "p.adjust", select_fun = min)
#### write as spreadsheet
write.csv(as.data.frame(cgoCC2), paste0("GO_CC_", groupsName, ".csv"))
#### print() is explicit so the plot is also drawn when this script is run
#### through source(), where top-level results are not auto-printed
print(
  dotplot(cgoCC2, showCategory = 30,
          title = paste0("GO Cellular Compartment ", groupsName)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
Plots and GO data were written to files
#### Write the GO cellular-component dotplot to a PNG file.
#### print() is explicit because a ggplot object is otherwise only drawn by
#### top-level auto-printing; without it the PNG is left blank when this
#### script is run through source() or from inside a function or loop.
png(paste0("GO_CC_", groupsName, ".png"), width = 1224, height = 824)
print(
  dotplot(cgoCC2, showCategory = 30,
          title = paste0("GO Cellular Compartment ", groupsName)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
null device
1
GO BP
#### BP: GO biological-process enrichment compared across the gene clusters.
#### The argument is spelled geneClusters in full instead of relying on
#### partial matching of "geneCluster".
cgoBP <- compareCluster(geneClusters = listAll,
                        universe = AllGeneNames,
                        fun = "enrichGO",
                        OrgDb = org.Hs.eg.db,
                        keyType = "SYMBOL",
                        ont = "BP",
                        pvalueCutoff = 0.05,
                        qvalueCutoff = 0.10)
#### simplify() is used to cut down on GO redundancy
cgoBP2 <- simplify(cgoBP, cutoff = 0.7, by = "p.adjust", select_fun = min)
#### write as spreadsheet
write.csv(as.data.frame(cgoBP2), paste0("GO_BP_", groupsName, ".csv"))
#### The plot is built once and print()ed explicitly, first on the active
#### device and then into the PNG. A ggplot is otherwise only drawn by
#### top-level auto-printing, so the PNG would be blank when this script is
#### run through source() or from inside a function or loop.
bpDotplot <- dotplot(cgoBP2, showCategory = 30,
                     title = paste0("GO Biological Process ", groupsName)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(bpDotplot)
png(paste0("GO_BP_", groupsName, ".png"), width = 1024, height = 1224)
print(bpDotplot)
dev.off()
null device
1
GO MF
#### MF: GO molecular-function enrichment compared across the gene clusters.
#### The argument is spelled geneClusters in full instead of relying on
#### partial matching of "geneCluster".
cgoMF <- compareCluster(geneClusters = listAll,
                        universe = AllGeneNames,
                        fun = "enrichGO",
                        OrgDb = org.Hs.eg.db,
                        keyType = "SYMBOL",
                        ont = "MF",
                        pvalueCutoff = 0.05,
                        qvalueCutoff = 0.10)
#### simplify() is used to cut down on GO redundancy
cgoMF2 <- simplify(cgoMF, cutoff = 0.7, by = "p.adjust", select_fun = min)
#### write as spreadsheet
write.csv(as.data.frame(cgoMF2), paste0("GO_MF_", groupsName, ".csv"))
#### The plot is built once and print()ed explicitly, first on the active
#### device and then into the PNG. A ggplot is otherwise only drawn by
#### top-level auto-printing, so the PNG would be blank when this script is
#### run through source() or from inside a function or loop.
mfDotplot <- dotplot(cgoMF2, showCategory = 30,
                     title = paste0("GO Molecular Function ", groupsName)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(mfDotplot)
png(paste0("GO_MF_", groupsName, ".png"), width = 1424, height = 624)
print(mfDotplot)
dev.off()
null device
1
#### Settings for this analysis; groupsName is pasted into the names of the
#### output files written further down
groupsName <- "R5_Var14vRBC_TNF_kmeans_p0.05fc2"
countsTable <- read.delim(
  "RNAseq2019July_5.txt",
  header = TRUE,
  sep = "\t",
  check.names = FALSE,
  row.names = 1
)
head(countsTable)
#### Gene symbol of every row of the table
AllGeneNames <- countsTable$Gene_Symbol
#### The two Venn diagrams vennp and vennpq side by side
grid.arrange(
  gTree(children = vennp),
  gTree(children = vennpq),
  ncol = 2,
  top = "R5 Var14 TNF"
)
#### Working copy of the table with the row names reset
tempA <- countsTable
rownames(tempA) <- NULL
#### Flag each gene as "in" when it passes pvalue < 0.05 and
#### |log2 fold change| > 1 at any of the four time points, otherwise "out".
#### Missing values count as "not passing", so Include is never NA: with the
#### previous nested ifelse() a gene with a passing pvalue but a missing log2
#### fold change got Include = NA and silently dropped out of both the "in"
#### and the "out" gene sets.
r5TimePoints <- c("0h", "2h", "6h", "20h")
r5PvalueCols <- paste0("pvalue_R5_TNF_var14_vs_RBC_", r5TimePoints)
r5LfcCols <- paste0("log2FoldChange_R5_TNF_var14_vs_RBC_", r5TimePoints)
#### Fail loudly if a column is absent instead of producing NA flags
stopifnot(all(c(r5PvalueCols, r5LfcCols) %in% names(tempA)))
r5IsDE <- Reduce(`|`, Map(function(pvalueCol, lfcCol) {
  pvalue <- tempA[[pvalueCol]]
  lfc <- tempA[[lfcCol]]
  !is.na(pvalue) & !is.na(lfc) & pvalue < 0.05 & abs(lfc) > 1
}, r5PvalueCols, r5LfcCols))
tempA <- mutate(tempA, Include = ifelse(r5IsDE, "in", "out"))
#### Number of genes in each category
tempA %>%
  group_by(Include) %>%
  tally()
topDEgenes <- which(tempA$Include == "in") #### find indexes
#### log2(x + 1) of eight columns of countsTable, picked by position
baseMeansHm <- countsTable[, c(110:113, 129:132)]
head(baseMeansHm)
baseMeansHm2 <- log2(baseMeansHm + 1)
#### Var14 minus RBC difference of the log2 means at each time point
baseMeansHm2$Var14_RBC_0h <-
  baseMeansHm2$Var14TNF_0h_mean - baseMeansHm2$RBC_TNF_0h_mean
baseMeansHm2$Var14_RBC_2h <-
  baseMeansHm2$Var14TNF_2h_mean - baseMeansHm2$RBC_TNF_2h_mean
baseMeansHm2$Var14_RBC_6h <-
  baseMeansHm2$Var14TNF_6h_mean - baseMeansHm2$RBC_TNF_6h_mean
baseMeansHm2$Var14_RBC_20h <-
  baseMeansHm2$Var14TNF_20h_mean - baseMeansHm2$RBC_TNF_20h_mean
#### Columns 9-12 are the four differences just added
baseMeansHm <- baseMeansHm2[, 9:12]
head(baseMeansHm)
#### Columns 1-8 are the log2 means themselves
baseMeansHmM <- baseMeansHm2[, 1:8]
head(baseMeansHmM)
topDEgenes <- which(tempA$Include == "in") #### find indexes
#### z-score every row across its columns
zscoreRows <- function(tbl) t(scale(t(as.matrix(tbl))))
#### Heatmap() settings shared by the three panels; only the matrix, legend
#### name, column title, colour mapping and body width differ between them
deHeatmap <- function(mat, legendName, title, colours, widthMm, labels) {
  Heatmap(
    mat,
    name = legendName,
    row_labels = labels,
    column_title = title,
    col = colours,
    column_title_gp = gpar(fontsize = 16, fontface = "bold"),
    width = unit(widthMm, "mm"),
    cluster_columns = FALSE,
    show_row_names = FALSE
  )
}
#### mean logfc
dataHMm <- zscoreRows(baseMeansHm[topDEgenes, ])
#### Row label = row name of the matrix plus the gene symbol
geneLabels <- paste0(rownames(dataHMm), " ", tempA$Gene_Symbol[topDEgenes])
hmap_hier_factors4 <- deHeatmap(dataHMm, "logfc", "Means logfc",
                                col_funGR, 50, geneLabels)
#### logfc relative to 0h: the 0h column is subtracted from every column
dataHMmPlot <- as.data.frame(dataHMm)
zeroHour <- dataHMmPlot$Var14_RBC_0h
dataHMmPlot[] <- lapply(dataHMmPlot, function(timePoint) timePoint - zeroHour)
dataHMmPlot <- as.matrix(dataHMmPlot)
hmap_hier_factors6 <- deHeatmap(dataHMmPlot, "Normalised logfc",
                                "0h Normalised logfc", col_funGR2, 50,
                                geneLabels)
#### means, with the columns interleaved (5,1,6,2,7,3,8,4)
dataHMm3 <- zscoreRows(baseMeansHmM[topDEgenes, ])
dataHMm3 <- dataHMm3[, c(5, 1, 6, 2, 7, 3, 8, 4)]
hmap_hier_factors5 <- deHeatmap(dataHMm3, "Expression", "Means",
                                col_fun, 100, geneLabels)
#### Draw the three heatmaps side by side
hmap_hier_factors4 + hmap_hier_factors6 + hmap_hier_factors5
#### Diagnostics for the number of clusters, then k-means on the row-scaled
#### logfc matrix.
#### k-means starts from randomly chosen centres, so the seed is fixed here:
#### without it the cluster numbering and membership, and every cluster file
#### and GO table derived from them, change from one run to the next.
set.seed(123)
par(mfrow = c(1, 2))
#### Silhouette method
fviz_nbclust(dataHMm, kmeans, method = "silhouette", k.max = 16) +
  labs(subtitle = "Silhouette method")
#### Elbow method
fviz_nbclust(dataHMm, kmeans, method = "wss", k.max = 16) +
  labs(subtitle = "Elbow method")
####gap stat slow!!!
####fviz_nbclust(dataHMm, kmeans, nstart = 25, method = "gap_stat", nboot = 100,k.max = 16)+
#### labs(subtitle = "Gap statistic method")
#### Re-seed directly before the final clustering so that its result does not
#### depend on whether the diagnostic plots above were run.
set.seed(123)
kclust8 <- kmeans(dataHMm, 6)
#### silhouette plot of the chosen clustering
distK <- daisy(dataHMm)
plot(silhouette(kclust8$cluster, distK), col = 1:6, border = NA)
#### One heatmap slice per k-means cluster, labelled "Cluster\n<id>".
#### To impose a slice order, turn split into a factor with explicit levels.
split <- paste0("Cluster\n", kclust8$cluster)
hmap_k <- Heatmap(
  dataHMm,
  name = "logfc",
  col = col_funGR,
  split = split,
  cluster_row_slices = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  width = unit(50, "mm"),
  column_title = "means logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold")
)
#### Clustered logfc next to the 0h-normalised and the means heatmaps
hmap_k + hmap_hier_factors6 + hmap_hier_factors5
Mean profiles of clusters
#### Cluster sizes and cluster centres from the k-means result
clustercount <- data.frame(kclust8$cluster)
clustersizes <- table(clustercount$kclust8.cluster)
clusterMeans <- data.frame(kclust8$centers)
#### Transposed copy: one row per sample column, one column (X1..X6) per
#### cluster, with the sample names moved into a "Sample" column.
clusterMeans1 <- data.frame(t(clusterMeans))
clusterMeans1 <- cbind(rownames(clusterMeans1), clusterMeans1)
names(clusterMeans1)[names(clusterMeans1) == "rownames(clusterMeans1)"] <- "Sample"
rownames(clusterMeans1) <- NULL
orderN <- c("Var14_RBC_0h", "Var14_RBC_2h", "Var14_RBC_6h", "Var14_RBC_20h") #### manual
####clusterMeans1
#### Layers shared by all six profile plots: line + points, x axis in the
#### manual sample order, no axis titles.
profileLayers <- list(
  geom_line(),
  geom_point(),
  scale_x_discrete(limits = orderN),
  theme(axis.title.x = element_blank(), axis.title.y = element_blank())
)
pX1 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X1, group = 1)) +
  ggtitle(paste("Cluster X1 Profile ", clustersizes[1], " genes")) +
  profileLayers
pX2 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X2, group = 1)) +
  ggtitle(paste("Cluster X2 Profile ", clustersizes[2], " genes")) +
  profileLayers
pX3 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X3, group = 1)) +
  ggtitle(paste("Cluster X3 Profile ", clustersizes[3], " genes")) +
  profileLayers
pX4 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X4, group = 1)) +
  ggtitle(paste("Cluster X4 Profile ", clustersizes[4], " genes")) +
  profileLayers
pX5 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X5, group = 1)) +
  ggtitle(paste("Cluster X5 Profile ", clustersizes[5], " genes")) +
  profileLayers
pX6 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X6, group = 1)) +
  ggtitle(paste("Cluster X6 Profile ", clustersizes[6], " genes")) +
  profileLayers
#plot
multiplot(pX1, pX2, pX3, pX4, pX5, pX6, cols = 2)
#### Rows flagged "in" and their gene symbols
topDEgenes <- which(tempA$Include == "in") ####find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]
#### export the gene expression data for the clusters
write.table(
  clusterMeans,
  file = paste0("ClusterMeansKm_", groupsName, ".txt"),
  sep = "\t"
)
#### cluster id + symbol + scaled values, one row per clustered row
ClusteredGenes <- data.frame(kclust8$cluster, SymbolsKm, dataHMm)
write.table(
  ClusteredGenes,
  file = paste0("ScaledDataInClustersKm_", groupsName, ".txt"),
  sep = "\t"
)
#head(ClusteredGenes)
#### symbols of the rows flagged "out", written to their own file
bottomDEgenes <- which(tempA$Include == "out") ####find indexes
bottomG <- tempA[["Gene_Symbol"]][bottomDEgenes]
write.table(
  bottomG,
  file = paste0("ipaBottomKmeans_", groupsName, ".txt"),
  sep = "\t"
)
topDEgenes <- which(tempA$Include == "in") ####find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]
#### Cluster-membership table: cluster id and symbol (first two columns of
#### ClusteredGenes), the row names copied into name2, and one "1"/"0"
#### character flag column per cluster (x1..x6).
#countsTable <-countsTable[,c(1:15)]####if samples need removing
ipaKmeans <- ClusteredGenes[, 1:2]
ipaKmeans$name2 <- rownames(ipaKmeans)
#ipaKmeans%>% rownames_to_column(var = "rowname")
#rowid_to_column(ipaKmeans)
ipaKmeans <- mutate(
  ipaKmeans,
  x1 = ifelse(kclust8.cluster == 1, "1", "0"),
  x2 = ifelse(kclust8.cluster == 2, "1", "0"),
  x3 = ifelse(kclust8.cluster == 3, "1", "0"),
  x4 = ifelse(kclust8.cluster == 4, "1", "0"),
  x5 = ifelse(kclust8.cluster == 5, "1", "0"),
  x6 = ifelse(kclust8.cluster == 6, "1", "0")
)
#ipaKmeans
write.table(
  ipaKmeans,
  file = paste0("ipaKmeans_", groupsName, ".txt"),
  sep = "\t"
)
#head(ipaKmeans)
#### Build the named list of gene symbols per k-means cluster that is passed
#### to compareCluster() as geneCluster.
ClusteredGenes2 <- ClusteredGenes[c(1)]
#ClusteredGenes2
#### Select symbols by comparing ONLY the cluster-assignment column with the
#### cluster id. The earlier test `ClusteredGenes == i` compared every column
#### of the data frame with i, so any scaled value or symbol that happened to
#### equal i would have added spurious NA entries to the gene list.
listAll <- lapply(1:6, function(clusterId) {
  ClusteredGenes$SymbolsKm[ClusteredGenes$kclust8.cluster == clusterId]
})
#### name the vectors in the list, one name per cluster (6 clusters here)
names(listAll) <- c("X1", "X2", "X3", "X4", "X5", "X6")
#if you want to rearrange the order
#listAll<-listAll[c("X3", "X1", "X4", "X5", "X2", "X6")]
#lapply(listAll, head)
The simplify function has been used to cut down on GO redundancy
#str(AllGeneNames)
####CC
#### GO Cellular Component enrichment per cluster (gene symbols as keys,
#### AllGeneNames as the background universe).
cgoCC <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  ####OrgDb=org.Mm.eg.db,
  keyType = "SYMBOL",
  ont = "CC",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### simplify() with cutoff 0.7, selecting by minimum p.adjust
cgoCC2 <- simplify(cgoCC, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
goCCtable <- as.data.frame(cgoCC2)
write.csv(goCCtable, paste0("GO_CC_", groupsName, ".csv"))
#### dot plot (up to 30 categories) with vertical x-axis labels
goCCdotplot <- dotplot(
  cgoCC2,
  showCategory = 30,
  title = paste0("GO Cellular Compartment ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
goCCdotplot
Plots and GO data were written to files
#### Write the GO CC dot plot to a PNG file.
#### print() is explicit: a ggplot is only drawn when it is printed, and
#### auto-printing happens only for top-level expressions. Without print() the
#### file would be blank when this code is run via source() or inside a
#### function or loop.
png(paste0("GO_CC_", groupsName, ".png"), width = 1224, height = 824)
print(
  dotplot(
    cgoCC2,
    showCategory = 30,
    title = paste0("GO Cellular Compartment ", groupsName)
  ) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
null device
1
GO BP
####BP
#### GO Biological Process enrichment per cluster (gene symbols as keys,
#### AllGeneNames as the background universe).
cgoBP <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### simplify() with cutoff 0.7, selecting by minimum p.adjust
cgoBP2 <- simplify(cgoBP, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoBP2), paste0("GO_BP_", groupsName, ".csv"))
#### Build the dot plot once and reuse it for the notebook and for the file,
#### so the two can never drift apart.
goBPdotplot <- dotplot(
  cgoBP2,
  showCategory = 30,
  title = paste0("GO Biological Process ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goBPdotplot)
#### Explicit print() inside the png device: auto-printing only happens for
#### top-level expressions, so the file would be blank under source().
png(paste0("GO_BP_", groupsName, ".png"), width = 1024, height = 1224)
print(goBPdotplot)
dev.off()
null device
1
GO MF
####MF
#### GO Molecular Function enrichment per cluster (gene symbols as keys,
#### AllGeneNames as the background universe).
cgoMF <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "MF",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### simplify() with cutoff 0.7, selecting by minimum p.adjust
cgoMF2 <- simplify(cgoMF, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoMF2), paste0("GO_MF_", groupsName, ".csv"))
#### Build the dot plot once and reuse it for the notebook and for the file,
#### so the two can never drift apart.
goMFdotplot <- dotplot(
  cgoMF2,
  showCategory = 30,
  title = paste0("GO Molecular Function ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goMFdotplot)
#### Explicit print() inside the png device: auto-printing only happens for
#### top-level expressions, so the file would be blank under source().
png(paste0("GO_MF_", groupsName, ".png"), width = 1424, height = 824)
print(goMFdotplot)
dev.off()
null device
1
#### Label used in every output file name of this analysis
groupsName <- "R5_Var14vRBC_TNF_kmeans_padj0.1fc2"
#### Tab-delimited results table with a header row; first column becomes the
#### row names and column names are kept exactly as written in the file.
countsTable <- read.delim(
  "RNAseq2019July_5.txt",
  row.names = 1,
  check.names = FALSE
)
head(countsTable)
#### all gene symbols in the table (used as the enrichment universe)
AllGeneNames <- countsTable[["Gene_Symbol"]]
#head(AllGeneNames)
#### two previously built venn grob lists side by side
vennLeftPanel <- gTree(children = vennq2)
vennRightPanel <- gTree(children = vennpq)
grid.arrange(vennLeftPanel, vennRightPanel, ncol = 2, top = "R5 Var14 TNF")
#tempA<-resAll[-c(10:30) ]
#### Flag each row "in" when, at ANY of the four time points, the
#### var14-vs-RBC comparison has padj < 0.1 and |log2FoldChange| > 1;
#### otherwise "out".
tempA <- countsTable
#rownames(tempA)
rownames(tempA) <- NULL
#### The test is NA-safe: a missing padj or log2FoldChange at one time point
#### counts as "not significant there" and never masks a later time point.
#### (With nested ifelse() a non-NA padj < 0.1 paired with an NA fold change
#### produced Include = NA, dropping the row from both "in" and "out".)
includeTimepoints <- c("0h", "2h", "6h", "20h")
passesAtAnyTimepoint <- Reduce(`|`, lapply(includeTimepoints, function(tp) {
  padj <- tempA[[paste0("padj_R5_TNF_var14_vs_RBC_", tp)]]
  lfc <- tempA[[paste0("log2FoldChange_R5_TNF_var14_vs_RBC_", tp)]]
  !is.na(padj) & !is.na(lfc) & padj < 0.1 & abs(lfc) > 1
}))
tempA$Include <- ifelse(passesAtAnyTimepoint, "in", "out")
#tempA
####library(dplyr)
#### how many rows fall in each Include category
tempA %>%
  group_by(Include) %>%
  tally()
topDEgenes <- which(tempA$Include == "in") ####find indexes
#baseMeansHm <-countsTable[,c(60:63)]
#### Eight group-mean columns picked by position from the results table
baseMeansHm <- countsTable[, c(110:113, 129:132)]
head(baseMeansHm)
#### log2(x + 1) transform, then per time point the difference
#### Var14TNF mean - RBC_TNF mean, stored as Var14_RBC_<time>
baseMeansHm2 <- log2(baseMeansHm + 1)
for (timePoint in c("0h", "2h", "6h", "20h")) {
  var14MeanCol <- paste0("Var14TNF_", timePoint, "_mean")
  rbcMeanCol <- paste0("RBC_TNF_", timePoint, "_mean")
  baseMeansHm2[[paste0("Var14_RBC_", timePoint)]] <-
    baseMeansHm2[[var14MeanCol]] - baseMeansHm2[[rbcMeanCol]]
}
#### columns 9-12: the four differences; columns 1-8: the log2 means
baseMeansHm <- baseMeansHm2[, 9:12]
head(baseMeansHm)
baseMeansHmM <- baseMeansHm2[, 1:8]
head(baseMeansHmM)
topDEgenes <- which(tempA$Include == "in") ####find indexes
####mean logfc
#### Row-wise scaled logfc values for the selected rows: scale() works on
#### columns, hence the transpose before and after.
dataHMm <- t(scale(t(as.matrix(baseMeansHm[topDEgenes, ]))))
hmap_hier_factors4 <- Heatmap(
  dataHMm,
  name = "logfc",
  row_labels = paste0(rownames(dataHMm), " ", tempA[topDEgenes, ]$Gene_Symbol),
  column_title = "Means logfc",
  col = col_funGR,
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  width = unit(50, "mm"),
  cluster_columns = FALSE,
  show_row_names = FALSE
)
#### Express every time point relative to the 0h column: the 0h values are
#### subtracted from each column. The 0h column itself is processed last so the
#### other columns still see the original baseline (0h then becomes all zero).
dataHMmPlot <- as.data.frame(dataHMm)
baselineOrder <- c("Var14_RBC_2h", "Var14_RBC_6h", "Var14_RBC_20h", "Var14_RBC_0h")
for (timeCol in baselineOrder) {
  dataHMmPlot[[timeCol]] <- dataHMmPlot[[timeCol]] - dataHMmPlot[["Var14_RBC_0h"]]
}
dataHMmPlot <- as.matrix(dataHMmPlot)
#### Heatmap of the baseline-subtracted values (columns kept in given order)
hmap_hier_factors6 <- Heatmap(
  dataHMmPlot,
  name = "Normalised logfc",
  row_labels = paste0(rownames(dataHMm), " ", tempA[topDEgenes, ]$Gene_Symbol),
  column_title = "0h Normalised logfc",
  col = col_funGR2,
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  width = unit(50, "mm"),
  cluster_columns = FALSE,
  show_row_names = FALSE
)
####means
#### Row-wise scaled group means for the selected rows: scale() works on
#### columns, hence the transpose before and after.
dataHMm3 <- t(scale(t(as.matrix(baseMeansHmM[topDEgenes, ]))))
#### interleave the eight mean columns (5,1,6,2,7,3,8,4)
dataHMm3 <- dataHMm3[, c(5, 1, 6, 2, 7, 3, 8, 4)]
hmap_hier_factors5 <- Heatmap(
  dataHMm3,
  name = "Expression",
  row_labels = paste0(rownames(dataHMm), " ", tempA[topDEgenes, ]$Gene_Symbol),
  column_title = "Means",
  col = col_fun,
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  width = unit(100, "mm"),
  cluster_columns = FALSE,
  show_row_names = FALSE
)
#### draw the three heatmaps side by side
hmap_hier_factors4 + hmap_hier_factors6 + hmap_hier_factors5
#### Choose the number of clusters, run k-means on the scaled logfc matrix and
#### draw the k-means-split heatmap next to the two hierarchical heatmaps.
####
#### fviz_nbclust() returns ggplot objects, which ignore par(mfrow = ...); the
#### two diagnostic plots are therefore laid out with grid.arrange(). Dropping
#### par(mfrow) also lets the base-graphics silhouette plot use the full device.
#### Silhouette method
silhouetteMethodPlot <- fviz_nbclust(dataHMm, kmeans, method = "silhouette", k.max = 16) +
  labs(subtitle = "Silhouette method")
#### Elbow method
elbowMethodPlot <- fviz_nbclust(dataHMm, kmeans, method = "wss", k.max = 16) +
  labs(subtitle = "Elbow method")
grid.arrange(silhouetteMethodPlot, elbowMethodPlot, ncol = 2)
####gap stat slow!!!
####set.seed(123)
####fviz_nbclust(dataHMm, kmeans, nstart = 25, method = "gap_stat", nboot = 100,k.max = 16)+
#### labs(subtitle = "Gap statistic method")
#### kmeans() starts from randomly chosen centres. Fixing the seed makes the
#### cluster numbering, the exported tables and the GO results reproducible
#### between runs; change the seed to explore a different solution.
set.seed(123)
kclust8b <- kmeans(dataHMm, 6)
#silhouette plot
distK <- daisy(dataHMm)
plot(silhouette(kclust8b$cluster, distK), col = 1:6, border = NA)
#### one heatmap slice per k-means cluster
split <- paste0("Cluster\n", kclust8b$cluster)
#split <- factor(paste0("Cluster\n", kclust3$cluster), levels=c("Cluster\n3","Cluster\n1","Cluster\n4","Cluster\n5","Cluster\n2","Cluster\n6"))
hmap_k <- Heatmap(
  dataHMm,
  split = split,
  cluster_row_slices = FALSE,
  cluster_columns = FALSE,
  show_row_names = FALSE,
  name = "logfc",
  col = col_funGR,
  width = unit(50, "mm"),
  column_title = "means logfc",
  column_title_gp = gpar(fontsize = 16, fontface = "bold")
)
hmap_k + hmap_hier_factors6 + hmap_hier_factors5
Mean profiles of clusters
#### Cluster sizes and cluster centres from the k-means result
clustercount <- data.frame(kclust8b$cluster)
clustersizes <- table(clustercount$kclust8b.cluster)
clusterMeans <- data.frame(kclust8b$centers)
#### Transposed copy: one row per sample column, one column (X1..X6) per
#### cluster, with the sample names moved into a "Sample" column.
clusterMeans1 <- data.frame(t(clusterMeans))
clusterMeans1 <- cbind(rownames(clusterMeans1), clusterMeans1)
names(clusterMeans1)[names(clusterMeans1) == "rownames(clusterMeans1)"] <- "Sample"
rownames(clusterMeans1) <- NULL
orderN <- c("Var14_RBC_0h", "Var14_RBC_2h", "Var14_RBC_6h", "Var14_RBC_20h") #### manual
####clusterMeans1
#### Layers shared by all six profile plots: line + points, x axis in the
#### manual sample order, no axis titles.
profileLayers <- list(
  geom_line(),
  geom_point(),
  scale_x_discrete(limits = orderN),
  theme(axis.title.x = element_blank(), axis.title.y = element_blank())
)
pX1 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X1, group = 1)) +
  ggtitle(paste("Cluster X1 Profile ", clustersizes[1], " genes")) +
  profileLayers
pX2 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X2, group = 1)) +
  ggtitle(paste("Cluster X2 Profile ", clustersizes[2], " genes")) +
  profileLayers
pX3 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X3, group = 1)) +
  ggtitle(paste("Cluster X3 Profile ", clustersizes[3], " genes")) +
  profileLayers
pX4 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X4, group = 1)) +
  ggtitle(paste("Cluster X4 Profile ", clustersizes[4], " genes")) +
  profileLayers
pX5 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X5, group = 1)) +
  ggtitle(paste("Cluster X5 Profile ", clustersizes[5], " genes")) +
  profileLayers
pX6 <- ggplot(data = clusterMeans1, aes(x = Sample, y = X6, group = 1)) +
  ggtitle(paste("Cluster X6 Profile ", clustersizes[6], " genes")) +
  profileLayers
#plot
multiplot(pX1, pX2, pX3, pX4, pX5, pX6, cols = 2)
#### Rows flagged "in" and their gene symbols
topDEgenes <- which(tempA$Include == "in") ####find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]
#### export the gene expression data for the clusters
write.table(
  clusterMeans,
  file = paste0("ClusterMeansKm_", groupsName, ".txt"),
  sep = "\t"
)
#### cluster id + symbol + scaled values, one row per clustered row
ClusteredGenes <- data.frame(kclust8b$cluster, SymbolsKm, dataHMm)
write.table(
  ClusteredGenes,
  file = paste0("ScaledDataInClustersKm_", groupsName, ".txt"),
  sep = "\t"
)
#head(ClusteredGenes)
#### symbols of the rows flagged "out", written to their own file
bottomDEgenes <- which(tempA$Include == "out") ####find indexes
bottomG <- tempA[["Gene_Symbol"]][bottomDEgenes]
write.table(
  bottomG,
  file = paste0("ipaBottomKmeans_", groupsName, ".txt"),
  sep = "\t"
)
topDEgenes <- which(tempA$Include == "in") ####find indexes
tempAkm <- tempA[topDEgenes, ]
SymbolsKm <- tempAkm[["Gene_Symbol"]]
#### Cluster-membership table: cluster id and symbol (first two columns of
#### ClusteredGenes), the row names copied into name2, and one "1"/"0"
#### character flag column per cluster (x1..x6).
#countsTable <-countsTable[,c(1:15)]####if samples need removing
ipaKmeans <- ClusteredGenes[, 1:2]
ipaKmeans$name2 <- rownames(ipaKmeans)
#ipaKmeans%>% rownames_to_column(var = "rowname")
#rowid_to_column(ipaKmeans)
ipaKmeans <- mutate(
  ipaKmeans,
  x1 = ifelse(kclust8b.cluster == 1, "1", "0"),
  x2 = ifelse(kclust8b.cluster == 2, "1", "0"),
  x3 = ifelse(kclust8b.cluster == 3, "1", "0"),
  x4 = ifelse(kclust8b.cluster == 4, "1", "0"),
  x5 = ifelse(kclust8b.cluster == 5, "1", "0"),
  x6 = ifelse(kclust8b.cluster == 6, "1", "0")
)
#ipaKmeans
write.table(
  ipaKmeans,
  file = paste0("ipaKmeans_", groupsName, ".txt"),
  sep = "\t"
)
#head(ipaKmeans)
#### Build the named list of gene symbols per k-means cluster that is passed
#### to compareCluster() as geneCluster.
ClusteredGenes2 <- ClusteredGenes[c(1)]
#ClusteredGenes2
#### Select symbols by comparing ONLY the cluster-assignment column with the
#### cluster id. The earlier test `ClusteredGenes == i` compared every column
#### of the data frame with i, so any scaled value or symbol that happened to
#### equal i would have added spurious NA entries to the gene list.
listAll <- lapply(1:6, function(clusterId) {
  ClusteredGenes$SymbolsKm[ClusteredGenes$kclust8b.cluster == clusterId]
})
#### name the vectors in the list, one name per cluster (6 clusters here)
names(listAll) <- c("X1", "X2", "X3", "X4", "X5", "X6")
#if you want to rearrange the order
#listAll<-listAll[c("X3", "X1", "X4", "X5", "X2", "X6")]
#lapply(listAll, head)
The simplify function has been used to cut down on GO redundancy
#str(AllGeneNames)
####CC
#### GO Cellular Component enrichment per cluster (gene symbols as keys,
#### AllGeneNames as the background universe).
cgoCC <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  ####OrgDb=org.Mm.eg.db,
  keyType = "SYMBOL",
  ont = "CC",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### simplify() with cutoff 0.7, selecting by minimum p.adjust
cgoCC2 <- simplify(cgoCC, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
goCCtable <- as.data.frame(cgoCC2)
write.csv(goCCtable, paste0("GO_CC_", groupsName, ".csv"))
#### dot plot (up to 30 categories) with vertical x-axis labels
goCCdotplot <- dotplot(
  cgoCC2,
  showCategory = 30,
  title = paste0("GO Cellular Compartment ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
goCCdotplot
Plots and GO data were written to files
#### Write the GO CC dot plot to a PNG file.
#### print() is explicit: a ggplot is only drawn when it is printed, and
#### auto-printing happens only for top-level expressions. Without print() the
#### file would be blank when this code is run via source() or inside a
#### function or loop.
png(paste0("GO_CC_", groupsName, ".png"), width = 1224, height = 824)
print(
  dotplot(
    cgoCC2,
    showCategory = 30,
    title = paste0("GO Cellular Compartment ", groupsName)
  ) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
dev.off()
null device
1
GO BP
####BP
#### GO Biological Process enrichment per cluster (gene symbols as keys,
#### AllGeneNames as the background universe).
cgoBP <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### simplify() with cutoff 0.7, selecting by minimum p.adjust
cgoBP2 <- simplify(cgoBP, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoBP2), paste0("GO_BP_", groupsName, ".csv"))
#### Build the dot plot once and reuse it for the notebook and for the file,
#### so the two can never drift apart.
goBPdotplot <- dotplot(
  cgoBP2,
  showCategory = 30,
  title = paste0("GO Biological Process ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goBPdotplot)
#### Explicit print() inside the png device: auto-printing only happens for
#### top-level expressions, so the file would be blank under source().
png(paste0("GO_BP_", groupsName, ".png"), width = 1024, height = 1224)
print(goBPdotplot)
dev.off()
null device
1
GO MF
####MF
#### GO Molecular Function enrichment per cluster (gene symbols as keys,
#### AllGeneNames as the background universe).
cgoMF <- compareCluster(
  geneCluster = listAll,
  universe = AllGeneNames,
  fun = "enrichGO",
  OrgDb = org.Hs.eg.db,
  keyType = "SYMBOL",
  ont = "MF",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.10
)
#### simplify() with cutoff 0.7, selecting by minimum p.adjust
cgoMF2 <- simplify(cgoMF, cutoff = 0.7, by = "p.adjust", select_fun = min)
####write as spreadsheet
write.csv(as.data.frame(cgoMF2), paste0("GO_MF_", groupsName, ".csv"))
#### Build the dot plot once and reuse it for the notebook and for the file,
#### so the two can never drift apart.
goMFdotplot <- dotplot(
  cgoMF2,
  showCategory = 30,
  title = paste0("GO Molecular Function ", groupsName)
) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
print(goMFdotplot)
#### Explicit print() inside the png device: auto-printing only happens for
#### top-level expressions, so the file would be blank under source().
png(paste0("GO_MF_", groupsName, ".png"), width = 1424, height = 824)
print(goMFdotplot)
dev.off()
null device
1
Save: once you are happy with the clustering, save the workspace so that it can be recalled later.
#### Write every object in the current workspace to KmDecember.RData
save.image(file="KmDecember.RData")
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.