# GSE141941
# Assumes all bigWig (.bw) files are in the current working directory.
#
# This section
#   1. bins every GSM*.bw track into 25 kb windows (one Chip_<chr> file per
#      sample and chromosome),
#   2. stacks the samples into a bins x 4 x 2 array per chromosome,
#   3. concatenates all chromosomes and applies HOSVD,
#   4. maps the selected bins to genes through the GRCh37 BioMart,
#   5. runs PCA on the RNA-seq table and compares the resulting gene lists.

library(rtracklayer)
library(rTensor)
library(abind)
library(biomaRt)
# read.xlsx(sheetIndex = ) and read.ods(sheet = ) are called below but no
# package providing them was attached.
# NOTE(review): presumably the xlsx and readODS packages -- confirm.
library(xlsx)
library(readODS)

bin_size <- 25000          # window width in bp
hours <- c(0, 6, 12, 24)   # x axis used for the second tensor mode

# Load a single object stored with save() without touching the global
# environment.
load_saved <- function(path, name) {
  env <- new.env()
  load(path, envir = env)
  get(name, envir = env)
}

# ---- 1. Bin the bigWig tracks ------------------------------------------------
files <- list.files("./")
files <- files[grepl("GSM", files) & grepl(".bw", files, fixed = TRUE)]

# Every file is processed (the original handled only files[1]); the steps
# below need one directory per sample.
for (j in seq_along(files)) {
  out_dir <- gsub(".bw", "", files[j], fixed = TRUE)
  dir.create(out_dir, showWarnings = FALSE)

  # A small import is enough to read the sequence names/lengths of the file.
  probe <- import(files[j], which = GRanges("chr1", IRanges(1, 30000)))
  seq_names <- probe@seqinfo@seqnames
  seq_lengths <- probe@seqinfo@seqlengths
  # Drop sequences whose name contains "_"; which(!grepl()) keeps everything
  # when nothing matches, unlike x[-grep()].
  id <- which(!grepl("_", seq_names))

  for (k in seq_along(id)) {
    seqname <- seq_names[id[k]]
    seq_length <- seq_lengths[id[k]]
    if (seq_length >= bin_size) {
      breaks <- seq(1, seq_length, by = bin_size)
    } else {
      breaks <- c(1, seq_length)
    }
    # Bin i covers breaks[i] .. breaks[i + 1] - 1; whatever lies beyond the
    # last break is not binned (unchanged from the original).
    n_bins <- length(breaks) - 1
    Chip <- numeric(n_bins)
    for (i in seq_len(n_bins)) {
      cat(i, " ")
      region <- GRanges(seqname, IRanges(breaks[i], breaks[i + 1] - 1))
      Chip[i] <- sum(import(files[j], which = region)$score)
    }
    save(file = paste0(out_dir, "/Chip_", seqname), Chip)
  }
}

# ---- 2. Build one bins x 4 x 2 array per chromosome --------------------------
dirs <- list.files(pattern = "GSM")
dirs <- dirs[!grepl("bw", dirs)]
chr <- paste0("chr", c(1:22, "M", "X", "Y"))

# The arrays are read back from ./Z/ below, so they are written there too
# (the original saved them into the working directory).
dir.create("Z", showWarnings = FALSE)

for (l in seq_along(chr)) {
  cat(l, " ")
  chip_mat <- do.call(
    cbind,
    lapply(dirs, function(d) load_saved(paste0(d, "/Chip_", chr[l]), "Chip"))
  )
  if (is.null(chip_mat) || ncol(chip_mat) < 8) {
    stop("expected binned signal for at least 8 samples, found ",
         length(dirs), call. = FALSE)
  }
  # No random numbers are drawn here; the call is kept so the RNG state seen
  # by later code is unchanged.
  set.seed(0)
  # Column 2 * (t - 1) + r goes to Z[, t, r].
  # NOTE(review): assumes `dirs` sorts as (mode-2 level 1, mode-3 level 1),
  # (level 1, level 2), (level 2, level 1), ... -- confirm against sample IDs.
  Z <- array(NA_real_, c(nrow(chip_mat), 4, 2))
  for (t in 1:4) {
    for (r in 1:2) {
      Z[, t, r] <- chip_mat[, 2 * (t - 1) + r]
    }
  }
  save(file = file.path("Z", paste0("Z_", chr[l])), Z)
}

# ---- 3. Concatenate chromosomes ----------------------------------------------
z_list <- lapply(
  chr,
  function(chr_name) load_saved(file.path("Z", paste0("Z_", chr_name)), "Z")
)
dnum <- vapply(z_list, function(z) dim(z)[1], integer(1))  # bins per chromosome
Z_all <- abind(z_list, along = 1)
# save(file = "Z_all", Z_all)
# save(file = "dnum", dnum)

# ---- RNA-seq table and PCA ---------------------------------------------------
# Computed before the HOSVD figure because that figure plots pca$rotation
# (the original referenced `pca` before defining it).
x <- read.xlsx(
  "GSE141941_normoxiaVShypoxia6h.12h.24h_RNA-seq.PROCESSED.DATA.xlsx",
  sheetIndex = 1
)
# save(file = "x", x)
pca <- prcomp(scale(x[, -1]))

# ---- HOSVD of the binned signal ----------------------------------------------
HOSVD <- hosvd(as.tensor(Z_all), c(10, 4, 2))
save(file = "HOSVD", HOSVD)

pdf(file = "HOSVD.pdf", width = 10, height = 5)
par(mfrow = c(1, 2))
matplot(hours, HOSVD$U[[2]], type = "o", pch = 1:4, xlab = "hours",
        ylab = "PC loading", cex = 2, lwd = 2, cex.lab = 1.5, cex.axis = 2)
abline(0, 0, col = 6, lty = 2, lwd = 2)
plot(pca$rotation[, 2], HOSVD$U[[2]][, 2], xlab = "gene expression",
     ylab = "m6A", cex = 2, xlim = c(-0.7, 0.9), ylim = c(-0.7, 0.8),
     cex.axis = 2, cex.lab = 1.5, lwd = 2)
abline(0, 1, col = 2, lty = 2, lwd = 2)
par(mfrow = c(1, 1))
dev.off()

print(cor(hours, HOSVD$U[[2]]))
print(HOSVD$Z@data[, 2, 2])

# Bins are selected by a chi-squared test on the standardised second
# singular vector of mode 1, BH-adjusted.
P <- pchisq(scale(HOSVD$U[[1]][, 2])^2, 1, lower.tail = FALSE)
padj <- p.adjust(P, "BH")
print(table(padj < 0.01))

# ---- 4. Map selected bins to genes (GRCh37) ----------------------------------
grch37 <- useMart(biomart = "ENSEMBL_MART_ENSEMBL",
                  host = "grch37.ensembl.org",
                  path = "/biomart/martservice",
                  dataset = "hsapiens_gene_ensembl")

bm_attributes <- c("hgnc_symbol", "entrezgene_id", "chromosome_name",
                   "start_position", "end_position")
bm_filters <- c("chromosome_name", "start", "end")

selected <- as.vector(padj < 0.01)
offsets <- c(0, cumsum(dnum))  # offsets[l] = number of bins before chr l
gene_all <- NULL
for (l in 1:22) {
  cat("\n l= ", l, "\n")
  id <- which(selected[offsets[l] + seq_len(dnum[l])])
  if (length(id) > 0) {
    # Integer arithmetic keeps coordinates such as 100000 from being rendered
    # as "1e+05" when they are converted to text.
    bin_end <- id * as.integer(bin_size)
    bin_start <- bin_end - (as.integer(bin_size) - 1L)
    hits <- vector("list", length(id))
    for (i in seq_along(id)) {
      cat(i, " ")
      hits[[i]] <- getBM(attributes = bm_attributes, filters = bm_filters,
                         values = list(l, bin_start[i], bin_end[i]),
                         mart = grch37)
    }
    gene <- do.call(rbind, hits)
    gene <- gene[!is.na(gene[, 1]) & gene[, 1] != "", ]
    gene_all <- rbind(gene_all, gene)
  }
}
# Saved because the file is read back with load("gene_all") later on.
save(file = "gene_all", gene_all)
write.table(file = "gene_all.csv", gene_all, row.names = FALSE,
            col.names = FALSE, quote = FALSE, sep = "\t")

# ---- 5. PCA of gene expression -----------------------------------------------
pdf(file = "PCA.pdf", width = 5, height = 5)
matplot(hours, pca$rotation, type = "o", pch = 1:4, xlab = "hours",
        ylab = "PC loading", cex = 2, lwd = 2, cex.lab = 1.5, cex.axis = 2)
abline(0, 0, col = 6, lty = 2, lwd = 2)
dev.off()
print(cor(hours, pca$rotation))

P <- pchisq(scale(pca$x[, 2])^2, 1, lower.tail = FALSE)
padj <- p.adjust(P, "BH")
print(table(padj < 0.01))
# Identifiers are cut at the first "."
print(data.frame(gene = vapply(
  strsplit(as.character(x[padj < 0.01, 1]), ".", fixed = TRUE),
  "[", character(1), 1
)))

# PCA method: the 52 rows with the largest |PC1 score|
top_pc1 <- order(-abs(pca$x[, 1]))[1:52]
print(data.frame(gene = vapply(
  strsplit(as.character(x[top_pc1, 1]), ".", fixed = TRUE),
  "[", character(1), 1
)))

gene <- read.ods("gene_RNA-seq.ods", sheet = 2)
load("gene_all")
print(table(unique(gene_all[, 1]) %in% gene[, 2]))
# Integrated analysis of the binned m6A tensor (Z_all) with the RNA-seq table
# (x), followed by comparison methods (lm, SAM, limma, random forest).
# Relies on Z_all, dnum and x created earlier in this script and on the
# "gene_all" file written by the earlier HOSVD gene lookup.

library(rTensor)
library(biomaRt)
library(limma)
# tensor(), read.ods(), sam() and randomForest() are called below but no
# package providing them was attached.
# NOTE(review): presumably tensor, readODS, siggenes and randomForest --
# confirm.
library(tensor)
library(readODS)
library(siggenes)
library(randomForest)

hours <- c(0, 6, 12, 24)

# Map selected 25 kb bins to genes through BioMart.
#
# selected     logical, one entry per bin, chromosomes concatenated in the
#              same order as `dnum`.
# dnum         number of bins per chromosome.
# mart         biomaRt Mart object to query.
# chromosomes  chromosome indices to scan (also used as the chromosome_name
#              filter value).
# bin_size     bin width in bp; bin i covers (i - 1) * bin_size + 1 ..
#              i * bin_size.
#
# Returns the row-bound getBM() results with rows lacking an HGNC symbol
# removed, or NULL when no bin is selected.
lookup_bin_genes <- function(selected, dnum, mart, chromosomes = 1:22,
                             bin_size = 25000L) {
  selected <- as.vector(selected)
  bin_size <- as.integer(bin_size)
  offsets <- c(0, cumsum(dnum))  # offsets[l] = number of bins before chr l
  bm_attributes <- c("hgnc_symbol", "entrezgene_id", "chromosome_name",
                     "start_position", "end_position")
  bm_filters <- c("chromosome_name", "start", "end")
  found <- NULL
  for (l in chromosomes) {
    cat("\n l= ", l, "\n")
    id <- which(selected[offsets[l] + seq_len(dnum[l])])
    if (length(id) == 0) {
      next
    }
    # Integer arithmetic keeps coordinates such as 100000 from being rendered
    # as "1e+05" when they are converted to text.
    bin_end <- id * bin_size
    bin_start <- bin_end - (bin_size - 1L)
    hits <- vector("list", length(id))
    for (i in seq_along(id)) {
      cat(i, " ")
      hits[[i]] <- getBM(attributes = bm_attributes, filters = bm_filters,
                         values = list(l, bin_start[i], bin_end[i]),
                         mart = mart)
    }
    gene <- do.call(rbind, hits)
    gene <- gene[!is.na(gene[, 1]) & gene[, 1] != "", ]
    found <- rbind(found, gene)
  }
  found
}

# Strip everything after the first "." from the identifiers in x[rows, 1].
gene_ids <- function(rows) {
  data.frame(gene = vapply(
    strsplit(as.character(x[rows, 1]), ".", fixed = TRUE),
    "[", character(1), 1
  ))
}

grch37 <- useMart(biomart = "ENSEMBL_MART_ENSEMBL",
                  host = "grch37.ensembl.org",
                  path = "/biomart/martservice",
                  dataset = "hsapiens_gene_ensembl")

# NOTE(review): the four cells sum to 20014, not 20000. If 189 and 45 are the
# non-overlapping counts, the first cell would be 20000-189-45-7 -- confirm.
print(fisher.test(matrix(c(20000 - 189 - 45 + 7, 189, 45, 7), 2)))

# ---- HOSVD of the m6A x expression product tensor ----------------------------
ZZ <- tensor(as.array(Z_all), as.array(Z_all), 1, 1)
expr_mat <- data.matrix(x[, -1])
xx <- t(expr_mat) %*% expr_mat
Zx <- tensor(ZZ, xx, 1, 1)
HOSVD <- hosvd(as.tensor(Zx))

pdf(file = "TKD.pdf", width = 15, height = 5)
par(mfrow = c(1, 3))
matplot(hours, HOSVD$U[[4]], type = "o", pch = 1:4, xlab = "hours",
        ylab = "singular value vectors", cex = 2, lwd = 2, cex.lab = 1.5,
        cex.axis = 2, main = "gene expression")
abline(0, 0, col = 6, lty = 2, lwd = 2)
matplot(hours, HOSVD$U[[2]], type = "o", pch = 1:4, xlab = "hours",
        ylab = "singular value vectors", cex = 2, lwd = 2, cex.lab = 1.5,
        cex.axis = 2, main = "m6A")
abline(0, 0, col = 6, lty = 2, lwd = 2)
plot(HOSVD$U[[4]][, 2], HOSVD$U[[2]][, 2], xlab = "gene expression",
     ylab = "m6A", cex = 2, xlim = c(-0.7, 0.9), ylim = c(-0.7, 0.8),
     cex.axis = 2, cex.lab = 1.5, lwd = 2)
abline(0, 1, col = 2, lty = 2, lwd = 2)
par(mfrow = c(1, 1))
dev.off()

print(cor(hours, HOSVD$U[[4]]))
print(cor(hours, HOSVD$U[[2]]))

# Project Z_all onto the second singular vectors of modes 1 and 2, giving one
# score per bin.
v <- tensor(tensor(Z_all, HOSVD$U[[1]][, 2], 3, 1), HOSVD$U[[2]][, 2], 2, 1)
P <- pchisq(scale(v)^2, 1, lower.tail = FALSE)
padj <- p.adjust(P, "BH")
print(table(padj < 0.01))

# The original wrote whatever `gene_all` was still in memory to
# gene_all_Z.csv; the bin -> gene lookup is now rerun for this selection.
gene_all <- lookup_bin_genes(padj < 0.01, dnum, grch37)
# save(file = "gene_all_Z", gene_all)
write.table(file = "gene_all_Z.csv", gene_all, row.names = FALSE,
            col.names = FALSE, quote = FALSE, sep = "\t")
gene_all_Z <- gene_all
load("gene_all")  # gene list from the earlier m6A-only HOSVD
print(table(unique(gene_all[, 1]) %in% unique(gene_all_Z[, 1])))

u <- expr_mat %*% HOSVD$U[[4]][, 2]
P <- pchisq(scale(u)^2, 1, lower.tail = FALSE)
padj <- p.adjust(P, "BH")
print(table(padj < 0.01))
print(gene_ids(padj < 0.01))
# gene_RNA-seq_Z
gene <- read.ods("gene_RNA-seq.ods", sheet = 2)
gene1 <- read.ods("gene_RNA-seq_Z.ods", sheet = 2)
print(table(gene[, 2] %in% gene1[, 2]))
gene_all <- gene_all_Z
print(table(unique(gene_all[, 1]) %in% gene1[, 2]))
# NOTE(review): same cell arithmetic as the fisher.test above -- confirm.
print(fisher.test(matrix(c(20000 - 188 - 41 + 12, 188, 41, 12), 2)))

# ---- Comparison: per-gene linear regression on time --------------------------
LM <- lm(t(scale(x[, -1])) ~ c(0, 6, 12, 24))
SLM <- summary(LM)
# Element 10 of each per-response summary, transposed to one row per response;
# the three columns are passed to pf() below.
fs <- t(data.frame(lapply(SLM, "[", 10)))
P <- pf(fs[, 1], fs[, 2], fs[, 3], lower.tail = FALSE)
padj <- p.adjust(P, "BH")
print(table(padj < 0.01))
print(table(padj < 0.05))

# ---- Comparison: SAM ----------------------------------------------------------
SAM <- sam(scale(x[, -1]), 0:3)

# ---- Comparison: limma on gene expression -------------------------------------
design <- model.matrix(~ 0 + c(1, 2, 3, 4))
colnames(design) <- c("N")
fit <- lmFit(log(x[, -1]), design)
fit <- eBayes(fit)
# Column 5 of the topTable() result is thresholded.
# NOTE(review): presumably adj.P.Val in limma's default layout -- confirm.
top <- topTable(fit, number = dim(x[, -1])[1])
print(table(top[, 5] < 0.01))
print(table(top[, 5] < 0.05))

# ---- Same comparisons on the m6A bins (bins x 8 matrix) -----------------------
dim(Z_all) <- c(dim(Z_all)[1], 8)
# LM <- lm(t(Z_all) ~ as.vector(outer(c(0, 6, 12, 24), 1:2)))
# LM <- lm(t(Z_all) ~ as.vector(outer(c(0, 6, 12, 24), 2:1)))
# NOTE(review): both fits above are commented out, so `LM` is still the
# gene-expression model and the table below repeats that result. Enable the
# intended fit before relying on it.
SLM <- summary(LM)
fs <- t(data.frame(lapply(SLM, "[", 10)))
P <- pf(fs[, 1], fs[, 2], fs[, 3], lower.tail = FALSE)
print(table(p.adjust(P, "BH") < 0.01))

design <- model.matrix(~ 0 + as.vector(outer(c(1:4), c(0, 4), "+")))
colnames(design) <- c("N")
fit <- lmFit(log(Z_all), design)
fit <- eBayes(fit)
top <- topTable(fit, number = dim(Z_all)[1])
print(table(top[, 5] < 0.01))
print(table(top[, 5] < 0.05))

# ---- Comparison: random forest ------------------------------------------------
set.seed(0)
rf <- randomForest(t(scale(x[, -1])), c(1:4))
print(table(abs(rf$importance) > 0))
print(gene_ids(as.vector(abs(rf$importance) > 0)))
# gene_RNA-seq_rf

set.seed(0)
rf <- randomForest(t(Z_all), as.vector(outer(c(1:4), 1:2)))
print(table(abs(rf$importance) > 0))

# Genes overlapping the bins with non-zero importance.
gene_all <- lookup_bin_genes(abs(rf$importance) > 0, dnum, grch37)
# save(file = "gene_all_rf", gene_all)
# Write the gene table currently held in `gene_all` as tab-separated text
# without header or row names.
write.table(file = "gene_all_rf.csv", gene_all, row.names = FALSE,
            col.names = FALSE, quote = FALSE, sep = "\t")

# lars() is called below but no package providing it was attached.
# NOTE(review): presumably the lars package -- confirm.
library(lars)
# Fit with samples as rows (transposed, column-scaled expression table) and
# 1:4 as the response.
LARS <- lars(t(scale(x[, -1])), c(1:4))
# Console output that was pasted after the call above (cut off in this copy);
# kept as a comment because it is not R code:
# There are more than 500 variables and n