library(minfi) library(limma) library(missMethyl) ################## KIDNEY CANCER ANALYSIS ############ # read in targets file with sample information Ktargets<-read.delim("KidneyTargets.txt") # read data into R rgSet450 = read.450k.exp(targets = Ktargets) # swan normalisation mSet450SWAN = preprocessSWAN(rgSet450) # read in annotation file ann450 = read.csv("HumanMethylation450_15017482_v.1.2.csv",header=TRUE,sep=",",skip=7) # Filter out poor quality probes detP<-detectionP(rgSet450) keep<-rowSums(detP<0.01) == ncol(rgSet450) mSet450SWAN<-mSet450SWAN[keep,] # Remove X and Y chromosome CpGs mSet450SWAN = mSet450SWAN[!(featureNames(mSet450SWAN) %in% ann450$IlmnID[ann450$CHR %in% c("X","Y")]),] # Retrieve beta and m values from MethylSet object meth<-getMeth(mSet450SWAN) unmeth<-getUnmeth(mSet450SWAN) beta<-getBeta(mSet450SWAN) Mval<-log2((meth+100)/(unmeth+100)) Aval<-0.5*log2(meth+unmeth) # Set up designK matrix group<-factor(pData(mSet450SWAN)$Sample_Group,levels=c("normal","Cancer")) designK<-model.matrix(~group) # Test for differential methylation fit<-lmFit(Mval,designK) fit$Amean <- rowMeans(Mval) fit<-eBayes(fit,trend=T) # Determine significant differentially methylated CpGs using FDR and deltaBeta cut-off of 0.1 betaNorm<-beta[,designK[,ncol(designK)]==0] betaCan<-beta[,designK[,ncol(designK)]==1] deltaBeta<-rowMeans(betaCan)-rowMeans(betaNorm) adj.P.Val<-p.adjust(fit$p.value[,2],method="BH") sigAllDM<- adj.P.Val < 0.05 & abs(deltaBeta) > 0.1 # Test for differential variability fitvar<-varFit(Mval,designK) # Determine significant differentially variable CpGs using FDR and logVarRatio cut-off of log(5) adj.P.ValDV<-p.adjust(fitvar$p.value[,2],method="BH") sigAllDV<-adj.P.ValDV < 0.05 & abs(fitvar$LogVarRatio[,2]) > log(5) #Annotate fit objects q<-match(rownames(fit),ann450$IlmnID) ann<-ann450[q,c("IlmnID","CHR","UCSC_RefGene_Name","UCSC_RefGene_Group","Relation_to_UCSC_CpG_Island","UCSC_CpG_Islands_Name","Chromosome_36","Coordinate_36")] fit$genes<-ann fitvar$genes<-ann #Other tests of differential variability # F-test group <- designK[,2] i.group <- names(table(group)) i.data1 <- Mval[,group==i.group[1]] i.data2 <- Mval[,group==i.group[2]] varnorm<-apply(i.data1,1,var) varcan<-apply(i.data2,1,var) Fstat<-varcan/varnorm FPval<-2*pmin(pf(Fstat,df1=ncol(i.data1)-1,df2=ncol(i.data2)-1,lower.tail=FALSE),pf(1/Fstat,df1=ncol(i.data2)-1,df2=ncol(i.data1)-1,lower.tail=FALSE)) adj.FPval<-p.adjust(FPval,method="BH") Ftest<-data.frame(fit$genes,VarCan=varcan,VarNorm=varnorm,Fstat,FPval,adj.FPval) ## Bartlett test narrays=ncol(Mval) varp<-(159*varnorm+282*varcan)/(narrays-2) top<-(narrays-2)*log(varp) - (282*log(varcan)+159*log(varnorm)) bottom<-1+(1/159+1/282-1/(narrays-2))/3 Bstat<-top/bottom BartPval<-pchisq(Bstat,df=1,lower.tail=FALSE) adj.BPval<-p.adjust(BartPval,method="BH") Btest<-data.frame(fit$genes,Bstat=Bstat,BPval=BartPval,adj.BPval) # Island-level analysis: Kidney island<-fit$genes$Relation_to_UCSC_CpG_Island=="Island" MvalIslandK<-Mval[island,] island_name<-factor(as.character(fit$genes$UCSC_CpG_Islands_Name[island])) # Averaging the CpG sites M values in each CpG island newKMval<-matrix(0,nrow=length(levels(island_name)),ncol=ncol(Mval)) for(i in 1:ncol(newKMval)) newKMval[,i]<-tapply(MvalIslandK[,i],island_name,mean) # Differential methylation analysis fitKI<-lmFit(newKMval,designK) fitKI<-eBayes(fitKI) fitKI$genes$Island_name<-levels(island_name) fitKI$genes<-as.data.frame(fitKI$genes) # Differential variability analysis fitvarKI<-varFit(newKMval,designK) fitvarKI$genes$Island_name<-levels(island_name) fitvarKI$genes<-as.data.frame(fitvarKI$genes) # Annotate fit objects ann450$UCSC_CpG_Islands_Name<-as.character(ann450$UCSC_CpG_Islands_Name) q<-match(fitvarKI$genes$Island_name,ann450$UCSC_CpG_Islands_Name) ann<-ann450[q,c("IlmnID","CHR","UCSC_RefGene_Name","UCSC_RefGene_Group","Relation_to_UCSC_CpG_Island","UCSC_CpG_Islands_Name","Chromosome_36","Coordinate_36")] fitvarKI$genes<-ann fitKI$genes<-ann ################## LUNG CANCER ANALYSIS ############ # read in targets file with sample information Ltargets<-read.delim("LungTargets.txt") # Read data into R rgSetLung = read.450k.exp(targets = Ltargets) # swan normalisation mSetLung = preprocessSWAN(rgSetLung) # Filter out poor quality probes detP<-detectionP(rgSetLung) keep<-rowSums(detP<0.01) == ncol(rgSetLung) mSetLung<-mSetLung[keep,] # Filter out X and Y chromosomes mSetLung = mSetLung[!(featureNames(mSetLung) %in% ann450$IlmnID[ann450$CHR %in% c("X","Y")]),] mdsPlot(mSetLung,sampNames= pData(mSetLung)$HID,sampGroups = pData(mSetLung)$group) # Extract group information group<-as.character(pData(mSetLung)$group) group[pData(mSetLung)$group=="Solid Tissue Normal"]<-"normal" group[pData(mSetLung)$group=="Primary solid Tumor"]<-"cancer" group<-factor(group,levels=c("normal","cancer")) # Extract M values and Beta values from MethylSet object methL<-getMeth(mSetLung) unmethL<-getUnmeth(mSetLung) MvalLung<-log2((methL+100)/(unmethL+100)) betaL<-getBeta(mSetLung) # Set up design matrix designL<-model.matrix(~group) # Test for differential methylation fitLM<-lmFit(MvalLung,designL) fitLM$Amean <- rowMeans(MvalLung) fitLM<-eBayes(fitLM,trend=T) # Determine significant differentially methylated CpGs with FDR and deltaBeta cut-off of 0.1 betaNormL<-betaL[,designL[,ncol(designL)]==0] betaCanL<-betaL[,designL[,ncol(designL)]==1] deltaBetaL<-rowMeans(betaCanL)-rowMeans(betaNormL) adj.P.Val<-p.adjust(fitLM$p.value[,2],method="BH") sigAllDML<- adj.P.Val < 0.05 & abs(deltaBetaL) > 0.1 # Test for differential variability fitvarL<-varFit(MvalLung,designL) # Determine significant differentially variable CpGs with FDR and logVarRatio cut-off of log(5) adj.P.Val <- p.adjust(fitvarL$p.value[,2],method="BH") sigAllDVL <- adj.P.Val < 0.05 & abs(fitvarL$LogVarRatio[,2]) > log(5) #Annotate fit objects q<-match(fitLM$genes[,1],ann450$IlmnID) ann<-ann450[q,c("IlmnID","CHR","UCSC_RefGene_Name","UCSC_RefGene_Group","Relation_to_UCSC_CpG_Island","UCSC_CpG_Islands_Name","Chromosome_36","Coordinate_36")] fitLM$genes<-ann fitvarL$genes<-ann # Island-level analysis: Lung island<-fitLM$genes$Relation_to_UCSC_CpG_Island=="Island" MvalIslandL<-MvalLung[island,] island_name<-factor(as.character(fitLM$genes$UCSC_CpG_Islands_Name[island])) # Averaging the CpG sites M values in each CpG island newLMval<-matrix(0,nrow=length(levels(island_name)),ncol=ncol(MvalLung)) for(i in 1:ncol(newLMval)) newLMval[,i]<-tapply(MvalIslandL[,i],island_name,mean) # Differential methylation analysis fitLI<-lmFit(newLMval,designL) fitLI<-eBayes(fitLI) fitLI$genes$Island_name<-levels(island_name) fitLI$genes<-as.data.frame(fitLI$genes) # Differential variability analysis fitvarLI<-varFit(newLMval,designL) fitvarLI$genes$Island_name<-levels(island_name) fitvarLI$genes<-as.data.frame(fitvarLI$genes) # Annotate fit objects q<-match(fitvarLI$genes$Island_name,ann450$UCSC_CpG_Islands_Name) ann<-ann450[q,c("IlmnID","CHR","UCSC_RefGene_Name","UCSC_RefGene_Group","Relation_to_UCSC_CpG_Island","UCSC_CpG_Islands_Name","Chromosome_36","Coordinate_36")] fitvarLI$genes<-ann fitLI$genes<-ann ############## PROSTATE CANCER ANALYSIS ################# # read in targets file with sample information Ptargets<-read.delim("ProstateTargets.txt") # Read data into R rgSetProstate = read.450k.exp(targets = targetsProstate) # swan normalisation mSetProstate = preprocessSWAN(rgSetProstate) # Filter out poor quality probes detP<-detectionP(rgSetProstate) keep<-rowSums(detP<0.01) == ncol(rgSetProstate) mSetProstate<-mSetProstate[keep,] # Filter out X and Y chromosome CpGs mSetProstate = mSetProstate[!(featureNames(mSetProstate) %in% ann450$IlmnID[ann450$CHR %in% c("X","Y")]),] mdsPlot(mSetProstate,sampNames= pData(mSetProstate)$HID,sampGroups = pData(mSetProstate)$group) #Extract group information group<-as.character(pData(mSetProstate)$group) group[pData(mSetProstate)$group=="Solid Tissue Normal"]<-"normal" group[pData(mSetProstate)$group=="Primary solid Tumor"]<-"cancer" group<-factor(group,levels=c("normal","cancer")) # Extract M values and Beta values from MethylSet object methP<-getMeth(mSetProstate) unmethP<-getUnmeth(mSetProstate) MvalProstate<-log2((methP+100)/(unmethP+100)) betaP<-getBeta(mSetProstate) # Test for differential methylation designP<-model.matrix(~group) fitPM<-lmFit(MvalProstate,designP) fitPM$Amean <- rowMeans(MvalProstate) fitPM<-eBayes(fitPM,trend=T) # Determine significant differentially methylated CpGs using FDR and deltaBeta cut-off of 0.1 betaNormP<-betaP[,designP[,ncol(designP)]==0] betaCanP<-betaP[,designP[,ncol(designP)]==1] deltaBetaP<-rowMeans(betaCanP)-rowMeans(betaNormP) adj.P.Val<-p.adjust(fitPM$p.value[,2],method="BH") sigAllDMP<- adj.P.Val < 0.05 & abs(deltaBetaP) > 0.1 # Test for differential variability fitvarP<-varFit(MvalProstate,designP) # Determine significant differentially variable CpGs using FDR and logVarRatio cut-off of log(5) adj.P.Val<-p.adjust(fitvarP$p.value[,2],method="BH") sigAllDVP<-adj.P.Val < 0.05 & abs(fitvarP$LogVarRatio[,2]) > log(5) #Annotate fit objects q<-match(fitPM$genes[,1],ann450$IlmnID) ann<-ann450[q,c("IlmnID","CHR","UCSC_RefGene_Name","UCSC_RefGene_Group","Relation_to_UCSC_CpG_Island","UCSC_CpG_Islands_Name","Chromosome_36","Coordinate_36")] fitPM$genes<-ann fitvarP$genes<-ann # Island-level analysis: Prostate island<-fitPM$genes$Relation_to_UCSC_CpG_Island=="Island" MvalIslandP<-MvalProstate[island,] island_name<-factor(as.character(fitPM$genes$UCSC_CpG_Islands_Name[island])) # Averaging the CpG sites M values in each CpG island newPMval<-matrix(0,nrow=length(levels(island_name)),ncol=ncol(MvalProstate)) for(i in 1:ncol(newPMval)) newPMval[,i]<-tapply(MvalIslandP[,i],island_name,mean) # Differential methylation analysis fitPI<-lmFit(newPMval,designP) fitPI<-eBayes(fitPI) fitPI$genes$Island_name<-levels(island_name) fitPI$genes<-as.data.frame(fitPI$genes) # Differential variability analysis fitvarPI<-varFit(newPMval,designP) fitvarPI$genes$Island_name<-levels(island_name) fitvarPI$genes<-as.data.frame(fitvarPI$genes) # Annotate fit objects q<-match(fitvarPI$genes$Island_name,ann450$UCSC_CpG_Islands_Name) ann<-ann450[q,c("IlmnID","CHR","UCSC_RefGene_Name","UCSC_RefGene_Group","Relation_to_UCSC_CpG_Island","UCSC_CpG_Islands_Name","Chromosome_36","Coordinate_36")] fitvarPI$genes<-ann fitPI$genes<-ann #################### UTERINE CANCER ANALYSIS ################# # read in targets file with sample information targetsUC<-read.delim("UCECtargets.txt") # Read data into R rgSetUCEC = read.450k.exp(targets = targetsUC) # swan normalisation mSetUCEC = preprocessSWAN(rgSetUCEC) # Filter out poor quality probes detP<-detectionP(rgSetUCEC) keep<-rowSums(detP<0.01) == ncol(rgSetUCEC) mSetUCEC<-mSetUCEC[keep,] # Filter out X and Y chromosome CpGs mSetUCEC = mSetUCEC[!(featureNames(mSetUCEC) %in% ann450$IlmnID[ann450$CHR %in% c("X","Y")]),] mdsPlot(mSetUCEC,sampNames= pData(mSetUCEC)$HID,sampGroups = pData(mSetUCEC)$group) #Extract group information group<-as.character(pData(mSetUCEC)$group) group[pData(mSetUCEC)$group=="Solid Tissue Normal"]<-"normal" group[pData(mSetUCEC)$group=="Primary solid Tumor"]<-"cancer" group<-factor(group,levels=c("normal","cancer")) designUC<-model.matrix(~group) # Extract M values and Beta values from methylSet object methUC<-getMeth(mSetUCEC) unmethUC<-getUnmeth(mSetUCEC) betaUC<-getBeta(mSetUCEC) MvalUC<-log2((methUC+100)/(unmethUC+100)) # Differential methylation analysis fitUM<-lmFit(MvalUC,designUC) fitUM$Amean <- rowMeans(MvalUC) fitUM<-eBayes(fitUM,trend=T,robust=T) # Identify significant differentially methylated CpGs using FDR and deltaBeta cut-off of 0.1 betaNormUC<-betaUC[,designUC[,ncol(designUC)]==0] betaCanUC<-betaUC[,designUC[,ncol(designUC)]==1] deltaBetaUC<-rowMeans(betaCanUC)-rowMeans(betaNormUC) adj.P.Val<-p.adjust(fitUM$p.value[,2],method="BH") sigAllDMUC<- adj.P.Val < 0.05 & abs(deltaBetaUC) > 0.1 # Differential variability analysis fitvarUC<-varFit(MvalUC,designUC) # Identify significant differentially variable CpGs using FDR and logVarRatio cut-off og log(5) adj.P.Val<-p.adjust(fitvarUC$p.value[,2],method="BH") sigAllDVUC<-adj.P.Val < 0.05 & abs(fitvarUC$LogVarRatio[,2]) > log(5) #Annotate fit objects q<-match(rownames(fitUM),ann450$IlmnID) ann<-ann450[q,c("IlmnID","CHR","UCSC_RefGene_Name","UCSC_RefGene_Group","Relation_to_UCSC_CpG_Island","UCSC_CpG_Islands_Name","Chromosome_36","Coordinate_36")] fitUM$genes<-ann fitvarUC$genes<-ann ################### AGING DATA ANALYSIS ###################### # read in targets file with sample information targets<-read.delim("targetsAge.txt",header=T) # Read data into R rgSetAge<-read.450k.exp(targets=targets) # swan normalisation mSetAge = preprocessSWAN(rgSetAge) # Filter out poor quality probes detP<-detectionP(rgSetAge) keep<-rowSums(detP<0.01) == ncol(rgSetAge) mSetAge<-mSetAge[keep,] # Set up design matrix group<-factor(targets$Sample_Group,levels=c("NewBorns","OLD")) design<-model.matrix(~group) # Extract M values and Beta values from methylSet object meth<-getMeth(mSetAge) unmeth<-getUnmeth(mSetAge) beta<-getBeta(mSetAge) Mval<-log2((meth+100)/(unmeth+100)) # Differential methylation fit<-lmFit(Mval,design) fit$Amean<-rowMeans(Mval) fit<-eBayes(fit,trend=T,robust=T) # Determine significant differentially methylated CpGs using FDR and deltaBeta cut-off of 0.1 adj.P.Val<-p.adjust(fit$p.value[,2],method="BH") betaNB<-beta[,design[,2]==0] betaOLD<-beta[,design[,2]==1] deltaBeta<-rowMeans(betaOLD)-rowMeans(betaNB) sigDM<-adj.P.Val < 0.05 & abs(deltaBeta) > 0.1 ## Differential variability fitvar<-varFit(Mval,design) adj.P.Val<-p.adjust(fitvar$p.value[,2],method="BH") sigDV<-adj.P.Val < 0.05 & abs(fitvar$LogVarRatio[,2]) > log(5) # Separate into more variable in old vs newborn (up) and more variable in newborn vs old (dn) upDV<-adj.P.Val < 0.05 & fitvar$LogVarRatio[,2] > log(5) dnDV<-adj.P.Val < 0.05 & fitvar$LogVarRatio[,2] < -log(5) # Annotate fit objects q<-match(rownames(fit),ann450$IlmnID) ann<-ann450[q,c("IlmnID","CHR","UCSC_RefGene_Name","UCSC_RefGene_Group","Relation_to_UCSC_CpG_Island","UCSC_CpG_Islands_Name","Chromosome_36","Coordinate_36")] fit$genes<-ann fitvar$genes<-ann # GOstats analysis library(GO.db) library(GOstats) library(org.Hs.eg.db) # Define significant DV genes topgenes_updv<-as.character(fitvar$genes$UCSC_RefGene_Name[upDV]) topgenes_dndv<-as.character(fitvar$genes$UCSC_RefGene_Name[dnDV]) topgenes_updv<-gsub(";.*","",topgenes_updv) topgenes_updv<-unique(topgenes_updv) # Get Entrez gene IDs symbolEG<-toTable(org.Hs.egSYMBOL2EG) m<-match(topgenes_updv,symbolEG[,2]) topEG_updv <- symbolEG[m[!is.na(m)],1] topgenes_dndv<-gsub(";.*","",topgenes_dndv) topgenes_dndv<-unique(topgenes_dndv) length(topgenes_dndv) symbolEG<-toTable(org.Hs.egSYMBOL2EG) m<-match(topgenes_dndv,symbolEG[,2]) topEG_dndv <- symbolEG[m[!is.na(m)],1] # Define gene universe allgenes<-as.character(fitvar$genes$UCSC_RefGene_Name) allgenes<-gsub(";.*","",allgenes) allgenes<-unique(allgenes) m<-match(allgenes,symbolEG[,2]) allEG <- symbolEG[m[!is.na(m)],1] # Get GO terms GOterms<-toTable(org.Hs.egGO) # remove genes with no GO category uniqueG <-unique(GOterms$gene_id) m<-match(allEG,uniqueG) allEG<-allEG[!is.na(m)] m<-match(topEG_updv,uniqueG) topEG_updv<-topEG_updv[!is.na(m)] m<-match(topEG_dndv,uniqueG) topEG_dndv<-topEG_dndv[!is.na(m)] cutoff = 0.0001 BPparams_updv <- new("GOHyperGParams", geneIds = topEG_updv, universeGeneIds = allEG, annotation = "org.Hs.eg.db", ontology = "BP",pvalueCutoff=cutoff,conditional=FALSE,testDirection="over") CCparams_updv <- new("GOHyperGParams", geneIds = topEG_updv, universeGeneIds = allEG, annotation = "org.Hs.eg.db", ontology = "CC",pvalueCutoff=cutoff,conditional=FALSE,testDirection="over") MFparams_updv<- new("GOHyperGParams", geneIds = topEG_updv, universeGeneIds = allEG, annotation = "org.Hs.eg.db", ontology = "MF",pvalueCutoff=cutoff,conditional=FALSE,testDirection="over") BPparams_dndv <- new("GOHyperGParams", geneIds = topEG_dndv, universeGeneIds = allEG, annotation = "org.Hs.eg.db", ontology = "BP",pvalueCutoff=cutoff,conditional=FALSE,testDirection="over") CCparams_dndv <- new("GOHyperGParams", geneIds = topEG_dndv, universeGeneIds = allEG, annotation = "org.Hs.eg.db", ontology = "CC",pvalueCutoff=cutoff,conditional=FALSE,testDirection="over") MFparams_dndv<- new("GOHyperGParams", geneIds = topEG_dndv, universeGeneIds = allEG, annotation = "org.Hs.eg.db", ontology = "MF",pvalueCutoff=cutoff,conditional=FALSE,testDirection="over") # Perform GOstats test BPhgOver_updv <- hyperGTest(BPparams_updv) CChgOver_updv <- hyperGTest(CCparams_updv) MFhgOver_updv <- hyperGTest(MFparams_updv) BPhgOver_dndv <- hyperGTest(BPparams_dndv) CChgOver_dndv <- hyperGTest(CCparams_dndv) MFhgOver_dndv <- hyperGTest(MFparams_dndv) # save html report htmlReport(BPhgOver_updv, file = "GO_results_Age_updv.html", label="Biological Process Results - Old var", digits=10) htmlReport(CChgOver_updv, file = "GO_results_Age_updv.html", label="Cellular Component results - Old var", digits=10,append=T) htmlReport(MFhgOver_updv, file = "GO_results_Age_updv.html", label="Molecular Function results - Old var", digits=10,append=T) htmlReport(BPhgOver_dndv, file = "GO_results_Age_dndv.html", label="Biological Process Results - Young var", digits=10) htmlReport(CChgOver_dndv, file = "GO_results_Age_dndv.html", label="Cellular Component results - Young var", digits=10,append=T) htmlReport(MFhgOver_dndv, file = "GO_results_Age_dndv.html", label="Molecular Function results - Young var", digits=10,append=T)