# R-script to perform a simulation study based on self-self hybridised dual-color microarray data # as descriped in "Assessment and optimisation of normalisation methods for # dual-colour antibody microarrays" # The first part of the R-script will generate semi-artifical data sets for different simulation scenarios # The second part is the main part of the simulation study # The third part will generate the different figures, e.g. ROC-plot, boxplots of the AUC values and MAplots # Author: Martin Sill ############################################################################################################# #1. generate semi-artifical data using self-self hybridised dual-color microarray data ###################### library(Biobase) library(limma) library(statmod) library(ROC) library(vegan) Sys.setlocale("LC_ALL","C") rm(list = ls()) #setwd("~/workspace") # path to selfself.RData # load self-self hybridised data load("selfself.RData") mu <- 0.1 sigma <- 0.1 pup <- c(0.1,0.2,0,0.2,0.4,0,0.05,0.1,0.5,0.6,0.8,0,0,0,0,0) pdown <- c(0.1,0,0.2,0.2,0,0.4,0,0,0,0,0,0.05,0.1,0.5,0.6,0.8) simsteps <- 100 #generate data sets for the simulation scenarios for (e in 1:length(pup)){ set.seed(07092010) RGset <- list() for (g in 1:simsteps){ MA <- MAstart tumor <- sample(1:ncol(RG),10) control <- which(!1:ncol(RG)%in%tumor) y <- character(20) y[tumor] <- "tumor" y[control] <- "control" MA$M[which(is.na(MA$M))] <- 0 pr <- which(MA$genes$Status=="protein") names <- MA$genes$Name[pr] unames <- unique(names) dexup <- sample(unames,floor(pup[e]*length(unames))) if(pup[e]!=0){ dexd <- sample(unames[-which(unames%in%dexup)],floor(pdown[e]*length(unames))) }else{dexd <- sample(unames,floor(pdown[e]*length(unames)))} rgdexup <- which(MA$genes$Name%in%dexup) rgdexdown <- which(MA$genes$Name%in%dexd) rgdex <- union(rgdexup,rgdexdown) MA$genes$Status[rgdex] <- "diffexpr" attr(MA$genes$Status,"values")[19] <- "diffexpr" attr(MA$genes$Status,"col")[19] <- "red" for (i in 1:length(rgdexup)){ for (j in 1:length(tumor)){ MA$M[rgdexup[i],tumor[j]] <- MA$M[rgdexup[i],tumor[j]]+abs(rnorm(1,mu,sigma)) } } for (i in 1:length(rgdexdown)){ for (j in 1:length(tumor)){ MA$M[rgdexdown[i],tumor[j]] <- MA$M[rgdexdown[i],tumor[j]]-abs(rnorm(1,mu,sigma)) } } RG <- RG.MA(MA) RG[[2]]$X <- y #save chip assignment RGset[[g]] <- RG } save(RGset,file=paste("RG_",pup[e],"_",pdown[e],".RData",sep="")) } #2. simulation study######################################################################################## library(Biobase) library(limma) library(statmod) library(ROC) library(vegan) Sys.setlocale("LC_ALL","C") rm(list = ls()) #setwd("~/workspace") # path to the generated RGsets #simulation settings pup <- c(0.1,0.2,0,0.2,0.4,0,0.05,0.1,0.5,0.6,0.8,0,0,0,0,0) pdown <- c(0.1,0,0.2,0.2,0,0.4,0,0,0,0,0,0.05,0.1,0.5,0.6,0.8) simsteps <- 100 # source invTseng, InvMod, RDWGL and GPA normalization source("invariant.R") for(e in 1:length(pup)){ load(paste("RG_",pup[e],"_",pdown[e],".RData",sep="")) res <- list() for(g in 1:simsteps){ RG <- RGset[[g]] #same background correction RG <- backgroundCorrect(RG,method="normexp",offset=50,normexp.method="mle") #no within normalization NN MA <- MA.RG(RG,bc.method="none") MA <- normalizeBetweenArrays(MA) #vsn normalization MAvsn <- normalizeBetweenArrays(RG,method="vsn") #set weights for spike-ins,control and housekeeping features at zero pr <- which(MA$genes$Status=="protein"|MA$genes$Status=="diffexpr") weights <- modifyWeights(RG$weights,RG$genes$Status, c( "protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster", "sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr") ,c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)) #global loess MAgl <- normalizeWithinArrays(RG,method="loess",weights=weights,iterations=10) MAgl <- normalizeBetweenArrays(MAgl) weights <- modifyWeights(RG$weights,RG$genes$Status, c( "protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster", "sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr") #,c(1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1)) ,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) #Tseng invariant selection algorithm MAinvTseng <- invselTseng(RG,pr,weights=weights,plot.chip=NULL) MAinvTseng <- normalizeBetweenArrays(MAinvTseng) #InvMod MAinvMod <- InvMod(RG,pr,weights=weights,plot.chip=NULL) MAinvMod <- normalizeBetweenArrays(MAinvMod) #RDWGL MArdwgl <- RDWGL(RG,pr,weights=weights,plot.chip=NULL) MArdwgl <- normalizeBetweenArrays(MArdwgl) #GPA MAgpa <- procrustesNormalization(MA.RG(RG,bc.method="none")[pr,]) MAgpa <- normalizeBetweenArrays(MAgpa) #limma design matrix tumor <- which(RG[[2]]$X=="tumor") control <- which(RG[[2]]$X=="control") RG[[2]]$Cy3[tumor] <- "tumor" RG[[2]]$Cy3[control] <- "control" RG[[2]]$Cy5 <- "Ref" design <- modelMatrix(RG[[2]],ref="Ref") #reorder features and correct for dublicate correlation (each feature has a replicate) MA<-MA[MA$genes$Status %in% c("protein","diffexpr"),] MAvsn<-MAvsn[MAvsn$genes$Status %in% c("protein","diffexpr"),] MAgl<-MAgl[MAgl$genes$Status %in% c("protein","diffexpr"),] MAinvTseng<-MAinvTseng[MAinvTseng$genes$Status %in% c("protein","diffexpr"),] MAinvMod<-MAinvMod[MAinvMod$genes$Status %in% c("protein","diffexpr"),] MArdwgl<-MArdwgl[MArdwgl$genes$Status %in% c("protein","diffexpr"),] MAgpa<-MAgpa[MAgpa$genes$Status %in% c("protein","diffexpr"),] MA <- MA[order(MA$genes[,"Name"]),] MAvsn <- MAvsn[order(MAvsn$genes[,"Name"]),] MAgl <- MAgl[order(MAgl$genes[,"Name"]),] MAinvTseng <- MAinvTseng[order(MAinvTseng$genes[,"Name"]),] MAinvMod <- MAinvMod[order(MAinvMod$genes[,"Name"]),] MArdwgl <- MArdwgl[order(MArdwgl$genes[,"Name"]),] MAgpa <- MAgpa[order(MAgpa$genes[,"Name"]),] corfit <- duplicateCorrelation(MA, ndups=2, spacing=1) corfitvsn <- duplicateCorrelation(MAvsn, ndups=2, spacing=1) corfitgl <- duplicateCorrelation(MAgl, ndups=2, spacing=1) corfitinvTseng <- duplicateCorrelation(MAinvTseng, ndups=2, spacing=1) corfitinvMod <- duplicateCorrelation(MAinvMod, ndups=2, spacing=1) corfitrdwgl <- duplicateCorrelation(MArdwgl, ndups=2, spacing=1) corfitgpa <- duplicateCorrelation(MAgpa, ndups=2, spacing=1) # fit linear model fit <- lmFit(MA,ndups=2,design,correlation=corfit$consensus) fitvsn <- lmFit(MAvsn,ndups=2,design,correlation=corfitvsn$consensus) fitgl <- lmFit(MAgl,design,ndups=2,correlation=corfitgl$consensus) fitinvTseng <- lmFit(MAinvTseng,design,ndups=2,correlation=corfitinvTseng$consensus) fitinvMod <- lmFit(MAinvMod,design,ndups=2,correlation=corfitinvMod$consensus) fitrdwgl <- lmFit(MArdwgl,design,ndups=2,correlation=corfitrdwgl$consensus) fitgpa <- lmFit(MAgpa,design,ndups=2,correlation=corfitgpa$consensus) # define contrast matrix contrasts.matrix <- makeContrasts(tumor-control, levels=design) fit2 <- contrasts.fit(fit, contrasts.matrix) fit2 <- eBayes(fit2) fit2 <- fit2[which(fit2$genes$Status=="diffexpr"|fit2$genes$Status=="protein"),] results <-topTable(fit2,adjust="BH",sort.by="P",number=nrow(MA)) fit2.vsn <- contrasts.fit(fitvsn, contrasts.matrix) fit2.vsn <- eBayes(fit2.vsn) fit2.vsn <- fit2.vsn[which(fit2.vsn$genes$Status=="diffexpr"|fit2.vsn$genes$Status=="protein"),] results.vsn <-topTable(fit2.vsn,adjust="BH",sort.by="P",number=nrow(MA)) fit2.gl <- contrasts.fit(fitgl, contrasts.matrix) fit2.gl <- eBayes(fit2.gl) fit2.gl <- fit2.gl[which(fit2.gl$genes$Status=="diffexpr"|fit2.gl$genes$Status=="protein"),] results.gl <-topTable(fit2.gl,adjust="BH",sort.by="P",number=nrow(MAgl)) fit2.invTseng <- contrasts.fit(fitinvTseng, contrasts.matrix) fit2.invTseng <- eBayes(fit2.invTseng) fit2.invTseng <- fit2.invTseng[which(fit2.invTseng$genes$Status=="diffexpr"|fit2.invTseng$genes$Status=="protein"),] results.invTseng <-topTable(fit2.invTseng,adjust="BH",sort.by="P",number=nrow(MAinvTseng)) fit2.invMod <- contrasts.fit(fitinvMod, contrasts.matrix) fit2.invMod <- eBayes(fit2.invMod) fit2.invMod <- fit2.invMod[which(fit2.invMod$genes$Status=="diffexpr"|fit2.invMod$genes$Status=="protein"),] results.invMod <-topTable(fit2.invMod,adjust="BH",sort.by="P",number=nrow(MAinvMod)) fit2.rdwgl <- contrasts.fit(fitrdwgl, contrasts.matrix) fit2.rdwgl <- eBayes(fit2.rdwgl) fit2.rdwgl <- fit2.rdwgl[which(fit2.rdwgl$genes$Status=="diffexpr"|fit2.rdwgl$genes$Status=="protein"),] results.rdwgl <-topTable(fit2.rdwgl,adjust="BH",sort.by="P",number=nrow(MArdwgl)) fit2.gpa <- contrasts.fit(fitgpa, contrasts.matrix) fit2.gpa <- eBayes(fit2.gpa) fit2.gpa <- fit2.gpa[which(fit2.gpa$genes$Status=="diffexpr"|fit2.gpa$genes$Status=="protein"),] results.gpa <-topTable(fit2.gpa,adjust="BH",sort.by="P",number=nrow(MAgpa)) res[[g]] <- list(results, results.vsn, results.gl, results.invTseng,results.invMod,results.rdwgl,results.gpa) cat("step",g,"\n") } save(res,file=paste("res_",pup[e],"_",pdown[e],".RData",sep="")) } #calculate sensitivity and specificity step.fdr <- function(results){ truepos <- numeric() falsepos <- numeric() trueneg <- numeric() falseneg <- numeric() for(e in 1:nrow(results)){ truepos[e] <- sum((results[1:e,"Status"]=="diffexpr")) falsepos[e] <- sum((results[1:e,"Status"]=="protein")) trueneg[e] <- sum((results[-c(1:e),"Status"]=="protein")) falseneg[e] <- sum((results[-c(1:e),"Status"]=="diffexpr")) } sens <- truepos/(truepos+falseneg) spec <- trueneg/(trueneg+falsepos) return(cbind(sensitivity=sens,specificity=spec)) } for(e in 1:length(pup)){ load(paste("res_",pup[e],"_",pdown[e],".RData",sep="")) sensNN <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) specNN <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) sensVSN <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) specVSN <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) sensGL <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) specGL <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) sensINVTseng <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) specINVTseng <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) sensINVMod <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) specINVMod <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) sensINVrdwgl <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) specINVrdwgl <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) sensGPA <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) specGPA <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res)) senspec <- list() for(g in 1:length(res)){ temp <- data.frame(lapply(res[[g]],step.fdr)) sensNN[,g] <- temp[,1] specNN[,g] <- temp[,2] sensVSN[,g] <- temp[,3] specVSN[,g] <- temp[,4] sensGL[,g] <- temp[,5] specGL[,g] <- temp[,6] sensINVTseng[,g] <- temp[,7] specINVTseng[,g] <- temp[,8] sensINVMod[,g] <- temp[,9] specINVMod[,g] <- temp[,10] sensINVrdwgl[,g] <- temp[,11] specINVrdwgl[,g] <- temp[,12] sensGPA[,g] <- temp[,13] specGPA[,g] <- temp[,14] } sfname <- paste("SenSpec_",pup[e],"_",pdown[e],".RData",sep="") save.image(sfname) cat("scenario:",e,"\n") } #3. plot figures of the results ############################################################################# library(Biobase) library(limma) library(statmod) library(ROC) library(vegan) Sys.setlocale("LC_ALL","C") rm(list = ls()) #setwd("~/workspace") # path to SenSpec....RData files #simulation settings pup <- c(0.1,0.1,0.2,0.2,0,0) pdown <- c(0.1,0,0,0.2,0.1,0.2) simsteps <- 100 #ROC-plots pdf("ROC1.pdf",width=12,height=6.75) par(mfrow=c(2,3),omi=c(0,0,0,1)) for(f in 1:6){ sfname <- paste("SenSpec_",pup[f],"_",pdown[f],".RData",sep="") load(sfname) pup <- c(0.1,0.1,0.2,0.2,0,0) pdown <- c(0.1,0,0,0.2,0.1,0.2) par(mar=c(4, 3.9, 2.4, 0.4)+ 0.1) titlel <- paste(I(pup[f]*100),"% up ",I(pdown[f]*100),"% down") plot(1,1,type="n",ylim=c(0,1),xlim=c(0,1),ylab="",xlab="",main=titlel,axes=FALSE) lines(apply(sensNN,1,median)~I(1-apply(specNN,1,median)),lwd=2,lty=1) lines(apply(sensVSN,1,median)~I(1-apply(specVSN,1,median)),col="red",lwd=2,lty=2) lines(apply(sensGL,1,median)~I(1-apply(specGL,1,median)),col="magenta",lwd=2,lty=3) lines(apply(sensINVTseng,1,median)~I(1-apply(specINVTseng,1,median)),col="blue",lwd=2,lty=4) lines(apply(sensINVMod,1,median)~I(1-apply(specINVMod,1,median)),col="orange",lwd=2,lty=5) lines(apply(sensINVrdwgl,1,median)~I(1-apply(specINVrdwgl,1,median)),col="green",lwd=2,lty=6) lines(apply(sensGPA,1,median)~I(1-apply(specGPA,1,median)),col="brown",lwd=2,lty=7) lines(seq(0,1,0.1),seq(0,1,0.1),lty=2) axis(2,at=seq(0,1,0.2),label=seq(0,1,0.2)) axis(1,at=seq(0,1,0.2),label=seq(0,1,0.2)) } par(xpd=NA) text(-2.575,2.55,"a)") text(-1.3,2.55,"b)") text(-0.025,2.55,"c)") text(-2.575,1.1,"d)") text(-1.3,1.1,"e)") text(-0.025,1.1,"f)") text(-0.75,-0.2,"1-Specificity",cex=1.2) text(-2.75,1.225,"Sensitivity",cex=1.2,srt=90) legend(1,1.5,legend= c("NN","VSN","GL","InvTseng","InvMod","RDWGL","GPA") ,text.col=c("black","red","magenta","blue","orange","green","brown") ,col=c("black","red","magenta","blue","orange","green","brown") ,lty=c(1,2,3,4,5,6,7),cex=1.2,lwd=rep(2,7),box.lty=0) dev.off() pup <- c(0.4,0.5,0.6,0,0,0) pdown <- c(0,0,0,0.4,0.5,0.6) simsteps <- 100 pdf("ROC2.pdf",width=12,height=6.75) par(mfrow=c(2,3),omi=c(0,0,0,1)) for(f in 1:6){ sfname <- paste("SenSpec_",pup[f],"_",pdown[f],".RData",sep="") load(sfname) pup <- c(0.4,0.5,0.6,0,0,0) pdown <- c(0,0,0,0.4,0.5,0.6) par(mar=c(4, 3.9, 2.4, 0.4)+ 0.1) titlel <- paste(I(pup[f]*100),"% up ",I(pdown[f]*100),"% down") plot(1,1,type="n",ylim=c(0,1),xlim=c(0,1),ylab="",xlab="",main=titlel,axes=FALSE) lines(apply(sensNN,1,median)~I(1-apply(specNN,1,median)),lwd=2,lty=1) lines(apply(sensVSN,1,median)~I(1-apply(specVSN,1,median)),col="red",lwd=2,lty=2) lines(apply(sensGL,1,median)~I(1-apply(specGL,1,median)),col="magenta",lwd=2,lty=3) lines(apply(sensINVTseng,1,median)~I(1-apply(specINVTseng,1,median)),col="blue",lwd=2,lty=4) lines(apply(sensINVMod,1,median)~I(1-apply(specINVMod,1,median)),col="orange",lwd=2,lty=5) lines(apply(sensINVrdwgl,1,median)~I(1-apply(specINVrdwgl,1,median)),col="green",lwd=2,lty=6) lines(apply(sensGPA,1,median)~I(1-apply(specGPA,1,median)),col="brown",lwd=2,lty=7) lines(seq(0,1,0.1),seq(0,1,0.1),lty=2) axis(2,at=seq(0,1,0.2),label=seq(0,1,0.2)) axis(1,at=seq(0,1,0.2),label=seq(0,1,0.2)) } par(xpd=NA) text(-2.575,2.55,"a)") text(-1.3,2.55,"b)") text(-0.025,2.55,"c)") text(-2.575,1.1,"d)") text(-1.3,1.1,"e)") text(-0.025,1.1,"f)") text(-0.75,-0.2,"1-Specificity",cex=1.2) text(-2.75,1.225,"Sensitivity",cex=1.2,srt=90) legend(1,1.5,legend= c("NN","VSN","GL","InvTseng","InvMod","RDWGL","GPA") ,text.col=c("black","red","magenta","blue","orange","green","brown") ,col=c("black","red","magenta","blue","orange","green","brown") ,lty=c(1,2,3,4,5,6,7),cex=1.2,lwd=rep(2,7),box.lty=0) dev.off() #AUC-boxplots #function to calculate the area under the curve, depends on the R-package ROC (bioconductor) auc <- function(sens,spec){ x <- 1 - spec y <- sens if (x[1] > x[length(x)]) { x <- rev(x) y <- rev(y) } trapezint(x, y, 0, 1) } pdf("AUCboxplot1.pdf",width=12,height=6.75) par(mfrow=c(2,3)) aucs <- list() pup <- c(0.1,0.1,0.2,0.2,0,0) pdown <- c(0.1,0,0,0.2,0.1,0.2) for(f in 1:6){ sfname <- paste("SenSpec_",pup[f],"_",pdown[f],".RData",sep="") load(sfname) pup <- c(0.1,0.1,0.2,0.2,0,0) pdown <- c(0.1,0,0,0.2,0.1,0.2) aucv.nn <- numeric() for (i in 1:ncol(sensNN)){ aucv.nn[i] <- auc(sensNN[,i],specNN[,i]) } aucv.vsn <- numeric() for (i in 1:ncol(sensVSN)){ aucv.vsn[i] <- auc(sensVSN[,i],specVSN[,i]) } aucv.gl <- numeric() for (i in 1:ncol(sensGL)){ aucv.gl[i] <- auc(sensGL[,i],specGL[,i]) } aucv.invTseng <- numeric() for (i in 1:ncol(sensINVTseng)){ aucv.invTseng[i] <- auc(sensINVTseng[,i],specINVTseng[,i]) } aucv.invMod <- numeric() for (i in 1:ncol(sensINVMod)){ aucv.invMod[i] <- auc(sensINVMod[,i],specINVMod[,i]) } aucv.rdwgl <- numeric() for (i in 1:ncol(sensINVrdwgl)){ aucv.rdwgl[i] <- auc(sensINVrdwgl[,i],specINVrdwgl[,i]) } aucv.gpa <- numeric() for (i in 1:ncol(sensGPA)){ aucv.gpa[i] <- auc(sensGPA[,i],specGPA[,i]) } aucs <- data.frame(aucv.nn,aucv.vsn,aucv.gl,aucv.invTseng,aucv.invMod,aucv.rdwgl,aucv.gpa) boxplot(aucs,ylim=c(0,1),main=paste(pup[f]*100,"% up ",pdown[f]*100,"% down",sep=""),ylab="AUC",ylim=c(0,1),axes=F,xlab="") axis(1,at=1:7,labels=c("NN","VSN","GL","InvTseng","InvMod","RDWGL","GPA"),cex.axis=0.8) abline(h=.5,lty=2) axis(2,cex.axis=0.8) } par(xpd=NA) text(-18.9,2.8,"a)") text(-9.1,2.8,"b)") text(0.31,2.8,"c)") text(-18.9,1.1,"d)") text(-9.1,1.1,"e)") text(0.31,1.1,"f)") dev.off() pdf("AUCboxplot2.pdf",width=12,height=6.75) par(mfrow=c(2,3)) aucs <- list() pup <- c(0.4,0.5,0.6,0,0,0) pdown <- c(0,0,0,0.4,0.5,0.6) for(f in 1:6){ sfname <- paste("SenSpec_",pup[f],"_",pdown[f],".RData",sep="") load(sfname) pup <- c(0.4,0.5,0.6,0,0,0) pdown <- c(0,0,0,0.4,0.5,0.6) aucv.nn <- numeric() for (i in 1:ncol(sensNN)){ aucv.nn[i] <- auc(sensNN[,i],specNN[,i]) } aucv.vsn <- numeric() for (i in 1:ncol(sensVSN)){ aucv.vsn[i] <- auc(sensVSN[,i],specVSN[,i]) } aucv.gl <- numeric() for (i in 1:ncol(sensGL)){ aucv.gl[i] <- auc(sensGL[,i],specGL[,i]) } aucv.invTseng <- numeric() for (i in 1:ncol(sensINVTseng)){ aucv.invTseng[i] <- auc(sensINVTseng[,i],specINVTseng[,i]) } aucv.invMod <- numeric() for (i in 1:ncol(sensINVMod)){ aucv.invMod[i] <- auc(sensINVMod[,i],specINVMod[,i]) } aucv.rdwgl <- numeric() for (i in 1:ncol(sensINVrdwgl)){ aucv.rdwgl[i] <- auc(sensINVrdwgl[,i],specINVrdwgl[,i]) } aucv.gpa <- numeric() for (i in 1:ncol(sensGPA)){ aucv.gpa[i] <- auc(sensGPA[,i],specGPA[,i]) } aucs <- data.frame(aucv.nn,aucv.vsn,aucv.gl,aucv.invTseng,aucv.invMod,aucv.rdwgl,aucv.gpa) boxplot(aucs,ylim=c(0,1),main=paste(pup[f]*100,"% up ",pdown[f]*100,"% down",sep=""),ylab="AUC",ylim=c(0,1),axes=F,xlab="") axis(1,at=1:7,labels=c("NN","VSN","GL","InvTseng","InvMod","RDWGL","GPA"),cex.axis=0.8) abline(h=.5,lty=2) axis(2,cex.axis=0.8) } par(xpd=NA) text(-18.9,2.8,"a)") text(-9.1,2.8,"b)") text(0.31,2.8,"c)") text(-18.9,1.1,"d)") text(-9.1,1.1,"e)") text(0.31,1.1,"f)") dev.off() #MAplots library(Biobase) library(limma) library(statmod) library(ROC) library(vegan) Sys.setlocale("LC_ALL","C") rm(list = ls()) # generate semi artifical data using self-self hybridised dual-color microarray data #setwd("~/workspace") # path to selfself.RData # load self-self hybridised data load("selfself.RData") mu <- 0.3 sigma <- 0.1 pup <- 0.4 pdown <- 0 #generate a data set set.seed(07092010) MA <- MAstart tumor <- 11:20 control <- 1:10 y <- character(20) y[tumor] <- "tumor" y[control] <- "control" MA$M[which(is.na(MA$M))] <- 0 pr <- which(MA$genes$Status=="protein") names <- MA$genes$Name[pr] unames <- unique(names) dexup <- sample(unames,floor(pup*length(unames))) if(pup!=0){ dexd <- sample(unames[-which(unames%in%dexup)],floor(pdown*length(unames))) }else{dexd <- sample(unames,floor(pdown*length(unames)))} rgdexup <- which(MA$genes$Name%in%dexup) rgdexdown <- which(MA$genes$Name%in%dexd) rgdex <- union(rgdexup,rgdexdown) MA$genes$Status[rgdex] <- "diffexpr" attr(MA$genes$Status,"values")[19] <- "diffexpr" attr(MA$genes$Status,"col")[19] <- "red" for (i in 1:length(rgdexup)){ for (j in 1:length(tumor)){ MA$M[rgdexup[i],tumor[j]] <- MA$M[rgdexup[i],tumor[j]]+abs(rnorm(1,mu,sigma)) } } for (i in 1:length(rgdexdown)){ for (j in 1:length(tumor)){ MA$M[rgdexdown[i],tumor[j]] <- MA$M[rgdexdown[i],tumor[j]]-abs(rnorm(1,mu,sigma)) } } RG <- RG.MA(MA) pr <- which(MA$genes$Status=="protein"|MA$genes$Status=="diffexpr") i <- 20 weights <- modifyWeights(RG$weights,RG$genes$Status, c( "protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster", "sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr") ,c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)) #global loess y <- MA$M[,i] x <- MA$A[,i] w <- weights[,i] glfit <- loessFit(y,x,w,span=0.3, iterations=10) weights <- modifyWeights(RG$weights,RG$genes$Status, c( "protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster", "sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr") ,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) #invTseng p=0.02 lthr=5 weights <- modifyWeights(RG$weights,RG$genes$Status, c( "protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster", "sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr") ,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) G <- length(pr) invg <- pr[which(abs(rank(RG$R[pr,i])-rank(RG$G[pr,i]))< p*G & abs(rank(RG$R[pr,i]+RG$G[pr,i])/2)> lthr & abs(rank(RG$R[pr,i]+RG$G[pr,i])/2)< (G-lthr))] while(TRUE){ inS <- length(invg) invg <- invg[which(abs(rank(RG$R[invg,i])-rank(RG$G[invg,i])) < p*length(invg))] if(length(invg)==inS) break } weights[invg,i] <- 1 y <- MA$M[,i] x <- MA$A[,i] w.tseng <- weights[,i] invTsengfit <- loessFit(y,x,w.tseng,span=0.3, iterations=10) #invMod weights <- modifyWeights(RG$weights,RG$genes$Status, c( "protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster", "sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr") ,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) p=0.99 pinvg=0.25 maxgenes <- length(pr)*pinvg rd <- abs(rank(RG$R[pr,i])-rank(RG$G[pr,i])) invgM <- pr[order(rd)[1:I(length(pr)*p)]] while(length(invgM) > maxgenes){ rd <- abs(rank(RG$R[invgM,i])-rank(RG$G[invgM,i])) invgM <- invgM[order(rd)[1:I(length(invgM)*p)]] } rd <- abs(rank(RG$R[invgM,i])-rank(RG$G[invgM,i])) weights[invgM,i] <- (max(rd)-rd)/max(rd) w.invmod <- weights[,i] y <- MA$M[,i] x <- MA$A[,i] invModfit <- loessFit(y,x,w.invmod,span=0.3, iterations=10) #RDWGL weights <- modifyWeights(RG$weights,RG$genes$Status, c( "protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster", "sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr") ,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) rd <- abs(rank(RG$R[pr,i])-rank(RG$G[pr,i])) weights[pr,i] <- (max(rd)-rd)/max(rd) w.rdwgl <- weights[,i] y <- MA$M[,i] x <- MA$A[,i] invRDWGLfit <- loessFit(y,x,w.rdwgl,span=0.3, iterations=10) pdf("MAloess.pdf",width=12,height=5) par(mfrow=c(1,3),omi=c(.1,.5,.1,.1)) par(mar=c(4, 0.4, 1, 0.4)+ 0.1) plot(MA$M[pr,i]~MA$A[pr,i],cex=0.75,ylab="M",xlab="",xlim=c(11,16),ylim=c(-1,2.5),main="InvTseng",axes=F) points(MA$A[invg,i],MA$M[invg,i],col="green",cex=0.75) points(MA$A[rgdexup,i],MA$M[rgdexup,i],col="red",cex=0.75,pch=3) ord <- order(MA$A[,i]) lines(MA$A[ord,i],glfit$fitted[ord],col="orange",lwd=2) lines(MA$A[ord,i],invTsengfit$fitted[ord],col="blue",lwd=2) axis(2,at=c(-1,-.5, 0,.5,1,1.5,2),outer=F) axis(1,at=11:16) abline(h=0,lty=2) plot(MA$M[pr,i]~MA$A[pr,i],cex=0.75,ylab="",xlab="A",xlim=c(11,16),ylim=c(-1,2.5),main="InvMod",axes=F) for(j in 1:length(invgM)){ points(MA$A[invgM[j],i],MA$M[invgM[j],i],col=rgb(0,I(.25+(.75*w.tseng[invgM[j]])),0),cex=0.75) } points(MA$A[rgdexup,i],MA$M[rgdexup,i],col="red",cex=0.75,pch=3) ord <- order(MA$A[,i]) lines(MA$A[ord,i],glfit$fitted[ord],col="orange",lwd=2) lines(MA$A[ord,i],invModfit$fitted[ord],col="blue",lwd=2) axis(1,at=11:16) abline(h=0,lty=2) plot(MA$M[pr,i]~MA$A[pr,i],cex=0.75,ylab="",xlab="",xlim=c(11,16),ylim=c(-1,2.5),main="RDWGL",axes=F) for(j in 1:length(pr)){ points(MA$A[pr[j],i],MA$M[pr[j],i],col=rgb(0,I(.25+(.75*w.rdwgl[pr[j]])),0),cex=0.75) } points(MA$A[rgdexup,i],MA$M[rgdexup,i],col="red",cex=0.75,pch=3) ord <- order(MA$A[,i]) lines(MA$A[ord,i],glfit$fitted[ord],col="orange",lwd=2) lines(MA$A[ord,i],invRDWGLfit$fitted[ord],col="blue",lwd=2) axis(1,at=11:16) abline(h=0,lty=2) par(xpd=NA) text(-.9,0.5,"M",cex=1) legend(13,-.25,legend=c("upregulated protein","selected invariant protein","global loess curve","invariant loess curve") ,text.col=c("red","green","orange","blue","green"),lwd=c(1,1,2,2),col=c("red","green","orange","blue","green"),pch=c(4,1,-1,-1),lty=c(0,0,1,1),cex=1.2) dev.off()