# R-script to perform a simulation study based on self-self hybridised dual-color microarray data 
# as descriped in "Assessment and optimisation of normalisation methods for
# dual-colour antibody microarrays"
# The first part of the R-script will generate semi-artifical data sets for different simulation scenarios
# The second part is the main part of the simulation study  
# The third part will generate the different figures, e.g. ROC-plot, boxplots of the AUC values and MAplots
# Author: Martin Sill
#############################################################################################################

#1. generate semi-artifical data using self-self hybridised dual-color microarray data ######################
library(Biobase)
library(limma)
library(statmod)
library(ROC)
library(vegan)
Sys.setlocale("LC_ALL","C")
rm(list = ls())

#setwd("~/workspace") # path to selfself.RData
# load self-self hybridised data
load("selfself.RData")

mu       <- 0.1        
sigma 	 <- 0.1	
pup 	 <- c(0.1,0.2,0,0.2,0.4,0,0.05,0.1,0.5,0.6,0.8,0,0,0,0,0)
pdown 	 <- c(0.1,0,0.2,0.2,0,0.4,0,0,0,0,0,0.05,0.1,0.5,0.6,0.8)
simsteps <- 100

#generate data sets for the simulation scenarios

for (e in 1:length(pup)){  
	set.seed(07092010)
    RGset <- list()
	
	for (g in 1:simsteps){
		MA <- MAstart
		tumor  <- sample(1:ncol(RG),10)
		control <- which(!1:ncol(RG)%in%tumor)
		y <- character(20)
		y[tumor] <- "tumor"
		y[control] <- "control"
		MA$M[which(is.na(MA$M))] <- 0
		pr <- which(MA$genes$Status=="protein")
		names <- MA$genes$Name[pr]
		unames <- unique(names)
		dexup <- sample(unames,floor(pup[e]*length(unames)))
		if(pup[e]!=0){
			dexd  <- sample(unames[-which(unames%in%dexup)],floor(pdown[e]*length(unames)))
		}else{dexd <- sample(unames,floor(pdown[e]*length(unames)))}
		rgdexup <- which(MA$genes$Name%in%dexup)
		rgdexdown <- which(MA$genes$Name%in%dexd)
		rgdex <- union(rgdexup,rgdexdown)
		MA$genes$Status[rgdex] <- "diffexpr"
		attr(MA$genes$Status,"values")[19] <- "diffexpr"
		attr(MA$genes$Status,"col")[19] <- "red"
		for (i in 1:length(rgdexup)){
			for (j in 1:length(tumor)){
				MA$M[rgdexup[i],tumor[j]] <- MA$M[rgdexup[i],tumor[j]]+abs(rnorm(1,mu,sigma))
			}
		}
		
		for (i in 1:length(rgdexdown)){
			for (j in 1:length(tumor)){
				MA$M[rgdexdown[i],tumor[j]] <- MA$M[rgdexdown[i],tumor[j]]-abs(rnorm(1,mu,sigma))
			}
		}
		RG <- RG.MA(MA)
		RG[[2]]$X <- y        #save chip assignment
		RGset[[g]] <- RG
	}
save(RGset,file=paste("RG_",pup[e],"_",pdown[e],".RData",sep=""))
}	


#2. simulation study########################################################################################
library(Biobase)
library(limma)
library(statmod)
library(ROC)
library(vegan)
Sys.setlocale("LC_ALL","C")
rm(list = ls())
#setwd("~/workspace") # path to the generated RGsets

#simulation settings
pup 	 <- c(0.1,0.2,0,0.2,0.4,0,0.05,0.1,0.5,0.6,0.8,0,0,0,0,0)
pdown 	 <- c(0.1,0,0.2,0.2,0,0.4,0,0,0,0,0,0.05,0.1,0.5,0.6,0.8)
simsteps <- 100

# source invTseng, InvMod, RDWGL and GPA normalization
source("invariant.R")

for(e in 1:length(pup)){
	
	load(paste("RG_",pup[e],"_",pdown[e],".RData",sep=""))
	
	res <- list()
	
	for(g in 1:simsteps){ 
		
		RG <- RGset[[g]]
		
		#same background correction 
		RG <- backgroundCorrect(RG,method="normexp",offset=50,normexp.method="mle")
		
		#no within normalization NN 
		MA <- MA.RG(RG,bc.method="none")
		MA <- normalizeBetweenArrays(MA)
		
		#vsn normalization
		MAvsn <- normalizeBetweenArrays(RG,method="vsn")
		
		#set weights for spike-ins,control and housekeeping features at zero 
		pr <- which(MA$genes$Status=="protein"|MA$genes$Status=="diffexpr")
		weights <- modifyWeights(RG$weights,RG$genes$Status, c(
						"protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster",
						"sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr")
				,c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)) 
		
		#global loess 
		MAgl <- normalizeWithinArrays(RG,method="loess",weights=weights,iterations=10)
		MAgl <- normalizeBetweenArrays(MAgl)
		
		weights <- modifyWeights(RG$weights,RG$genes$Status, c(
						"protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster",
						"sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr")
				#,c(1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1)) 
				,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))
		
		#Tseng invariant selection algorithm 
		MAinvTseng <- invselTseng(RG,pr,weights=weights,plot.chip=NULL)
		MAinvTseng <- normalizeBetweenArrays(MAinvTseng)
		
		#InvMod
		MAinvMod <- InvMod(RG,pr,weights=weights,plot.chip=NULL)
		MAinvMod <- normalizeBetweenArrays(MAinvMod)
		
		#RDWGL
		MArdwgl <- RDWGL(RG,pr,weights=weights,plot.chip=NULL)
		MArdwgl <- normalizeBetweenArrays(MArdwgl)
		
		#GPA
		MAgpa <- procrustesNormalization(MA.RG(RG,bc.method="none")[pr,])
		MAgpa <- normalizeBetweenArrays(MAgpa) 
				
		#limma design matrix
		tumor <- which(RG[[2]]$X=="tumor")
		control <- which(RG[[2]]$X=="control")
		RG[[2]]$Cy3[tumor] <- "tumor"
		RG[[2]]$Cy3[control] <- "control"
		RG[[2]]$Cy5 <- "Ref"
		design <- modelMatrix(RG[[2]],ref="Ref")
		
		#reorder features and correct for dublicate correlation (each feature has a replicate)  
		MA<-MA[MA$genes$Status %in% c("protein","diffexpr"),]
		MAvsn<-MAvsn[MAvsn$genes$Status %in% c("protein","diffexpr"),]
		MAgl<-MAgl[MAgl$genes$Status %in% c("protein","diffexpr"),]
		MAinvTseng<-MAinvTseng[MAinvTseng$genes$Status %in% c("protein","diffexpr"),]
		MAinvMod<-MAinvMod[MAinvMod$genes$Status %in% c("protein","diffexpr"),]
		MArdwgl<-MArdwgl[MArdwgl$genes$Status %in% c("protein","diffexpr"),]
		MAgpa<-MAgpa[MAgpa$genes$Status %in% c("protein","diffexpr"),]
		
		MA <-      MA[order(MA$genes[,"Name"]),]
		MAvsn <-   MAvsn[order(MAvsn$genes[,"Name"]),]
		MAgl <-    MAgl[order(MAgl$genes[,"Name"]),]
		MAinvTseng  <-   MAinvTseng[order(MAinvTseng$genes[,"Name"]),]
		MAinvMod  <-   MAinvMod[order(MAinvMod$genes[,"Name"]),]
		MArdwgl  <-   MArdwgl[order(MArdwgl$genes[,"Name"]),]
		MAgpa  <-   MAgpa[order(MAgpa$genes[,"Name"]),]
		
		corfit 		<- duplicateCorrelation(MA, ndups=2, spacing=1)
		corfitvsn   <- duplicateCorrelation(MAvsn, ndups=2, spacing=1)
		corfitgl	<- duplicateCorrelation(MAgl, ndups=2, spacing=1)
		corfitinvTseng	<- duplicateCorrelation(MAinvTseng, ndups=2, spacing=1)
		corfitinvMod	<- duplicateCorrelation(MAinvMod, ndups=2, spacing=1)
		corfitrdwgl	<- duplicateCorrelation(MArdwgl, ndups=2, spacing=1)
		corfitgpa	<- duplicateCorrelation(MAgpa, ndups=2, spacing=1)
		
		# fit linear model
		fit <- lmFit(MA,ndups=2,design,correlation=corfit$consensus)        
		fitvsn <- lmFit(MAvsn,ndups=2,design,correlation=corfitvsn$consensus)
		fitgl <- lmFit(MAgl,design,ndups=2,correlation=corfitgl$consensus)
		fitinvTseng <- lmFit(MAinvTseng,design,ndups=2,correlation=corfitinvTseng$consensus)
		fitinvMod <- lmFit(MAinvMod,design,ndups=2,correlation=corfitinvMod$consensus)
		fitrdwgl <- lmFit(MArdwgl,design,ndups=2,correlation=corfitrdwgl$consensus)
		fitgpa <- lmFit(MAgpa,design,ndups=2,correlation=corfitgpa$consensus)
		
		# define contrast matrix 
		contrasts.matrix <- makeContrasts(tumor-control, levels=design)
		
		fit2 <- contrasts.fit(fit, contrasts.matrix)
		fit2 <- eBayes(fit2)
		fit2 <- fit2[which(fit2$genes$Status=="diffexpr"|fit2$genes$Status=="protein"),]
		results <-topTable(fit2,adjust="BH",sort.by="P",number=nrow(MA))
		
		fit2.vsn <- contrasts.fit(fitvsn, contrasts.matrix)
		fit2.vsn <- eBayes(fit2.vsn)
		fit2.vsn <- fit2.vsn[which(fit2.vsn$genes$Status=="diffexpr"|fit2.vsn$genes$Status=="protein"),]
		results.vsn <-topTable(fit2.vsn,adjust="BH",sort.by="P",number=nrow(MA))
		
		fit2.gl <- contrasts.fit(fitgl, contrasts.matrix)
		fit2.gl <- eBayes(fit2.gl)
		fit2.gl <- fit2.gl[which(fit2.gl$genes$Status=="diffexpr"|fit2.gl$genes$Status=="protein"),]
		results.gl <-topTable(fit2.gl,adjust="BH",sort.by="P",number=nrow(MAgl))
		
		fit2.invTseng <- contrasts.fit(fitinvTseng, contrasts.matrix)
		fit2.invTseng <- eBayes(fit2.invTseng)
		fit2.invTseng <- fit2.invTseng[which(fit2.invTseng$genes$Status=="diffexpr"|fit2.invTseng$genes$Status=="protein"),]
		results.invTseng <-topTable(fit2.invTseng,adjust="BH",sort.by="P",number=nrow(MAinvTseng))
			
		fit2.invMod <- contrasts.fit(fitinvMod, contrasts.matrix)
		fit2.invMod <- eBayes(fit2.invMod)
		fit2.invMod <- fit2.invMod[which(fit2.invMod$genes$Status=="diffexpr"|fit2.invMod$genes$Status=="protein"),]
		results.invMod <-topTable(fit2.invMod,adjust="BH",sort.by="P",number=nrow(MAinvMod))
			
		fit2.rdwgl <- contrasts.fit(fitrdwgl, contrasts.matrix)
		fit2.rdwgl <- eBayes(fit2.rdwgl)
		fit2.rdwgl <- fit2.rdwgl[which(fit2.rdwgl$genes$Status=="diffexpr"|fit2.rdwgl$genes$Status=="protein"),]
		results.rdwgl <-topTable(fit2.rdwgl,adjust="BH",sort.by="P",number=nrow(MArdwgl))
		
		fit2.gpa <- contrasts.fit(fitgpa, contrasts.matrix)
		fit2.gpa <- eBayes(fit2.gpa)
		fit2.gpa <- fit2.gpa[which(fit2.gpa$genes$Status=="diffexpr"|fit2.gpa$genes$Status=="protein"),]
		results.gpa <-topTable(fit2.gpa,adjust="BH",sort.by="P",number=nrow(MAgpa))
		
		res[[g]] <- list(results, results.vsn, results.gl, results.invTseng,results.invMod,results.rdwgl,results.gpa)
		cat("step",g,"\n")
	}
	save(res,file=paste("res_",pup[e],"_",pdown[e],".RData",sep=""))
}	

#calculate sensitivity and specificity

step.fdr <- function(results){
	truepos <- numeric()
	falsepos <- numeric()
	trueneg <- numeric()
	falseneg <- numeric()
	for(e in 1:nrow(results)){
		truepos[e] <- sum((results[1:e,"Status"]=="diffexpr"))
		falsepos[e] <- sum((results[1:e,"Status"]=="protein"))
		trueneg[e] <- sum((results[-c(1:e),"Status"]=="protein"))
		falseneg[e] <- sum((results[-c(1:e),"Status"]=="diffexpr"))
	}
	sens <- truepos/(truepos+falseneg)
	spec <- trueneg/(trueneg+falsepos)
	return(cbind(sensitivity=sens,specificity=spec))
}

for(e in 1:length(pup)){ 
	
	load(paste("res_",pup[e],"_",pdown[e],".RData",sep=""))
	
	sensNN <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	specNN <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	sensVSN <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	specVSN <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	sensGL <- matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	specGL <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	sensINVTseng <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	specINVTseng <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	sensINVMod <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	specINVMod <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	sensINVrdwgl <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	specINVrdwgl <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	sensGPA <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	specGPA <-matrix(nrow=nrow(res[[1]][[1]]),ncol=length(res))
	
	senspec <- list()
	for(g in 1:length(res)){
		temp  <- data.frame(lapply(res[[g]],step.fdr))
		sensNN[,g] <- temp[,1]
		specNN[,g] <- temp[,2]
		sensVSN[,g] <- temp[,3]
		specVSN[,g] <- temp[,4]
		sensGL[,g] <- temp[,5]
		specGL[,g] <- temp[,6]
		sensINVTseng[,g] <- temp[,7]
		specINVTseng[,g] <- temp[,8]
		sensINVMod[,g] <- temp[,9]
		specINVMod[,g] <-	temp[,10] 		
		sensINVrdwgl[,g] <- temp[,11]
		specINVrdwgl[,g] <- temp[,12]
		sensGPA[,g] <- temp[,13]
		specGPA[,g] <- temp[,14]
	}
sfname <- paste("SenSpec_",pup[e],"_",pdown[e],".RData",sep="")
save.image(sfname)
cat("scenario:",e,"\n")
}

#3. plot figures of the results #############################################################################
library(Biobase)
library(limma)
library(statmod)
library(ROC)
library(vegan)
Sys.setlocale("LC_ALL","C")
rm(list = ls())
#setwd("~/workspace") # path to SenSpec....RData files

#simulation settings
pup 	 <- c(0.1,0.1,0.2,0.2,0,0)
pdown 	 <- c(0.1,0,0,0.2,0.1,0.2)
simsteps <- 100

#ROC-plots
pdf("ROC1.pdf",width=12,height=6.75)
par(mfrow=c(2,3),omi=c(0,0,0,1))
for(f in 1:6){
	sfname <- paste("SenSpec_",pup[f],"_",pdown[f],".RData",sep="")
	load(sfname)
	pup 	 <- c(0.1,0.1,0.2,0.2,0,0)
	pdown 	 <- c(0.1,0,0,0.2,0.1,0.2)
	par(mar=c(4, 3.9, 2.4, 0.4)+	0.1)
	titlel <- paste(I(pup[f]*100),"% up ",I(pdown[f]*100),"% down")
	plot(1,1,type="n",ylim=c(0,1),xlim=c(0,1),ylab="",xlab="",main=titlel,axes=FALSE)
	lines(apply(sensNN,1,median)~I(1-apply(specNN,1,median)),lwd=2,lty=1)
	lines(apply(sensVSN,1,median)~I(1-apply(specVSN,1,median)),col="red",lwd=2,lty=2)
	lines(apply(sensGL,1,median)~I(1-apply(specGL,1,median)),col="magenta",lwd=2,lty=3)
	lines(apply(sensINVTseng,1,median)~I(1-apply(specINVTseng,1,median)),col="blue",lwd=2,lty=4)
	lines(apply(sensINVMod,1,median)~I(1-apply(specINVMod,1,median)),col="orange",lwd=2,lty=5)
	lines(apply(sensINVrdwgl,1,median)~I(1-apply(specINVrdwgl,1,median)),col="green",lwd=2,lty=6)
	lines(apply(sensGPA,1,median)~I(1-apply(specGPA,1,median)),col="brown",lwd=2,lty=7)
	lines(seq(0,1,0.1),seq(0,1,0.1),lty=2)
	axis(2,at=seq(0,1,0.2),label=seq(0,1,0.2))
	axis(1,at=seq(0,1,0.2),label=seq(0,1,0.2))
}
par(xpd=NA)
text(-2.575,2.55,"a)")
text(-1.3,2.55,"b)")
text(-0.025,2.55,"c)")
text(-2.575,1.1,"d)")
text(-1.3,1.1,"e)")
text(-0.025,1.1,"f)")
text(-0.75,-0.2,"1-Specificity",cex=1.2)
text(-2.75,1.225,"Sensitivity",cex=1.2,srt=90)
legend(1,1.5,legend=
				c("NN","VSN","GL","InvTseng","InvMod","RDWGL","GPA")
		,text.col=c("black","red","magenta","blue","orange","green","brown")
		,col=c("black","red","magenta","blue","orange","green","brown")
		,lty=c(1,2,3,4,5,6,7),cex=1.2,lwd=rep(2,7),box.lty=0)
dev.off()

pup 	 <- c(0.4,0.5,0.6,0,0,0)
pdown 	 <- c(0,0,0,0.4,0.5,0.6)
simsteps <- 100

pdf("ROC2.pdf",width=12,height=6.75)
par(mfrow=c(2,3),omi=c(0,0,0,1))
for(f in 1:6){
	sfname <- paste("SenSpec_",pup[f],"_",pdown[f],".RData",sep="")
	load(sfname)
	pup 	 <- c(0.4,0.5,0.6,0,0,0)
	pdown 	 <- c(0,0,0,0.4,0.5,0.6)
	par(mar=c(4, 3.9, 2.4, 0.4)+	0.1)
	titlel <- paste(I(pup[f]*100),"% up ",I(pdown[f]*100),"% down")
	plot(1,1,type="n",ylim=c(0,1),xlim=c(0,1),ylab="",xlab="",main=titlel,axes=FALSE)
	lines(apply(sensNN,1,median)~I(1-apply(specNN,1,median)),lwd=2,lty=1)
	lines(apply(sensVSN,1,median)~I(1-apply(specVSN,1,median)),col="red",lwd=2,lty=2)
	lines(apply(sensGL,1,median)~I(1-apply(specGL,1,median)),col="magenta",lwd=2,lty=3)
	lines(apply(sensINVTseng,1,median)~I(1-apply(specINVTseng,1,median)),col="blue",lwd=2,lty=4)
	lines(apply(sensINVMod,1,median)~I(1-apply(specINVMod,1,median)),col="orange",lwd=2,lty=5)
	lines(apply(sensINVrdwgl,1,median)~I(1-apply(specINVrdwgl,1,median)),col="green",lwd=2,lty=6)
	lines(apply(sensGPA,1,median)~I(1-apply(specGPA,1,median)),col="brown",lwd=2,lty=7)
	lines(seq(0,1,0.1),seq(0,1,0.1),lty=2)
	axis(2,at=seq(0,1,0.2),label=seq(0,1,0.2))
	axis(1,at=seq(0,1,0.2),label=seq(0,1,0.2))
}
par(xpd=NA)
text(-2.575,2.55,"a)")
text(-1.3,2.55,"b)")
text(-0.025,2.55,"c)")
text(-2.575,1.1,"d)")
text(-1.3,1.1,"e)")
text(-0.025,1.1,"f)")
text(-0.75,-0.2,"1-Specificity",cex=1.2)
text(-2.75,1.225,"Sensitivity",cex=1.2,srt=90)
legend(1,1.5,legend=
				c("NN","VSN","GL","InvTseng","InvMod","RDWGL","GPA")
		,text.col=c("black","red","magenta","blue","orange","green","brown")
		,col=c("black","red","magenta","blue","orange","green","brown")
		,lty=c(1,2,3,4,5,6,7),cex=1.2,lwd=rep(2,7),box.lty=0)
dev.off()


#AUC-boxplots

#function to calculate the area under the curve, depends on the R-package ROC (bioconductor)
auc <- function(sens,spec){
	x <- 1 - spec
	y <- sens
	if (x[1] > x[length(x)]) {
		x <- rev(x)
		y <- rev(y)
	}
	trapezint(x, y, 0, 1)
}


pdf("AUCboxplot1.pdf",width=12,height=6.75)
par(mfrow=c(2,3))
aucs <- list()
pup 	 <- c(0.1,0.1,0.2,0.2,0,0)
pdown 	 <- c(0.1,0,0,0.2,0.1,0.2)
for(f in 1:6){
	sfname <- paste("SenSpec_",pup[f],"_",pdown[f],".RData",sep="")
	load(sfname)
	pup 	 <- c(0.1,0.1,0.2,0.2,0,0)
	pdown 	 <- c(0.1,0,0,0.2,0.1,0.2)
	aucv.nn <- numeric() 
	for (i in 1:ncol(sensNN)){
		aucv.nn[i] <- auc(sensNN[,i],specNN[,i])
	} 
	aucv.vsn <- numeric() 
	for (i in 1:ncol(sensVSN)){
		aucv.vsn[i] <- auc(sensVSN[,i],specVSN[,i])
	} 
	aucv.gl <- numeric() 
	for (i in 1:ncol(sensGL)){
		aucv.gl[i] <- auc(sensGL[,i],specGL[,i])
	} 
	aucv.invTseng <- numeric() 
	for (i in 1:ncol(sensINVTseng)){
		aucv.invTseng[i] <- auc(sensINVTseng[,i],specINVTseng[,i])
	} 
	aucv.invMod <- numeric() 
	for (i in 1:ncol(sensINVMod)){
		aucv.invMod[i] <- auc(sensINVMod[,i],specINVMod[,i])
	} 
	aucv.rdwgl <- numeric() 
	for (i in 1:ncol(sensINVrdwgl)){
		aucv.rdwgl[i] <- auc(sensINVrdwgl[,i],specINVrdwgl[,i])
	} 
	aucv.gpa <- numeric() 
	for (i in 1:ncol(sensGPA)){
		aucv.gpa[i] <- auc(sensGPA[,i],specGPA[,i])
	}
	aucs <- data.frame(aucv.nn,aucv.vsn,aucv.gl,aucv.invTseng,aucv.invMod,aucv.rdwgl,aucv.gpa)
	boxplot(aucs,ylim=c(0,1),main=paste(pup[f]*100,"% up ",pdown[f]*100,"% down",sep=""),ylab="AUC",ylim=c(0,1),axes=F,xlab="")
	axis(1,at=1:7,labels=c("NN","VSN","GL","InvTseng","InvMod","RDWGL","GPA"),cex.axis=0.8)
	abline(h=.5,lty=2)
	axis(2,cex.axis=0.8)
}
par(xpd=NA)
text(-18.9,2.8,"a)")
text(-9.1,2.8,"b)")
text(0.31,2.8,"c)")
text(-18.9,1.1,"d)")
text(-9.1,1.1,"e)")
text(0.31,1.1,"f)")
dev.off()

pdf("AUCboxplot2.pdf",width=12,height=6.75)
par(mfrow=c(2,3))
aucs <- list()
pup 	 <- c(0.4,0.5,0.6,0,0,0)
pdown 	 <- c(0,0,0,0.4,0.5,0.6)
for(f in 1:6){
	sfname <- paste("SenSpec_",pup[f],"_",pdown[f],".RData",sep="")
	load(sfname)
	pup 	 <- c(0.4,0.5,0.6,0,0,0)
	pdown 	 <- c(0,0,0,0.4,0.5,0.6)
	aucv.nn <- numeric() 
	for (i in 1:ncol(sensNN)){
		aucv.nn[i] <- auc(sensNN[,i],specNN[,i])
	} 
	aucv.vsn <- numeric() 
	for (i in 1:ncol(sensVSN)){
		aucv.vsn[i] <- auc(sensVSN[,i],specVSN[,i])
	} 
	aucv.gl <- numeric() 
	for (i in 1:ncol(sensGL)){
		aucv.gl[i] <- auc(sensGL[,i],specGL[,i])
	} 
	aucv.invTseng <- numeric() 
	for (i in 1:ncol(sensINVTseng)){
		aucv.invTseng[i] <- auc(sensINVTseng[,i],specINVTseng[,i])
	} 
	aucv.invMod <- numeric() 
	for (i in 1:ncol(sensINVMod)){
		aucv.invMod[i] <- auc(sensINVMod[,i],specINVMod[,i])
	} 
	aucv.rdwgl <- numeric() 
	for (i in 1:ncol(sensINVrdwgl)){
		aucv.rdwgl[i] <- auc(sensINVrdwgl[,i],specINVrdwgl[,i])
	} 
	aucv.gpa <- numeric() 
	for (i in 1:ncol(sensGPA)){
		aucv.gpa[i] <- auc(sensGPA[,i],specGPA[,i])
	}
	aucs <- data.frame(aucv.nn,aucv.vsn,aucv.gl,aucv.invTseng,aucv.invMod,aucv.rdwgl,aucv.gpa)
	boxplot(aucs,ylim=c(0,1),main=paste(pup[f]*100,"% up ",pdown[f]*100,"% down",sep=""),ylab="AUC",ylim=c(0,1),axes=F,xlab="")
	axis(1,at=1:7,labels=c("NN","VSN","GL","InvTseng","InvMod","RDWGL","GPA"),cex.axis=0.8)
	abline(h=.5,lty=2)
	axis(2,cex.axis=0.8)
}
par(xpd=NA)
text(-18.9,2.8,"a)")
text(-9.1,2.8,"b)")
text(0.31,2.8,"c)")
text(-18.9,1.1,"d)")
text(-9.1,1.1,"e)")
text(0.31,1.1,"f)")
dev.off()


#MAplots
library(Biobase)
library(limma)
library(statmod)
library(ROC)
library(vegan)
Sys.setlocale("LC_ALL","C")
rm(list = ls())

# generate semi artifical data using self-self hybridised dual-color microarray data
#setwd("~/workspace") # path to selfself.RData
# load self-self hybridised data
load("selfself.RData")

mu     <-   0.3         
sigma  <-   0.1	
pup    <-   0.4
pdown <-   0

#generate a data set
set.seed(07092010)
MA <- MAstart
tumor  <- 11:20
control <- 1:10 
y <- character(20)
y[tumor] <- "tumor"
y[control] <- "control"
MA$M[which(is.na(MA$M))] <- 0
pr <- which(MA$genes$Status=="protein")
names <- MA$genes$Name[pr]
unames <- unique(names)
dexup <- sample(unames,floor(pup*length(unames)))
if(pup!=0){
	dexd  <- sample(unames[-which(unames%in%dexup)],floor(pdown*length(unames)))
}else{dexd <- sample(unames,floor(pdown*length(unames)))}
rgdexup <- which(MA$genes$Name%in%dexup)
rgdexdown <- which(MA$genes$Name%in%dexd)
rgdex <- union(rgdexup,rgdexdown)
MA$genes$Status[rgdex] <- "diffexpr"
attr(MA$genes$Status,"values")[19] <- "diffexpr"
attr(MA$genes$Status,"col")[19] <- "red"
for (i in 1:length(rgdexup)){
	for (j in 1:length(tumor)){
		MA$M[rgdexup[i],tumor[j]] <- MA$M[rgdexup[i],tumor[j]]+abs(rnorm(1,mu,sigma))
	}
}

for (i in 1:length(rgdexdown)){
	for (j in 1:length(tumor)){
		MA$M[rgdexdown[i],tumor[j]] <- MA$M[rgdexdown[i],tumor[j]]-abs(rnorm(1,mu,sigma))
	}
}
RG <- RG.MA(MA)

pr <- which(MA$genes$Status=="protein"|MA$genes$Status=="diffexpr")

i <- 20

weights <- modifyWeights(RG$weights,RG$genes$Status, c(
				"protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster",
				"sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr")
		,c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)) 

#global loess
y <- MA$M[,i]
x <- MA$A[,i]
w <- weights[,i]
glfit <- loessFit(y,x,w,span=0.3,  iterations=10)

weights <- modifyWeights(RG$weights,RG$genes$Status, c(
				"protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster",
				"sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr")
		,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))


#invTseng
p=0.02
lthr=5
weights <- modifyWeights(RG$weights,RG$genes$Status, c(
				"protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster",
				"sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr")
		,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))
G <- length(pr)
invg <-  pr[which(abs(rank(RG$R[pr,i])-rank(RG$G[pr,i]))< p*G &
			    		abs(rank(RG$R[pr,i]+RG$G[pr,i])/2)> lthr &
						abs(rank(RG$R[pr,i]+RG$G[pr,i])/2)< (G-lthr))]
	while(TRUE){
		inS <- length(invg)
		invg <-  invg[which(abs(rank(RG$R[invg,i])-rank(RG$G[invg,i])) < p*length(invg))]
		if(length(invg)==inS) break 
	}
weights[invg,i] <- 1 
y <- MA$M[,i]
x <- MA$A[,i]
w.tseng <- weights[,i]
invTsengfit <- loessFit(y,x,w.tseng,span=0.3,  iterations=10)

#invMod
weights <- modifyWeights(RG$weights,RG$genes$Status, c(
				"protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster",
				"sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr")
		,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))
p=0.99
pinvg=0.25
maxgenes <- length(pr)*pinvg
rd <- abs(rank(RG$R[pr,i])-rank(RG$G[pr,i]))
invgM <- pr[order(rd)[1:I(length(pr)*p)]]
while(length(invgM) > maxgenes){
	rd <- abs(rank(RG$R[invgM,i])-rank(RG$G[invgM,i]))
	invgM <- invgM[order(rd)[1:I(length(invgM)*p)]]
}
rd <- abs(rank(RG$R[invgM,i])-rank(RG$G[invgM,i]))
weights[invgM,i] <- (max(rd)-rd)/max(rd)
w.invmod <- weights[,i]
y <- MA$M[,i]
x <- MA$A[,i]
invModfit <- loessFit(y,x,w.invmod,span=0.3,  iterations=10)


#RDWGL
weights <- modifyWeights(RG$weights,RG$genes$Status, c(
				"protein","background","actin","polyclonal","IgM","albumin","GAPDH","sp_CHTB","sp_GDA0","sp_Hamster",
				"sp_Mouse","sp_DNP1","sp_DNP2","sp_GFP","sp_FITC","control both","control Cy3","control Cy5","diffexpr")
		,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))
rd <- abs(rank(RG$R[pr,i])-rank(RG$G[pr,i]))
weights[pr,i] <- (max(rd)-rd)/max(rd)
w.rdwgl <- weights[,i]
y <-  MA$M[,i]
x <- MA$A[,i]
invRDWGLfit <- loessFit(y,x,w.rdwgl,span=0.3,  iterations=10)

pdf("MAloess.pdf",width=12,height=5)

par(mfrow=c(1,3),omi=c(.1,.5,.1,.1))
par(mar=c(4, 0.4, 1, 0.4)+	0.1)
plot(MA$M[pr,i]~MA$A[pr,i],cex=0.75,ylab="M",xlab="",xlim=c(11,16),ylim=c(-1,2.5),main="InvTseng",axes=F)
points(MA$A[invg,i],MA$M[invg,i],col="green",cex=0.75)
points(MA$A[rgdexup,i],MA$M[rgdexup,i],col="red",cex=0.75,pch=3)
ord <- order(MA$A[,i])
lines(MA$A[ord,i],glfit$fitted[ord],col="orange",lwd=2)
lines(MA$A[ord,i],invTsengfit$fitted[ord],col="blue",lwd=2)
axis(2,at=c(-1,-.5, 0,.5,1,1.5,2),outer=F)
axis(1,at=11:16)
abline(h=0,lty=2)
plot(MA$M[pr,i]~MA$A[pr,i],cex=0.75,ylab="",xlab="A",xlim=c(11,16),ylim=c(-1,2.5),main="InvMod",axes=F)
for(j in 1:length(invgM)){
	points(MA$A[invgM[j],i],MA$M[invgM[j],i],col=rgb(0,I(.25+(.75*w.tseng[invgM[j]])),0),cex=0.75)
}
points(MA$A[rgdexup,i],MA$M[rgdexup,i],col="red",cex=0.75,pch=3)
ord <- order(MA$A[,i])
lines(MA$A[ord,i],glfit$fitted[ord],col="orange",lwd=2)
lines(MA$A[ord,i],invModfit$fitted[ord],col="blue",lwd=2)
axis(1,at=11:16)
abline(h=0,lty=2)
plot(MA$M[pr,i]~MA$A[pr,i],cex=0.75,ylab="",xlab="",xlim=c(11,16),ylim=c(-1,2.5),main="RDWGL",axes=F)
for(j in 1:length(pr)){
	points(MA$A[pr[j],i],MA$M[pr[j],i],col=rgb(0,I(.25+(.75*w.rdwgl[pr[j]])),0),cex=0.75)
}
points(MA$A[rgdexup,i],MA$M[rgdexup,i],col="red",cex=0.75,pch=3)
ord <- order(MA$A[,i])
lines(MA$A[ord,i],glfit$fitted[ord],col="orange",lwd=2)
lines(MA$A[ord,i],invRDWGLfit$fitted[ord],col="blue",lwd=2)
axis(1,at=11:16)
abline(h=0,lty=2)
par(xpd=NA)
text(-.9,0.5,"M",cex=1)
legend(13,-.25,legend=c("upregulated protein","selected invariant protein","global loess curve","invariant loess curve")
		,text.col=c("red","green","orange","blue","green"),lwd=c(1,1,2,2),col=c("red","green","orange","blue","green"),pch=c(4,1,-1,-1),lty=c(0,0,1,1),cex=1.2)

dev.off()