####################################################### # Function for WAD (Weighted Average Difference) in R # Written by: Koji Kadota, Univ. Tokyo, 2007 # Citation: Kadota K, Nakai Y, Shimizu K., A weighted average difference # method for detecting differentially expressed genes # using DNA microarrays. Algorithms Mol. Biol., 2008 ####################################################### WAD <- function(x, cl, dynamic_r, min_v){ x.class1 <- x[(cl == 0)] x.class2 <- x[(cl == 1)] x_ave <- (mean(x.class1) + mean(x.class2))/2 weight <- (x_ave - min_v)/dynamic_r statistic <- (mean(x.class2) - mean(x.class1))*weight return(statistic) } ############################# ### Average Difference ### ############################# AveDiff <- function(x, cl){ x.class0 <- x[(cl == 0)] x.class1 <- x[(cl == 1)] statistic <- mean(x.class1) - mean(x.class0) return(statistic) } ############################# ### Average Fold Change ### ############################# AvelogFC <- function(x, cl){ x.class0 <- x[(cl == 0)] x.class1 <- x[(cl == 1)] statistic <- log2(mean(x.class1)/mean(x.class0)) return(statistic) } source('http://eh3.uc.edu/r/ibmtR.R') library(RankProd) #パッケージの読み込み library(samr) library(st) library(limma) library(ROC) ############################### ############################### ### MAS-preprocessed data ### ############################### ############################### data <- read.table("http://affycomp.biostat.jhsph.edu/AFFY2/rafa@jhu.edu/030424.1033/hgu133.csv", header=TRUE, row.names=1, sep=",") data[data < 1] <- 1 data <- log(data, 2) data.cl.org <- c(0,1,0,1,0,1) data.cl <- data.cl.org ################################################################### ### Assignment for 42 true differentially expressed probesets ### data.degenes <- as.matrix(c(rep(0, nrow(data)))) rownames(data.degenes) <- rownames(data) degenes <- c( "203508_at", "204563_at", "204513_s_at", "204205_at", "204959_at", "207655_s_at", "204836_at", "205291_at", "209795_at", "207777_s_at", "204912_at", "205569_at", "207160_at", "205692_s_at", "212827_at", "209606_at", "205267_at", "204417_at", "205398_s_at", "209734_at", "209354_at", "206060_s_at", "205790_at", "200665_s_at", "207641_at", "207540_s_at", "204430_s_at", "203471_s_at", "204951_at", "207968_s_at", "AFFX-r2-TagA_at", "AFFX-r2-TagB_at", "AFFX-r2-TagC_at", "AFFX-r2-TagD_at", "AFFX-r2-TagE_at", "AFFX-r2-TagF_at", "AFFX-r2-TagG_at", "AFFX-r2-TagH_at", "AFFX-DapX-3_at", "AFFX-LysX-3_at", "AFFX-PheX-3_at", "AFFX-ThrX-3_at" ) data.degenes[degenes, ] <- 1 ################################################################### write.table(data.degenes, "data_DEG.txt", sep = "\t", append=F, quote=F, row.names=F) ################################# ### AD (Average Difference) ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org tmpall <- apply(data[,analysis_vector == 1], 1, AveDiff, data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j,"\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_MAS_AD.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_MAS_AD.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_AD <- apply(quantile_tmp, 1, mean) AUC_AD <- mean(AUC_tmp) ############# ### WAD ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL data.sub <- NULL tmp.class1 <- NULL tmp.class2 <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org data.sub <- data[,analysis_vector == 1] tmp.class1 <- apply(data.sub[,data.cl == 0], 1, mean) tmp.class2 <- apply(data.sub[,data.cl == 1], 1, mean) ave_vector <- (tmp.class1 + tmp.class2)/2 dynamic_range <- max(ave_vector) - min(ave_vector) tmpall <- apply(data[,analysis_vector == 1], 1, WAD, data.cl, dynamic_range, min(ave_vector)) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_MAS_WAD.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_MAS_WAD.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_WAD <- apply(quantile_tmp, 1, mean) AUC_WAD <- mean(AUC_tmp) ############################# ### WAD's "w" term only ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL data.sub <- NULL tmp.class1 <- NULL tmp.class2 <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org data.sub <- data[,analysis_vector == 1] tmp.class1 <- apply(data.sub[,data.cl == 0], 1, mean) tmp.class2 <- apply(data.sub[,data.cl == 1], 1, mean) ave_vector <- (tmp.class1 + tmp.class2)/2 dynamic_range <- max(ave_vector) - min(ave_vector) tmpall <- (ave_vector - min(ave_vector))/dynamic_range rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_MAS_w.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_MAS_w.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_w <- apply(quantile_tmp, 1, mean) AUC_w <- mean(AUC_tmp) ############ ### FC ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL data.degenes.vector <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data <- 2^data data.cl <- data.cl.org tmpall <- apply(data[,analysis_vector == 1], 1, AvelogFC, data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) data <- log(data, 2) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_MAS_FC.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_MAS_FC.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_FC <- apply(quantile_tmp, 1, mean) AUC_FC <- mean(AUC_tmp) ############ ### RP ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org tmpall <- RP(data[,analysis_vector == 1], data.cl, num.perm=1, logged=TRUE, na.rm = FALSE, plot = FALSE, rand = 123) rank_method <- rank(apply(tmpall$RPs, 1, min), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_MAS_RP.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_MAS_RP.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_RP <- apply(quantile_tmp, 1, mean) AUC_RP <- mean(AUC_tmp) ############## ### ibmT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org design <- model.matrix(~data.cl) data <- as.matrix(data) fit <- lmFit(data[,analysis_vector == 1], design) fit$Amean<-rowMeans(data[,analysis_vector == 1]) fit <- IBMT(fit,2) tmpall <- fit$IBMT.t rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_MAS_ibmT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_MAS_ibmT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_ibmT <- apply(quantile_tmp, 1, mean) AUC_ibmT <- mean(AUC_tmp) ############## ### modT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org + 1 tmpall <- modt.stat(t(data[,analysis_vector == 1]), data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_MAS_modT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_MAS_modT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_modT <- apply(quantile_tmp, 1, mean) AUC_modT <- mean(AUC_tmp) ############## ### samT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org + 1 tmpall <- sam.stat(t(data[,analysis_vector == 1]), data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_MAS_samT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_MAS_samT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_samT <- apply(quantile_tmp, 1, mean) AUC_samT <- mean(AUC_tmp) ################# ### shrinkT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org + 1 tmpall <- shrinkt.stat(t(data[,analysis_vector == 1]), data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_MAS_shrinkT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_MAS_shrinkT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_shrinkT <- apply(quantile_tmp, 1, mean) AUC_shrinkT <- mean(AUC_tmp) tmp <- rbind(quantile_w, quantile_AD, quantile_WAD, quantile_FC, quantile_RP, quantile_modT, quantile_samT, quantile_shrinkT, quantile_ibmT) write.table(tmp, "result_Dataset2_quantile_MAS.txt", sep = "\t", append=F, quote=F, row.names=F) tmp <- AUC_w tmp <- rbind(tmp, AUC_AD) tmp <- rbind(tmp, AUC_WAD) tmp <- rbind(tmp, AUC_FC) tmp <- rbind(tmp, AUC_RP) tmp <- rbind(tmp, AUC_modT) tmp <- rbind(tmp, AUC_samT) tmp <- rbind(tmp, AUC_shrinkT) tmp <- rbind(tmp, AUC_ibmT) write.table(tmp, "result_Dataset2_AUC_MAS.txt", sep = "\t", append=F, quote=F, row.names=F) ############################### ############################### ### RMA-preprocessed data ### ############################### ############################### data <- read.table("http://affycomp.biostat.jhsph.edu/AFFY2/rafa@jhu.edu/030429.1332/hgu133.csv", header=TRUE, row.names=1, sep=",") data[data < 1] <- 1 data <- log(data, 2) data.cl <- data.cl.org ################################################################### ### Assignment for 42 true differentially expressed probesets ### data.degenes <- as.matrix(c(rep(0, nrow(data)))) rownames(data.degenes) <- rownames(data) degenes <- c( "203508_at", "204563_at", "204513_s_at", "204205_at", "204959_at", "207655_s_at", "204836_at", "205291_at", "209795_at", "207777_s_at", "204912_at", "205569_at", "207160_at", "205692_s_at", "212827_at", "209606_at", "205267_at", "204417_at", "205398_s_at", "209734_at", "209354_at", "206060_s_at", "205790_at", "200665_s_at", "207641_at", "207540_s_at", "204430_s_at", "203471_s_at", "204951_at", "207968_s_at", "AFFX-r2-TagA_at", "AFFX-r2-TagB_at", "AFFX-r2-TagC_at", "AFFX-r2-TagD_at", "AFFX-r2-TagE_at", "AFFX-r2-TagF_at", "AFFX-r2-TagG_at", "AFFX-r2-TagH_at", "AFFX-DapX-3_at", "AFFX-LysX-3_at", "AFFX-PheX-3_at", "AFFX-ThrX-3_at" ) data.degenes[degenes, ] <- 1 ################################################################### ################################# ### AD (Average Difference) ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org tmpall <- apply(data[,analysis_vector == 1], 1, AveDiff, data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j,"\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_RMA_AD.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_RMA_AD.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_AD <- apply(quantile_tmp, 1, mean) AUC_AD <- mean(AUC_tmp) ############# ### WAD ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL data.sub <- NULL tmp.class1 <- NULL tmp.class2 <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org data.sub <- data[,analysis_vector == 1] tmp.class1 <- apply(data.sub[,data.cl == 0], 1, mean) tmp.class2 <- apply(data.sub[,data.cl == 1], 1, mean) ave_vector <- (tmp.class1 + tmp.class2)/2 dynamic_range <- max(ave_vector) - min(ave_vector) tmpall <- apply(data[,analysis_vector == 1], 1, WAD, data.cl, dynamic_range, min(ave_vector)) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_RMA_WAD.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_RMA_WAD.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_WAD <- apply(quantile_tmp, 1, mean) AUC_WAD <- mean(AUC_tmp) ############################# ### WAD's "w" term only ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL data.sub <- NULL tmp.class1 <- NULL tmp.class2 <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org data.sub <- data[,analysis_vector == 1] tmp.class1 <- apply(data.sub[,data.cl == 0], 1, mean) tmp.class2 <- apply(data.sub[,data.cl == 1], 1, mean) ave_vector <- (tmp.class1 + tmp.class2)/2 dynamic_range <- max(ave_vector) - min(ave_vector) tmpall <- (ave_vector - min(ave_vector))/dynamic_range rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_RMA_w.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_RMA_w.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_w <- apply(quantile_tmp, 1, mean) AUC_w <- mean(AUC_tmp) ############ ### FC ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL data.degenes.vector <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data <- 2^data data.cl <- data.cl.org tmpall <- apply(data[,analysis_vector == 1], 1, AvelogFC, data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) data <- log(data, 2) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_RMA_FC.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_RMA_FC.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_FC <- apply(quantile_tmp, 1, mean) AUC_FC <- mean(AUC_tmp) ############ ### RP ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org tmpall <- RP(data[,analysis_vector == 1], data.cl, num.perm=1, logged=TRUE, na.rm = FALSE, plot = FALSE, rand = 123) rank_method <- rank(apply(tmpall$RPs, 1, min), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_RMA_RP.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_RMA_RP.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_RP <- apply(quantile_tmp, 1, mean) AUC_RP <- mean(AUC_tmp) ############## ### ibmT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org design <- model.matrix(~data.cl) data <- as.matrix(data) fit <- lmFit(data[,analysis_vector == 1], design) fit$Amean<-rowMeans(data[,analysis_vector == 1]) fit <- IBMT(fit,2) tmpall <- fit$IBMT.t rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_RMA_ibmT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_RMA_ibmT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_ibmT <- apply(quantile_tmp, 1, mean) AUC_ibmT <- mean(AUC_tmp) ############## ### modT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org + 1 tmpall <- modt.stat(t(data[,analysis_vector == 1]), data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_RMA_modT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_RMA_modT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_modT <- apply(quantile_tmp, 1, mean) AUC_modT <- mean(AUC_tmp) ############## ### samT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org + 1 tmpall <- sam.stat(t(data[,analysis_vector == 1]), data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_RMA_samT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_RMA_samT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_samT <- apply(quantile_tmp, 1, mean) AUC_samT <- mean(AUC_tmp) ################# ### shrinkT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org + 1 tmpall <- shrinkt.stat(t(data[,analysis_vector == 1]), data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_RMA_shrinkT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_RMA_shrinkT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_shrinkT <- apply(quantile_tmp, 1, mean) AUC_shrinkT <- mean(AUC_tmp) tmp <- rbind(quantile_w, quantile_AD, quantile_WAD, quantile_FC, quantile_RP, quantile_modT, quantile_samT, quantile_shrinkT, quantile_ibmT) write.table(tmp, "result_Dataset2_quantile_RMA.txt", sep = "\t", append=F, quote=F, row.names=F) tmp <- AUC_w tmp <- rbind(tmp, AUC_AD) tmp <- rbind(tmp, AUC_WAD) tmp <- rbind(tmp, AUC_FC) tmp <- rbind(tmp, AUC_RP) tmp <- rbind(tmp, AUC_modT) tmp <- rbind(tmp, AUC_samT) tmp <- rbind(tmp, AUC_shrinkT) tmp <- rbind(tmp, AUC_ibmT) write.table(tmp, "result_Dataset2_AUC_RMA.txt", sep = "\t", append=F, quote=F, row.names=F) ############################### ############################### ### DFW-preprocessed data ### ############################### ############################### data <- read.table("http://affycomp.biostat.jhsph.edu/AFFY2/zhongxue@mail.smu.edu/060612.1450/hgu133.csv", header=TRUE, row.names=1, sep=",") data[data < 1] <- 1 data <- log(data, 2) data.cl <- data.cl.org ################################################################### ### Assignment for 42 true differentially expressed probesets ### data.degenes <- as.matrix(c(rep(0, nrow(data)))) rownames(data.degenes) <- rownames(data) degenes <- c( "203508_at", "204563_at", "204513_s_at", "204205_at", "204959_at", "207655_s_at", "204836_at", "205291_at", "209795_at", "207777_s_at", "204912_at", "205569_at", "207160_at", "205692_s_at", "212827_at", "209606_at", "205267_at", "204417_at", "205398_s_at", "209734_at", "209354_at", "206060_s_at", "205790_at", "200665_s_at", "207641_at", "207540_s_at", "204430_s_at", "203471_s_at", "204951_at", "207968_s_at", "AFFX-r2-TagA_at", "AFFX-r2-TagB_at", "AFFX-r2-TagC_at", "AFFX-r2-TagD_at", "AFFX-r2-TagE_at", "AFFX-r2-TagF_at", "AFFX-r2-TagG_at", "AFFX-r2-TagH_at", "AFFX-DapX-3_at", "AFFX-LysX-3_at", "AFFX-PheX-3_at", "AFFX-ThrX-3_at" ) data.degenes[degenes, ] <- 1 ################################################################### ################################# ### AD (Average Difference) ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org tmpall <- apply(data[,analysis_vector == 1], 1, AveDiff, data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j,"\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_DFW_AD.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_DFW_AD.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_AD <- apply(quantile_tmp, 1, mean) AUC_AD <- mean(AUC_tmp) ############# ### WAD ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL data.sub <- NULL tmp.class1 <- NULL tmp.class2 <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org data.sub <- data[,analysis_vector == 1] tmp.class1 <- apply(data.sub[,data.cl == 0], 1, mean) tmp.class2 <- apply(data.sub[,data.cl == 1], 1, mean) ave_vector <- (tmp.class1 + tmp.class2)/2 dynamic_range <- max(ave_vector) - min(ave_vector) tmpall <- apply(data[,analysis_vector == 1], 1, WAD, data.cl, dynamic_range, min(ave_vector)) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_DFW_WAD.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_DFW_WAD.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_WAD <- apply(quantile_tmp, 1, mean) AUC_WAD <- mean(AUC_tmp) ############################# ### WAD's "w" term only ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL data.sub <- NULL tmp.class1 <- NULL tmp.class2 <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org data.sub <- data[,analysis_vector == 1] tmp.class1 <- apply(data.sub[,data.cl == 0], 1, mean) tmp.class2 <- apply(data.sub[,data.cl == 1], 1, mean) ave_vector <- (tmp.class1 + tmp.class2)/2 dynamic_range <- max(ave_vector) - min(ave_vector) tmpall <- (ave_vector - min(ave_vector))/dynamic_range rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_DFW_w.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_DFW_w.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_w <- apply(quantile_tmp, 1, mean) AUC_w <- mean(AUC_tmp) ############ ### FC ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL data.degenes.vector <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data <- 2^data data.cl <- data.cl.org tmpall <- apply(data[,analysis_vector == 1], 1, AvelogFC, data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) data <- log(data, 2) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_DFW_FC.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_DFW_FC.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_FC <- apply(quantile_tmp, 1, mean) AUC_FC <- mean(AUC_tmp) ############ ### RP ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org tmpall <- RP(data[,analysis_vector == 1], data.cl, num.perm=1, logged=TRUE, na.rm = FALSE, plot = FALSE, rand = 123) rank_method <- rank(apply(tmpall$RPs, 1, min), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_DFW_RP.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_DFW_RP.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_RP <- apply(quantile_tmp, 1, mean) AUC_RP <- mean(AUC_tmp) ############## ### ibmT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org design <- model.matrix(~data.cl) data <- as.matrix(data) fit <- lmFit(data[,analysis_vector == 1], design) fit$Amean<-rowMeans(data[,analysis_vector == 1]) fit <- IBMT(fit,2) tmpall <- fit$IBMT.t rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_DFW_ibmT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_DFW_ibmT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_ibmT <- apply(quantile_tmp, 1, mean) AUC_ibmT <- mean(AUC_tmp) ############## ### modT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org + 1 tmpall <- modt.stat(t(data[,analysis_vector == 1]), data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_DFW_modT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_DFW_modT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_modT <- apply(quantile_tmp, 1, mean) AUC_modT <- mean(AUC_tmp) ############## ### samT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org + 1 tmpall <- sam.stat(t(data[,analysis_vector == 1]), data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_DFW_samT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_DFW_samT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_samT <- apply(quantile_tmp, 1, mean) AUC_samT <- mean(AUC_tmp) ################# ### shrinkT ### quantile_tmp <- NULL AUC_tmp <- NULL rank_method <- NULL analysis_vector <- c(rep(0, ncol(data))) for(i in 1:(ncol(data)/3-1)){ for(j in (i+1):(ncol(data)/3)){ analysis_vector <- c(rep(0, ncol(data))) analysis_vector[c(i,i+ncol(data)/3, i+ncol(data)/3*2)] <- 1 analysis_vector[c(j,j+ncol(data)/3, j+ncol(data)/3*2)] <- 1 data.cl <- data.cl.org + 1 tmpall <- shrinkt.stat(t(data[,analysis_vector == 1]), data.cl) rank_method <- rank(-abs(tmpall), ties.method = "min") quantile_tmp <- cbind(quantile_tmp, quantile(rank_method[data.degenes == 1])) AUC_tmp <- rbind(AUC_tmp, AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_method), rule = dxrule.sca))) #cat(i, j, "\n") } } write.table(quantile_tmp, "result_Dataset2_quantile_DFW_shrinkT.txt", sep = "\t", append=F, quote=F, row.names=F) write.table(AUC_tmp, "result_Dataset2_AUC_DFW_shrinkT.txt", sep = "\t", append=F, quote=F, row.names=F) quantile_shrinkT <- apply(quantile_tmp, 1, mean) AUC_shrinkT <- mean(AUC_tmp) tmp <- rbind(quantile_w, quantile_AD, quantile_WAD, quantile_FC, quantile_RP, quantile_modT, quantile_samT, quantile_shrinkT, quantile_ibmT) write.table(tmp, "result_Dataset2_quantile_DFW.txt", sep = "\t", append=F, quote=F, row.names=F) tmp <- AUC_w tmp <- rbind(tmp, AUC_AD) tmp <- rbind(tmp, AUC_WAD) tmp <- rbind(tmp, AUC_FC) tmp <- rbind(tmp, AUC_RP) tmp <- rbind(tmp, AUC_modT) tmp <- rbind(tmp, AUC_samT) tmp <- rbind(tmp, AUC_shrinkT) tmp <- rbind(tmp, AUC_ibmT) write.table(tmp, "result_Dataset2_AUC_DFW.txt", sep = "\t", append=F, quote=F, row.names=F)