####################################################### # Function for WAD (Weighted Average Difference) in R # Written by: Koji Kadota, Univ. Tokyo, 2007 # Citation: Kadota K, Nakai Y, Shimizu K., A weighted average difference # method for detecting differentially expressed genes # using DNA microarrays. Algorithms Mol. Biol., 2008 ####################################################### WAD <- function(x, cl, dynamic_r, min_v){ x.class1 <- x[(cl == 0)] x.class2 <- x[(cl == 1)] x_ave <- (mean(x.class1) + mean(x.class2))/2 weight <- (x_ave - min_v)/dynamic_r statistic <- (mean(x.class2) - mean(x.class1))*weight return(statistic) } ############################# ### Average Difference ### ############################# AveDiff <- function(x, cl){ x.class0 <- x[(cl == 0)] x.class1 <- x[(cl == 1)] statistic <- mean(x.class1) - mean(x.class0) return(statistic) } ############################# ### Average Fold Change ### ############################# AvelogFC <- function(x, cl){ x.class0 <- x[(cl == 0)] x.class1 <- x[(cl == 1)] statistic <- log2(mean(x.class1)/mean(x.class0)) return(statistic) } source('http://eh3.uc.edu/r/ibmtR.R') library(RankProd) #パッケージの読み込み library(samr) library(st) library(limma) library(ROC) ############################### ############################### ### MAS-preprocessed data ### ############################### ############################### data <- read.table("http://affycomp.biostat.jhsph.edu/AFFY2/rafa@jhu.edu/030424.1033/hgu95.csv", header=TRUE, row.names=1, sep=",") subset <- c(13:16, 33:36, 52:55, 17:20, 37:40, 56:59) data <- data[,subset] data[data < 1] <- 1 data <- log(data, 2) data.cl.org <- c(rep(0, 12), rep(1, 12)) data.cl <- data.cl.org ################################################################### ### Assignment for 16 true differentially expressed probesets ### data.degenes <- as.matrix(c(rep(0, nrow(data)))) rownames(data.degenes) <- rownames(data) degenes <- c( "37777_at", "684_at", "1597_at", "38734_at", "39058_at", "36311_at", "36889_at", "1024_at", "36202_at", "36085_at", "40322_at", "407_at", "1091_at", "1708_at", "33818_at", "546_at" ) data.degenes[degenes, ] <- 1 ################################################################### ################################# ### AD (Average Difference) ### tmpall <- apply(data, 1, AveDiff, data.cl) out_AD <- abs(tmpall) ############# ### WAD ### tmp.class1 <- apply(data[,data.cl == 0], 1, mean) tmp.class2 <- apply(data[,data.cl == 1], 1, mean) ave_vector <- (tmp.class1 + tmp.class2)/2 dynamic_range <- max(ave_vector) - min(ave_vector) tmpall <- apply(data, 1, WAD, data.cl, dynamic_range, min(ave_vector)) out_WAD <- abs(tmpall) ############################# ### WAD's "w" term only ### out_w <- (ave_vector - min(ave_vector))/dynamic_range ############ ### FC ### data <- 2^data tmpall <- apply(data, 1, AvelogFC, data.cl) out_FC <- abs(tmpall) data <- log(data, 2) ############ ### RP ### tmpall <- RP(data, data.cl, num.perm=1, logged=TRUE, na.rm = FALSE, plot = FALSE, rand = 123) out_RP <- apply(tmpall$RPs, 1, min) ############## ### ibmT ### design <- model.matrix(~data.cl) data <- as.matrix(data) fit <- lmFit(data, design) fit$Amean<-rowMeans(data) fit <- IBMT(fit,2) tmpall <- fit$IBMT.t out_ibmT <- abs(tmpall) data.cl <- data.cl.org + 1 ############## ### modT ### tmpall <- modt.stat(t(data), data.cl) out_modT <- abs(tmpall) ############## ### samT ### tmpall <- sam.stat(t(data), data.cl) out_samT <- abs(tmpall) ################# ### shrinkT ### tmpall <- shrinkt.stat(t(data), data.cl) out_shrinkT <- abs(tmpall) #tmp <- cbind(rownames(data), data.degenes, data, out_w, out_AD, out_WAD, out_FC, out_RP, out_modT, out_samT, out_shrinkT, out_ibmT) #write.table(tmp, "result_Dataset1_MAS.txt", sep = "\t", append=F, quote=F, row.names=F) ################################################################## ### Calculation of percentile ranks for true positives (TPs) ### rank_w <- as.matrix(rank(-out_w, ties.method = "min")) rownames(rank_w) <- rownames(data) quantile(rank_w[data.degenes == 1]) rank_AD <- as.matrix(rank(-out_AD, ties.method = "min")) rownames(rank_AD) <- rownames(data) quantile(rank_AD[data.degenes == 1]) rank_WAD <- as.matrix(rank(-out_WAD, ties.method = "min")) rownames(rank_WAD) <- rownames(data) quantile(rank_WAD[data.degenes == 1]) rank_FC <- as.matrix(rank(-out_FC, ties.method = "min")) rownames(rank_FC) <- rownames(data) quantile(rank_FC[data.degenes == 1]) rank_RP <- as.matrix(rank(out_RP, ties.method = "min")) rownames(rank_RP) <- rownames(data) quantile(rank_RP[data.degenes == 1]) rank_modT <- as.matrix(rank(-out_modT, ties.method = "min")) rownames(rank_modT) <- rownames(data) quantile(rank_modT[data.degenes == 1]) rank_samT <- as.matrix(rank(-out_samT, ties.method = "min")) rownames(rank_samT) <- rownames(data) quantile(rank_samT[data.degenes == 1]) rank_shrinkT <- as.matrix(rank(-out_shrinkT, ties.method = "min")) rownames(rank_shrinkT) <- rownames(data) quantile(rank_shrinkT[data.degenes == 1]) rank_ibmT <- as.matrix(rank(-out_ibmT, ties.method = "min")) rownames(rank_ibmT) <- rownames(data) quantile(rank_ibmT[data.degenes == 1]) tmp <- rbind(quantile(rank_w[data.degenes == 1]), quantile(rank_AD[data.degenes == 1]), quantile(rank_WAD[data.degenes == 1]), quantile(rank_FC[data.degenes == 1]), quantile(rank_RP[data.degenes == 1]), quantile(rank_modT[data.degenes == 1]), quantile(rank_samT[data.degenes == 1]), quantile(rank_shrinkT[data.degenes == 1]), quantile(rank_ibmT[data.degenes == 1])) write.table(tmp, "result_Dataset1_quantile_MAS.txt", sep = "\t", append=F, quote=F, row.names=F) ################################################# ### Calculation of AUC values for 8 methods ### AUC_w <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_w), rule = dxrule.sca)) AUC_AD <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_AD), rule = dxrule.sca)) AUC_WAD <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_WAD), rule = dxrule.sca)) AUC_FC <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_FC), rule = dxrule.sca)) AUC_RP <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_RP), rule = dxrule.sca)) AUC_modT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_modT), rule = dxrule.sca)) AUC_samT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_samT), rule = dxrule.sca)) AUC_shrinkT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_shrinkT), rule = dxrule.sca)) AUC_ibmT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_ibmT), rule = dxrule.sca)) tmp <- AUC_w tmp <- rbind(tmp, AUC_AD) tmp <- rbind(tmp, AUC_WAD) tmp <- rbind(tmp, AUC_FC) tmp <- rbind(tmp, AUC_RP) tmp <- rbind(tmp, AUC_modT) tmp <- rbind(tmp, AUC_samT) tmp <- rbind(tmp, AUC_shrinkT) tmp <- rbind(tmp, AUC_ibmT) write.table(tmp, "result_Dataset1_AUC_MAS.txt", sep = "\t", append=F, quote=F, row.names=F) ############################### ############################### ### RMA-preprocessed data ### ############################### ############################### data <- read.table("http://affycomp.biostat.jhsph.edu/AFFY2/rafa@jhu.edu/030429.1332/hgu95.csv", header=TRUE, row.names=1, sep=",") subset <- c(13:16, 33:36, 52:55, 17:20, 37:40, 56:59) data <- data[,subset] data[data < 1] <- 1 data <- log(data, 2) data.cl.org <- c(rep(0, 12), rep(1, 12)) data.cl <- data.cl.org ################################################################### ### Assignment for 16 true differentially expressed probesets ### data.degenes <- as.matrix(c(rep(0, nrow(data)))) rownames(data.degenes) <- rownames(data) degenes <- c( "37777_at", "684_at", "1597_at", "38734_at", "39058_at", "36311_at", "36889_at", "1024_at", "36202_at", "36085_at", "40322_at", "407_at", "1091_at", "1708_at", "33818_at", "546_at" ) data.degenes[degenes, ] <- 1 ################################################################### ################################# ### AD (Average Difference) ### tmpall <- apply(data, 1, AveDiff, data.cl) out_AD <- abs(tmpall) ############# ### WAD ### tmp.class1 <- apply(data[,data.cl == 0], 1, mean) tmp.class2 <- apply(data[,data.cl == 1], 1, mean) ave_vector <- (tmp.class1 + tmp.class2)/2 dynamic_range <- max(ave_vector) - min(ave_vector) tmpall <- apply(data, 1, WAD, data.cl, dynamic_range, min(ave_vector)) out_WAD <- abs(tmpall) ############################# ### WAD's "w" term only ### out_w <- (ave_vector - min(ave_vector))/dynamic_range ############ ### FC ### data <- 2^data tmpall <- apply(data, 1, AvelogFC, data.cl) out_FC <- abs(tmpall) data <- log(data, 2) ############ ### RP ### tmpall <- RP(data, data.cl, num.perm=1, logged=TRUE, na.rm = FALSE, plot = FALSE, rand = 123) out_RP <- apply(tmpall$RPs, 1, min) ############## ### ibmT ### design <- model.matrix(~data.cl) data <- as.matrix(data) fit <- lmFit(data, design) fit$Amean<-rowMeans(data) fit <- IBMT(fit,2) tmpall <- fit$IBMT.t out_ibmT <- abs(tmpall) data.cl <- data.cl.org + 1 ############## ### modT ### tmpall <- modt.stat(t(data), data.cl) out_modT <- abs(tmpall) ############## ### samT ### tmpall <- sam.stat(t(data), data.cl) out_samT <- abs(tmpall) ################# ### shrinkT ### tmpall <- shrinkt.stat(t(data), data.cl) out_shrinkT <- abs(tmpall) #tmp <- cbind(rownames(data), data.degenes, data, out_w, out_AD, out_WAD, out_FC, out_RP, out_modT, out_samT, out_shrinkT, out_ibmT) #write.table(tmp, "result_Dataset1_RMA.txt", sep = "\t", append=F, quote=F, row.names=F) ################################################################## ### Calculation of percentile ranks for true positives (TPs) ### rank_w <- as.matrix(rank(-out_w, ties.method = "min")) rownames(rank_w) <- rownames(data) quantile(rank_w[data.degenes == 1]) rank_AD <- as.matrix(rank(-out_AD, ties.method = "min")) rownames(rank_AD) <- rownames(data) quantile(rank_AD[data.degenes == 1]) rank_WAD <- as.matrix(rank(-out_WAD, ties.method = "min")) rownames(rank_WAD) <- rownames(data) quantile(rank_WAD[data.degenes == 1]) rank_FC <- as.matrix(rank(-out_FC, ties.method = "min")) rownames(rank_FC) <- rownames(data) quantile(rank_FC[data.degenes == 1]) rank_RP <- as.matrix(rank(out_RP, ties.method = "min")) rownames(rank_RP) <- rownames(data) quantile(rank_RP[data.degenes == 1]) rank_modT <- as.matrix(rank(-out_modT, ties.method = "min")) rownames(rank_modT) <- rownames(data) quantile(rank_modT[data.degenes == 1]) rank_samT <- as.matrix(rank(-out_samT, ties.method = "min")) rownames(rank_samT) <- rownames(data) quantile(rank_samT[data.degenes == 1]) rank_shrinkT <- as.matrix(rank(-out_shrinkT, ties.method = "min")) rownames(rank_shrinkT) <- rownames(data) quantile(rank_shrinkT[data.degenes == 1]) rank_ibmT <- as.matrix(rank(-out_ibmT, ties.method = "min")) rownames(rank_ibmT) <- rownames(data) quantile(rank_ibmT[data.degenes == 1]) tmp <- rbind(quantile(rank_w[data.degenes == 1]), quantile(rank_AD[data.degenes == 1]), quantile(rank_WAD[data.degenes == 1]), quantile(rank_FC[data.degenes == 1]), quantile(rank_RP[data.degenes == 1]), quantile(rank_modT[data.degenes == 1]), quantile(rank_samT[data.degenes == 1]), quantile(rank_shrinkT[data.degenes == 1]), quantile(rank_ibmT[data.degenes == 1])) write.table(tmp, "result_Dataset1_quantile_RMA.txt", sep = "\t", append=F, quote=F, row.names=F) ################################################# ### Calculation of AUC values for 8 methods ### AUC_w <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_w), rule = dxrule.sca)) AUC_AD <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_AD), rule = dxrule.sca)) AUC_WAD <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_WAD), rule = dxrule.sca)) AUC_FC <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_FC), rule = dxrule.sca)) AUC_RP <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_RP), rule = dxrule.sca)) AUC_modT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_modT), rule = dxrule.sca)) AUC_samT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_samT), rule = dxrule.sca)) AUC_shrinkT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_shrinkT), rule = dxrule.sca)) AUC_ibmT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_ibmT), rule = dxrule.sca)) tmp <- AUC_w tmp <- rbind(tmp, AUC_AD) tmp <- rbind(tmp, AUC_WAD) tmp <- rbind(tmp, AUC_FC) tmp <- rbind(tmp, AUC_RP) tmp <- rbind(tmp, AUC_modT) tmp <- rbind(tmp, AUC_samT) tmp <- rbind(tmp, AUC_shrinkT) tmp <- rbind(tmp, AUC_ibmT) write.table(tmp, "result_Dataset1_AUC_RMA.txt", sep = "\t", append=F, quote=F, row.names=F) ############################### ############################### ### DFW-preprocessed data ### ############################### ############################### data <- read.table("http://affycomp.biostat.jhsph.edu/AFFY2/zhongxue@mail.smu.edu/060612.1450/hgu95.csv", header=TRUE, row.names=1, sep=",") subset <- c(13:16, 33:36, 52:55, 17:20, 37:40, 56:59) data <- data[,subset] data[data < 1] <- 1 data <- log(data, 2) data.cl.org <- c(rep(0, 12), rep(1, 12)) data.cl <- data.cl.org ################################################################### ### Assignment for 16 true differentially expressed probesets ### data.degenes <- as.matrix(c(rep(0, nrow(data)))) rownames(data.degenes) <- rownames(data) degenes <- c( "37777_at", "684_at", "1597_at", "38734_at", "39058_at", "36311_at", "36889_at", "1024_at", "36202_at", "36085_at", "40322_at", "407_at", "1091_at", "1708_at", "33818_at", "546_at" ) data.degenes[degenes, ] <- 1 ################################################################### ################################# ### AD (Average Difference) ### tmpall <- apply(data, 1, AveDiff, data.cl) out_AD <- abs(tmpall) ############# ### WAD ### tmp.class1 <- apply(data[,data.cl == 0], 1, mean) tmp.class2 <- apply(data[,data.cl == 1], 1, mean) ave_vector <- (tmp.class1 + tmp.class2)/2 dynamic_range <- max(ave_vector) - min(ave_vector) tmpall <- apply(data, 1, WAD, data.cl, dynamic_range, min(ave_vector)) out_WAD <- abs(tmpall) ############################# ### WAD's "w" term only ### out_w <- (ave_vector - min(ave_vector))/dynamic_range ############ ### FC ### data <- 2^data tmpall <- apply(data, 1, AvelogFC, data.cl) out_FC <- abs(tmpall) data <- log(data, 2) ############ ### RP ### tmpall <- RP(data, data.cl, num.perm=1, logged=TRUE, na.rm = FALSE, plot = FALSE, rand = 123) out_RP <- apply(tmpall$RPs, 1, min) ############## ### ibmT ### design <- model.matrix(~data.cl) data <- as.matrix(data) fit <- lmFit(data, design) fit$Amean<-rowMeans(data) fit <- IBMT(fit,2) tmpall <- fit$IBMT.t out_ibmT <- abs(tmpall) data.cl <- data.cl.org + 1 ############## ### modT ### tmpall <- modt.stat(t(data), data.cl) out_modT <- abs(tmpall) ############## ### samT ### tmpall <- sam.stat(t(data), data.cl) out_samT <- abs(tmpall) ################# ### shrinkT ### tmpall <- shrinkt.stat(t(data), data.cl) out_shrinkT <- abs(tmpall) #tmp <- cbind(rownames(data), data.degenes, data, out_w, out_AD, out_WAD, out_FC, out_RP, out_modT, out_samT, out_shrinkT, out_ibmT) #write.table(tmp, "result_Dataset1_DFW.txt", sep = "\t", append=F, quote=F, row.names=F) ################################################################## ### Calculation of percentile ranks for true positives (TPs) ### rank_w <- as.matrix(rank(-out_w, ties.method = "min")) rownames(rank_w) <- rownames(data) quantile(rank_w[data.degenes == 1]) rank_AD <- as.matrix(rank(-out_AD, ties.method = "min")) rownames(rank_AD) <- rownames(data) quantile(rank_AD[data.degenes == 1]) rank_WAD <- as.matrix(rank(-out_WAD, ties.method = "min")) rownames(rank_WAD) <- rownames(data) quantile(rank_WAD[data.degenes == 1]) rank_FC <- as.matrix(rank(-out_FC, ties.method = "min")) rownames(rank_FC) <- rownames(data) quantile(rank_FC[data.degenes == 1]) rank_RP <- as.matrix(rank(out_RP, ties.method = "min")) rownames(rank_RP) <- rownames(data) quantile(rank_RP[data.degenes == 1]) rank_modT <- as.matrix(rank(-out_modT, ties.method = "min")) rownames(rank_modT) <- rownames(data) quantile(rank_modT[data.degenes == 1]) rank_samT <- as.matrix(rank(-out_samT, ties.method = "min")) rownames(rank_samT) <- rownames(data) quantile(rank_samT[data.degenes == 1]) rank_shrinkT <- as.matrix(rank(-out_shrinkT, ties.method = "min")) rownames(rank_shrinkT) <- rownames(data) quantile(rank_shrinkT[data.degenes == 1]) rank_ibmT <- as.matrix(rank(-out_ibmT, ties.method = "min")) rownames(rank_ibmT) <- rownames(data) quantile(rank_ibmT[data.degenes == 1]) tmp <- rbind(quantile(rank_w[data.degenes == 1]), quantile(rank_AD[data.degenes == 1]), quantile(rank_WAD[data.degenes == 1]), quantile(rank_FC[data.degenes == 1]), quantile(rank_RP[data.degenes == 1]), quantile(rank_modT[data.degenes == 1]), quantile(rank_samT[data.degenes == 1]), quantile(rank_shrinkT[data.degenes == 1]), quantile(rank_ibmT[data.degenes == 1])) write.table(tmp, "result_Dataset1_quantile_DFW.txt", sep = "\t", append=F, quote=F, row.names=F) ################################################# ### Calculation of AUC values for 8 methods ### AUC_w <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_w), rule = dxrule.sca)) AUC_AD <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_AD), rule = dxrule.sca)) AUC_WAD <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_WAD), rule = dxrule.sca)) AUC_FC <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_FC), rule = dxrule.sca)) AUC_RP <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_RP), rule = dxrule.sca)) AUC_modT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_modT), rule = dxrule.sca)) AUC_samT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_samT), rule = dxrule.sca)) AUC_shrinkT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_shrinkT), rule = dxrule.sca)) AUC_ibmT <- AUC(rocdemo.sca(truth = as.vector(data.degenes), data = as.vector(-rank_ibmT), rule = dxrule.sca)) tmp <- AUC_w tmp <- rbind(tmp, AUC_AD) tmp <- rbind(tmp, AUC_WAD) tmp <- rbind(tmp, AUC_FC) tmp <- rbind(tmp, AUC_RP) tmp <- rbind(tmp, AUC_modT) tmp <- rbind(tmp, AUC_samT) tmp <- rbind(tmp, AUC_shrinkT) tmp <- rbind(tmp, AUC_ibmT) write.table(tmp, "result_Dataset1_AUC_DFW.txt", sep = "\t", append=F, quote=F, row.names=F)