# Linearly rescale a numeric vector so that its minimum maps to -1 and its
# maximum maps to 1 (used to normalise DREBIC scores).
#
# x - numeric vector of raw scores
#
# Returns a numeric vector of the same length as x (names are preserved).
# Any NA in x makes the whole result NA; if all values of x are equal the
# range is zero and the result is NaN.
normalize <- function(x) {
  lo <- min(x)
  hi <- max(x)
  a <- 2 / (hi - lo)
  b <- -1 * (hi + lo) / (hi - lo)
  x * a + b
}

# Main DREBIC function.
#
# expression_score - matrix or data frame with log-transformed gene
#                    expressions (genes in rows) for all samples (in columns)
# CRISPR_score     - single-column matrix or data frame with CRISPR scores;
#                    row names are expected to match the gene names (row
#                    names) of expression_score. The column named "V1" is
#                    used when present, otherwise the first column.
# n_top            - number of genes with the highest CRISPR scores
#                    (default 225) to incorporate into DREBIC
# n_bottom         - number of genes with the lowest CRISPR scores
#                    (default 175) to incorporate into DREBIC
# na.rm            - logical (default TRUE), passed to colSums. Genes that are
#                    missing from expression_score produce NA rows; with
#                    na.rm = TRUE they are ignored, with na.rm = FALSE they
#                    turn the resulting scores into NA.
#
# n_top and n_bottom are capped at the number of genes in CRISPR_score.
#
# Returns a named numeric vector with one DREBIC score per sample (named by
# the columns of expression_score), rescaled to the range -1 to 1.
scoreCells <- function(expression_score, CRISPR_score, n_top = 225,
                       n_bottom = 175, na.rm = TRUE) {
  # as.data.frame() names the column of an unnamed single-column matrix "V1",
  # so both matrix and data frame inputs end up with the same layout.
  crispr <- as.data.frame(CRISPR_score)
  weight_col <- if ("V1" %in% colnames(crispr)) "V1" else 1L

  # Sort genes from the highest to the lowest CRISPR score.
  crispr <- crispr[order(crispr[[weight_col]], decreasing = TRUE), ,
                   drop = FALSE]
  weights <- crispr[[weight_col]]

  # Align expression rows with the sorted genes; genes that are not found
  # become NA rows. drop = FALSE keeps a single-sample input two-dimensional.
  gene_idx <- match(rownames(crispr), rownames(expression_score))
  matched <- as.matrix(expression_score)[gene_idx, , drop = FALSE]

  # weights has one entry per row, so it is recycled down every column.
  combined <- matched * weights

  n_genes <- nrow(combined)
  k_top <- min(n_top, n_genes)
  k_bottom <- min(n_bottom, n_genes)
  top_rows <- seq_len(k_top)
  # Exactly the last k_bottom rows: (n_genes - k_bottom + 1):n_genes.
  bottom_rows <- n_genes - k_bottom + seq_len(k_bottom)

  sample_score <-
    colSums(combined[bottom_rows, , drop = FALSE], na.rm = na.rm) -
    colSums(combined[top_rows, , drop = FALSE], na.rm = na.rm)

  normalize(sample_score)
}

#### example of usage
# Runs only when both example input files are present, so that sourcing this
# file just to obtain the functions does not fail.
if (file.exists("CRISPRscores.csv") &&
    file.exists("CPGgeneExpressions.csv")) {
  score <- read.csv("CRISPRscores.csv", row.names = 1)
  expr <- read.csv("CPGgeneExpressions.csv", row.names = 1)
  DREBIC <- scoreCells(expr, score, na.rm = TRUE)
  # best responders
  tail(sort(DREBIC))
}