#function to linearly scale DREBIC scores to range from -1 to 1

normalize <- function(x) {
  # Linearly rescale x so that its minimum maps to -1 and its maximum to 1.
  # No NA handling is done here: min()/max() are called without na.rm, so an
  # NA in x makes the whole result NA. When all values are equal the span is
  # zero and the result is NaN.
  lo <- min(x)
  hi <- max(x)
  span <- hi - lo

  slope <- 2 / span
  intercept <- -1 * (hi + lo) / span

  x * slope + intercept
}


# main DREBIC function
# expression_score - matrix with log transformed gene expressions (in rows) for all samples (in columns)
# CRISPR_score - single-column data frame with CRISPR scores in column V1; gene names (row names) are expected to match gene names from expression_score matrix
# n_top - number of genes with highest CRISPR scores (default is 225) to incorporate into DREBIC
# n_bottom - number of genes with lowest CRISPR scores (default is 175) to incorporate into DREBIC
# na.rm (logical, default TRUE) is passed to colSums. With TRUE, NA values (e.g. from gene names that do not match) are ignored.
# Change to FALSE to let such NA values propagate into the returned scores instead.
#
# function returns a named numeric vector of DREBIC scores, one element per sample (column of expression_score)

scoreCells <- function(expression_score, CRISPR_score,
                       n_top = 225, n_bottom = 175, na.rm = TRUE) {
  # Compute a DREBIC score for every sample (column) of expression_score.
  #
  # expression_score: matrix or data frame, genes in rows (row names are gene
  #   names), samples in columns.
  # CRISPR_score: data frame or matrix with gene names as row names. Scores
  #   are read from the column named V1 when there is one, otherwise from the
  #   first column.
  # n_top / n_bottom: how many genes with the highest / lowest CRISPR scores
  #   enter the score; both are capped at the number of genes available.
  # na.rm: forwarded to colSums(); TRUE drops NA products (e.g. genes absent
  #   from expression_score), FALSE lets them propagate as NA.
  #
  # Returns a named numeric vector (one value per sample), rescaled by
  # normalize().
  stopifnot(
    is.numeric(n_top), length(n_top) == 1L, !is.na(n_top), n_top >= 0,
    is.numeric(n_bottom), length(n_bottom) == 1L, !is.na(n_bottom),
    n_bottom >= 0
  )

  # Keep the V1 contract, but also accept a plain single-column matrix or a
  # data frame whose score column has a different name.
  score_col <- if ("V1" %in% colnames(CRISPR_score)) "V1" else 1L
  crispr <- CRISPR_score[, score_col, drop = TRUE]

  # Rank genes from highest to lowest CRISPR score.
  ranking <- order(crispr, decreasing = TRUE)
  crispr <- crispr[ranking]
  genes <- rownames(CRISPR_score)[ranking]

  # Align expression rows with the ranked genes; unmatched genes become NA
  # rows. drop = FALSE keeps two dimensions even with a single sample.
  gene_rows <- match(genes, rownames(expression_score))
  matched <- as.matrix(expression_score[gene_rows, , drop = FALSE])

  # crispr has one entry per row, so it is recycled down every column.
  weighted <- matched * crispr

  # head()/tail() take exactly n rows (0 rows when n is 0) and never run past
  # the matrix; the former (nrow - n_bottom):nrow range took one row too many.
  top_sum <- colSums(head(weighted, n_top), na.rm = na.rm)
  bottom_sum <- colSums(tail(weighted, n_bottom), na.rm = na.rm)

  normalize(bottom_sum - top_sum)
}

####example of usage

# Load the CRISPR scores and the expression table; in both files the first
# CSV column supplies the row names.
score <- read.csv(file = "CRISPRscores.csv", row.names = 1)
expr <- read.csv(file = "CPGgeneExpressions.csv", row.names = 1)

# Score every sample (column of expr).
DREBIC <- scoreCells(expression_score = expr, CRISPR_score = score, na.rm = TRUE)

# best responders: the six highest-scoring samples
tail(sort(DREBIC))