remove(list = ls()) # R packages utilized library(tidyverse) library(MASS) library(epiR) library(lubridate) library(readxl, quietly = TRUE) library(dplyr) library(ggplot2) library(reshape2) library(hrbrthemes) library(data.table, quietly = TRUE) library(foreach, quietly = TRUE) library(doBy) library(ggpmisc) library(gridExtra) library(ggpubr) library(MatchIt) library(gtsummary) library(lme4) library(sjPlot) library(stargazer) library(cAIC4) library(MuMIn) library(fmsb) library(jtools) library(broom) library(ggstance) library(pROC) library(InformationValue) library(caret) # load functions cor_est <- function(x,y,...){ obj <- cor.test(x,y,...) val <- obj$estimate return(round(as.numeric(val),digits = 3)) } cor_pval <- function(x,y,...){ obj <- cor.test(x,y,...) pval <- obj$p.value return(as.numeric(pval)) } corr_fun <- function(dat,outcomes,new_markers,cohort,...){ dat_est <- sapply(outcomes,function(ychar){ sapply(new_markers,function(xchar){ cor_est(dat[[xchar]],dat[[ychar]],method="spearman") }) }) dat_pval <- sapply(outcomes,function(ychar){sapply(new_markers,function(xchar){ cor_pval(dat[[xchar]],dat[[ychar]],method="spearman")})}) dat_pval <- as_tibble(dat_pval) %>% mutate_all(p.adjust,method="fdr") %>% mutate(marker = new_markers,type=paste("p-value")) %>% select(marker,type,everything()) dat_est <- as_tibble(dat_est) %>% mutate(marker = new_markers,type=paste("spearman")) %>% select(marker,type,everything()) dat_results <- bind_rows(dat_est,dat_pval) dat_results <- dat_results %>% gather(key,value,outcomes) dat_results <- dat_results %>% mutate(key = factor(key,levels=unique(dat_results$key))) %>% mutate(type=factor(type,levels=c(paste("spearman"),paste("p-value")))) %>% arrange(marker,key,type) %>% mutate(key = paste(as.character(key),type)) %>% select(-type) dat_results <- dat_results %>% mutate(key = factor(key,levels=unique(dat_results$key))) %>% spread(key,value) return(dat_results) } radarchart2 <- function (df, axistype = 0, seg = 4, pty = 16, pcol = 1:8, plty = 1:6, plwd = 1, pdensity = NULL, pangle = 45, pfcol = NA, cglty = 3, cglwd = 1, cglcol = "navy", axislabcol = "blue", vlabcol = "black", title = "", maxmin = TRUE, na.itp = TRUE, centerzero = FALSE, vlabels = NULL, vlcex = NULL, caxislabels = NULL, calcex = NULL, paxislabels = NULL, palcex = NULL, ...) { if (!is.data.frame(df)) { cat("The data must be given as dataframe.\n") return() } if ((n <- length(df)) < 3) { cat("The number of variables must be 3 or more.\n") return() } if (maxmin == FALSE) { dfmax <- apply(df, 2, max) dfmin <- apply(df, 2, min) df <- rbind(dfmax, dfmin, df) } plot(c(-1.2, 1.2), c(-1.2, 1.2), type = "n", frame.plot = FALSE, axes = FALSE, xlab = "", ylab = "", main = title, asp = 1, ...) theta <- seq(90, 450, length = n + 1) * pi/180 theta <- theta[1:n] xx <- cos(theta) yy <- sin(theta) CGap <- ifelse(centerzero, 0, 1) for (i in 0:seg) { polygon(xx * (i + CGap)/(seg + CGap), yy * (i + CGap)/(seg + CGap), lty = cglty, lwd = cglwd, border = cglcol) if (axistype == 1 | axistype == 3) CAXISLABELS <- paste(i/seg * 100, "(%)") if (axistype == 4 | axistype == 5) CAXISLABELS <- sprintf("%3.2f", i/seg) if (!is.null(caxislabels) & (i < length(caxislabels))) CAXISLABELS <- caxislabels[i + 1] if (axistype == 1 | axistype == 3 | axistype == 4 | axistype == 5) { if (is.null(calcex)) text(-0.05, (i + CGap)/(seg + CGap), CAXISLABELS, col = axislabcol) else text(-0.05, (i + CGap)/(seg + CGap), CAXISLABELS, col = axislabcol, cex = calcex) } } if (centerzero) { arrows(0, 0, xx * 1, yy * 1, lwd = cglwd, lty = cglty, length = 0, col = cglcol) } else { arrows(xx/(seg + CGap), yy/(seg + CGap), xx * 1, yy * 1, lwd = cglwd, lty = cglty, length = 0, col = cglcol) } PAXISLABELS <- df[1, 1:n] if (!is.null(paxislabels)) PAXISLABELS <- paxislabels if (axistype == 2 | axistype == 3 | axistype == 5) { if (is.null(palcex)) text(xx[1:n], yy[1:n], PAXISLABELS, col = axislabcol) else text(xx[1:n], yy[1:n], PAXISLABELS, col = axislabcol, cex = palcex) } VLABELS <- colnames(df) if (!is.null(vlabels)) VLABELS <- vlabels if (is.null(vlcex)) text(xx * 1.2, yy * 1.2, VLABELS, col = vlabcol) else text(xx * 1.2, yy * 1.2, VLABELS, cex = vlcex, col = vlabcol) series <- length(df[[1]]) SX <- series - 2 if (length(pty) < SX) { ptys <- rep(pty, SX) } else { ptys <- pty } if (length(pcol) < SX) { pcols <- rep(pcol, SX) } else { pcols <- pcol } if (length(plty) < SX) { pltys <- rep(plty, SX) } else { pltys <- plty } if (length(plwd) < SX) { plwds <- rep(plwd, SX) } else { plwds <- plwd } if (length(pdensity) < SX) { pdensities <- rep(pdensity, SX) } else { pdensities <- pdensity } if (length(pangle) < SX) { pangles <- rep(pangle, SX) } else { pangles <- pangle } if (length(pfcol) < SX) { pfcols <- rep(pfcol, SX) } else { pfcols <- pfcol } for (i in 3:series) { xxs <- xx yys <- yy scale <- CGap/(seg + CGap) + (df[i, ] - df[2, ])/(df[1, ] - df[2, ]) * seg/(seg + CGap) if (sum(!is.na(df[i, ])) < 3) { cat(sprintf("[DATA NOT ENOUGH] at %d\n%g\n", i, df[i, ])) } else { for (j in 1:n) { if (is.na(df[i, j])) { if (na.itp) { left <- ifelse(j > 1, j - 1, n) while (is.na(df[i, left])) { left <- ifelse(left > 1, left - 1, n) } right <- ifelse(j < n, j + 1, 1) while (is.na(df[i, right])) { right <- ifelse(right < n, right + 1, 1) } xxleft <- xx[left] * CGap/(seg + CGap) + xx[left] * (df[i, left] - df[2, left])/(df[1, left] - df[2, left]) * seg/(seg + CGap) yyleft <- yy[left] * CGap/(seg + CGap) + yy[left] * (df[i, left] - df[2, left])/(df[1, left] - df[2, left]) * seg/(seg + CGap) xxright <- xx[right] * CGap/(seg + CGap) + xx[right] * (df[i, right] - df[2, right])/(df[1, right] - df[2, right]) * seg/(seg + CGap) yyright <- yy[right] * CGap/(seg + CGap) + yy[right] * (df[i, right] - df[2, right])/(df[1, right] - df[2, right]) * seg/(seg + CGap) if (xxleft > xxright) { xxtmp <- xxleft yytmp <- yyleft xxleft <- xxright yyleft <- yyright xxright <- xxtmp yyright <- yytmp } xxs[j] <- xx[j] * (yyleft * xxright - yyright * xxleft)/(yy[j] * (xxright - xxleft) - xx[j] * (yyright - yyleft)) yys[j] <- (yy[j]/xx[j]) * xxs[j] } else { xxs[j] <- 0 yys[j] <- 0 } } else { xxs[j] <- xx[j] * CGap/(seg + CGap) + xx[j] * (df[i, j] - df[2, j])/(df[1, j] - df[2, j]) * seg/(seg + CGap) yys[j] <- yy[j] * CGap/(seg + CGap) + yy[j] * (df[i, j] - df[2, j])/(df[1, j] - df[2, j]) * seg/(seg + CGap) } } if (is.null(pdensities)) { polygon(xxs, yys, lty = pltys[i - 2], lwd = plwds[i - 2], border = pcols[i - 2], col = pfcols[i - 2]) } else { polygon(xxs, yys, lty = pltys[i - 2], lwd = plwds[i - 2], border = pcols[i - 2], density = pdensities[i - 2], angle = pangles[i - 2], col = pfcols[i - 2]) } points(xx * scale, yy * scale, pch = ptys[i - 2], col = pcols[i - 2]) } } } #################################################################################################################### #################################################################################################################### #################################################################################################################### # load clean dataset data_sub <- read_csv("./input/data_all_split.csv") data_sub <- data_sub %>% filter(cohort %in% c("train","val")) data_sub$log_nfl_csf_age_adj <- log((data_sub$nfl_csf_age_adj - min(data_sub$nfl_csf_age_adj,rm.na=TRUE))+1,10) data_sub$log_nfl_serum_age_adj <- log((data_sub$nfl_serum_age_adj - min(data_sub$nfl_serum_age_adj,rm.na=TRUE))+1,10) # generate cel column data_sub$cel <- log(data_sub$cel_exact_number +1) # generate new and enlarging lesions colum data_sub$t2_new_large <- log(data_sub$t2_lesion_new_and_enlarging +1) # generate medulla_ll_atrophy column data_sub$medulla_ll_atrophy <- data_sub$medulla_ll + data_sub$medulla_atrophy data_sub$sdmt_age <- data_sub$sdmt / data_sub$age # generate panel11_17 column data_sub$panel_11_17 <- data_sub$panel_11 + data_sub$panel_17 # isolate training cohort data_sub_train <- data_sub %>% filter(cohort=="train") # isolate validation cohort data_sub_val <- data_sub %>% filter(cohort=="val") # plot HD vs age pred <- "nfl" obs <- "age" pred_nice <- "Log10 NFL" obs_nice <- "Age" for(i in 1:length(pred)){ # i <- 2 dfr <- data_sub %>% filter(diagnosis=="Healthy Donor") dfr_s <- dfr %>% select(age,log_nfl_serum,patientcode) names(dfr_s)[2] <- "nfl" dfr_s$source <- "serum" dfr_c <- dfr %>% select(age,log_nfl_csf,patientcode) names(dfr_c)[2] <- "nfl" dfr_c$source <- "csf" df <- rbind(dfr_s,dfr_c) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=source),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_color_manual(values = c("#0171C0","#C00000"), name="NFL source", breaks = c("csf","serum"), label = c("CSF","Serum"))+ stat_cor(aes(label=..rr.label..,color=source), size=4,label.y.npc = .99,label.x.npc = 0)+ stat_cor(aes(label=..p.label..,color=source), size=4,label.y.npc = .99, label.x.npc = 0.4)+ geom_smooth(method="lm",se=TRUE, size=1, aes(color=source)) + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "black"), axis.title.y = element_text(colour = "black")) assign(paste("p",i,sep = ""),p) } p1 hd_serum <- lm(dfr$log_nfl_serum~dfr$age) hd_csf <- lm(dfr$log_nfl_csf~dfr$age) summary(hd_serum) summary(hd_csf) ggsave(plot = p1,filename = "./output/age_vs_nfl_hd.png", width =5,height = 3.5,units = "in",dpi = 300) # plot MS vs age pred <- "nfl" obs <- "age" pred_nice <- "Log10 NFL" obs_nice <- "Age" for(i in 1:length(pred)){ # i <- 2 dfr <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) dfr_s <- dfr %>% select(age,log_nfl_serum,patientcode) names(dfr_s)[2] <- "nfl" dfr_s$source <- "serum" dfr_c <- dfr %>% select(age,log_nfl_csf,patientcode) names(dfr_c)[2] <- "nfl" dfr_c$source <- "csf" df <- rbind(dfr_s,dfr_c) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=source),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..rr.label..,color=source), size=4,label.y.npc = .99,label.x.npc = 0)+ stat_cor(aes(label=..p.label..,color=source), size=4,label.y.npc = .99, label.x.npc = 0.4)+ geom_smooth(method="lm",se=TRUE, size=1, aes(color=source)) + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "#C00000")) assign(paste("p",i,sep = ""),p) } p1 hd_serum <- lm(dfr$log_nfl_serum~dfr$age) hd_csf <- lm(dfr$log_nfl_csf~dfr$age) summary(hd_serum) summary(hd_csf) #plot CSF vs Serum NFL pred <- "log_nfl_serum" obs <- "log_nfl_csf" pred_nice <- "Log10 sNFL" obs_nice <- "Log10 cNFL" for(i in 1:length(pred)){ # i <- 2 test_train <- data_sub_train confound <- c("age","bmi_height","bmi_weight","estbloodvolume","bmi","serum_alt","serum_ast", "serum_creatinine","serum_egfr_calc","serum_ap","serum_bun") # generate clean dataset - no NAs test_train <- test_train[complete.cases(test_train[confound]),] df <- test_train p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#008F00",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#008F00",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#008F00",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1, color="#008F00") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "#C00000")) assign(paste("p",i,sep = ""),p) } ggsave(plot = p1,filename = "./output/log_nfl_csf_vs_log_nfl_serum_training_cohort.png", width =3,height = 2.5,units = "in",dpi = 300) # generate residuals nfl_orig_mod <- lm(log_nfl_serum~log_nfl_csf,data = test_train) #generate dummy dataset for dat_sub_train test <- test_train test$residuals <-nfl_orig_mod$residuals # plot residuals #plot CSF vs Serum NFL pred <- "residuals" obs <- "log_nfl_csf" pred_nice <- "NFL residuals" obs_nice <- "Log10 cNFL" for(i in 1:length(pred)){ # i <- 2 df <- test p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="black",label.y.npc = .99)+ stat_cor(aes(label=..rr.label..), size=4,color="black",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="black",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="black") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "#C55A11")) assign(paste("p",i,sep = ""),p) } ggsave(plot = p1,filename = "./output/log_nfl_csf_vs_nfl_residuals_training_cohort.png", width =3,height = 2.5,units = "in",dpi = 300) #plot CSF vs Serum NFL pred <- "residuals" obs <- "age" pred_nice <- "NFL residuals" obs_nice <- "Age" for(i in 1:length(pred)){ # i <- 2 df <- test p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="black",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="black",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="black",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="black") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "red"), axis.title.y = element_text(colour = "#C55A11")) assign(paste("p",i,sep = ""),p) } ggsave(plot = p1,filename = "./output/age_vs_residuals_training_cohort.png", width =3,height = 2.5,units = "in",dpi = 300) ####################################################### ####################################################### # # demographic data (samples with confounders used for model generation) # ####################################################### ####################################################### # TRAINING COHORT training_demo <- data_sub %>% filter(cohort=="train") training_demo <- training_demo[complete.cases(training_demo[confound]),] length(unique(training_demo$patientcode)) training <- training_demo %>% # filter(cohort=="train") %>% arrange(.,age) # get number of samples per patient x <- tibble::enframe(table(training$patientcode),name = "patientcode",value = "nr_samples") # get first sample per patient training_first <- training %>% dplyr::group_by(patientcode) %>% filter(row_number()==1) training_first <- merge(training_first,x,by="patientcode") #get couunts for training_first %>% dplyr::group_by(diagnosis) %>% dplyr::group_by(gender,.add=TRUE) %>% count() training_first %>% dplyr::group_by(diagnosis) %>% summarise_at(.,.vars = "age",.funs = c(mean,sd,min,max)) training_first %>% dplyr::group_by(diagnosis) %>% dplyr::group_by(ethnicity,.add=TRUE) %>% count() tab <- training_first %>% dplyr::group_by(diagnosis) %>% dplyr::group_by(race,.add=TRUE) %>% count() training_first %>% dplyr::group_by(diagnosis) %>% summarise_at(.,.vars = "nr_samples",.funs = c(sum,mean,sd,min,max)) training_demo_group <- training_demo %>% group_by(diagnosis) summarize_at(.tbl = training_demo_group,.vars = "nfl_csf",.funs = mean) summarize_at(.tbl = training_demo_group,.vars = "nfl_csf",.funs = sd) summarize_at(.tbl = training_demo_group,.vars = "nfl_serum",.funs = mean) summarize_at(.tbl = training_demo_group,.vars = "nfl_serum",.funs = sd) # VALIDATION COHORT validation_demo <- data_sub %>% filter(cohort=="val") validation_demo <- validation_demo[complete.cases(validation_demo[confound]),] length(unique(validation_demo$patientcode)) validation <- validation_demo %>% # filter(cohort=="val") %>% arrange(.,age) # get number of samples per patient x <- tibble::enframe(table(validation$patientcode),name = "patientcode",value = "nr_samples") # get first sample per patient validation_first <- validation %>% dplyr::group_by(patientcode) %>% filter(row_number()==1) validation_first <- merge(validation_first,x,by="patientcode") #get couunts for validation_first %>% dplyr::group_by(diagnosis) %>% dplyr::group_by(gender,.add=TRUE) %>% count() validation_first %>% dplyr::group_by(diagnosis) %>% dplyr::group_by(ethnicity,.add=TRUE) %>% count() tab <- validation_first %>% dplyr::group_by(diagnosis) %>% dplyr::group_by(race,.add=TRUE) %>% count() validation_first %>% dplyr::group_by(diagnosis) %>% summarise_at(.,.vars = "age",.funs = c(mean,sd,min,max)) validation_first %>% dplyr::group_by(diagnosis) %>% summarise_at(.,.vars = "nr_samples",.funs = c(sum,mean,sd,min,max)) validation_demo_group <- validation_demo %>% group_by(diagnosis) summarize_at(.tbl = validation_demo_group,.vars = "nfl_csf",.funs = mean) summarize_at(.tbl = validation_demo_group,.vars = "nfl_csf",.funs = sd) summarize_at(.tbl = validation_demo_group,.vars = "nfl_serum",.funs = mean) summarize_at(.tbl = validation_demo_group,.vars = "nfl_serum",.funs = sd) ################################################################################################ ################################################################################################ # # # generate MLR model of Serum NFL in training cohort # # ################################################################################################ ################################################################################################ # selected varaibles for building a model vars_model <- c("log_nfl_csf","log_nfl_serum","age","bmi_height","bmi_weight","estbloodvolume","bmi","serum_alt","serum_ast", "serum_creatinine","serum_egfr_calc","serum_ap","serum_bun") confound <- c("age","bmi_height","bmi_weight","estbloodvolume","bmi","serum_alt","serum_ast", "serum_creatinine","serum_egfr_calc","serum_ap","serum_bun") # generate clean dataset - no NAs data_sub_train_clean <- data_sub_train data_sub_train_clean <- data_sub_train_clean[complete.cases(data_sub_train[confound]),] #generate simple LM model in the clean cohort mod_train_clean <- lm(log_nfl_serum~log_nfl_csf,data_sub_train_clean) summary(mod_train_clean) #build the model: fit <- lm(log_nfl_serum~log_nfl_csf + age + bmi + bmi_height + bmi_weight + estbloodvolume + serum_alt + serum_ast + serum_ap + serum_bun + serum_creatinine + serum_egfr_calc, data_sub_train_clean) summary(fit) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit <- lm(log_nfl_serum ~ log_nfl_csf + age + bmi_weight + serum_ap + serum_bun + serum_creatinine, data_sub_train_clean) summary(best_fit) saveRDS(best_fit,"./output/best_fit_equation.RDS") age_bmi_fit <- lm(log_nfl_serum ~ log_nfl_csf + age + bmi, data_sub_train_clean) ###### ###### #### predict CSF from serum # generate residuals in the CLEAN cohort mod_train_clean_rev <- lm(log_nfl_csf~log_nfl_serum,data_sub_train_clean) summary(mod_train_clean_rev) #build the model: fit_rev <- lm(log_nfl_csf~log_nfl_serum + age + bmi + bmi_height + bmi_weight + estbloodvolume + serum_alt + serum_ast + serum_ap + serum_bun + serum_creatinine + serum_egfr_calc, data_sub_train_clean) #stepwise regression step_rev <- stepAIC(fit_rev, direction="both") step_rev$anova # display results best_fit_rev <- lm(log_nfl_csf ~ log_nfl_serum + age + bmi + bmi_height + estbloodvolume + serum_alt + serum_ast + serum_bun + serum_egfr_calc, data_sub_train_clean) summary(best_fit_rev) #plot lolipop plot for t-values variables tidy_fit <- broom::tidy(fit) tidy_fit <- tidy_fit[-c(1:2),] tidy_fit <- tidy_fit %>% arrange(desc(abs(statistic))) t_values <- tidy_fit$statistic t_names <- tidy_fit$term data <- as.data.frame(matrix(tidy_fit$statistic,ncol=11)) colnames(data) <- c("Age","BUN","AP","Creatinine","Height","Weight","BMI","Est Blood Vol","ALT","AST","eGFR") data <- rbind(rep(max(t_values),11),rep(0,11),data) #transpose dataframe data_t <- t(data) data_t <- data_t[,3] data_t_names <- names(data_t) data_t <- tibble::enframe(data_t,name = NULL,value = "statistic") data_t <- cbind(data_t_names,data_t) data_t <- data_t %>% arrange(desc(data_t_names)) data_t[12:15,] <- NA data_t[12,1] <- "x" data_t[13,1] <- "y" data_t[14,1] <- "z" data_t[15,1] <- "w" data_names <- data_t$data_t_names tidy_fit <- broom::tidy(best_fit) tidy_fit <- tidy_fit[-c(1:2),] tidy_fit <- tidy_fit %>% select(term,statistic) tidy_fit$term <- c("Age","Weight","AP","BUN","Creatinine") names(tidy_fit)[1] <- "data_t_names" names(tidy_fit)[2] <- "statistics_new" data_t <- merge(data_t,tidy_fit,by="data_t_names",all.x = TRUE) rownames(data_t) #lolipop plot t <- ggplot(data_t, aes(y=statistic, x=data_t_names)) + geom_segment( aes(y=0, yend=statistic,x=data_t_names, xend=data_t_names), color="magenta") + geom_point( color="magenta", size=3, alpha=0.6) + theme_light() + scale_x_discrete(limits = c("Age","x","y","eGFR","Creatinine","BUN","AST","AP","ALT","z","w","Weight","Height","Est Blood Vol","BMI")) + geom_hline(yintercept = 0,color = "black", linetype="dashed",size=.25) + coord_flip() + ylab("abs(t-statistics)") + xlab("")+ ylim(c(-2.5,7.1))+ theme( panel.grid.major.y = element_blank(), panel.border = element_blank(), axis.ticks.y = element_blank(), axis.title.x = element_blank(), axis.text.y = element_text(margin = margin(r=15),size = 12, face = c("bold","plain","plain","plain","bold","bold","plain","bold","plain","plain","plain","bold","plain","plain","plain" )) ) t ggsave(plot = ggarrange(t,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/vars_lolipop_MLM.png", width =2.7,height = 3,units = "in",dpi = 300) t <- ggplot(data_t, aes(y=statistics_new, x=data_t_names)) + geom_segment( aes(y=0, yend=statistics_new,x=data_t_names, xend=data_t_names), color="purple") + geom_point( color="purple", size=3, alpha=0.6) + theme_light() + scale_x_discrete(limits = c("Age","x","y","eGFR","Creatinine","BUN","AST","AP","ALT","z","w","Weight","Height","Est Blood Vol","BMI")) + geom_hline(yintercept = 0,color = "black", linetype="dashed",size=.25) + coord_flip() + ylab("abs(t-statistics)") + xlab("")+ ylim(c(-8.7,8.2))+ theme( panel.grid.major.y = element_blank(), panel.border = element_blank(), axis.ticks.y = element_blank(), axis.title.x = element_blank(), axis.text.y = element_text(margin = margin(r=15),size = 12, face = c("bold","plain","plain","plain","bold","bold","plain","bold","plain","plain","plain","bold","plain","plain","plain" )) ) t ggsave(plot = ggarrange(t,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/vars_lolipop_MLM_new.png", width =2.7,height = 3,units = "in",dpi = 300) # # # # ## # # # # # # # ######################################################### # # THIS SHOULD GO INTO SUPPLEMENTARY # ######################################################### ###### ###### #### predict serum from CSF - mixed model mod_train_clean_mix <- lmer(log_nfl_serum~log_nfl_csf + (1|patientcode),data = data_sub_train_clean) summary(mod_train_clean_mix) # R2 for mixed effect model # https://stats.stackexchange.com/questions/7240/proportion-of-explained-variance-in-a-mixed-effects-model r.squaredGLMM(mod_train_clean_mix) # R2m: marginal R squared value associated with fixed effects # R2c conditional R2 value associated with fixed effects plus the random effects. #build the model: fit_mix <- lmer(log_nfl_serum~log_nfl_csf + age + bmi + bmi_height + estbloodvolume + serum_creatinine + serum_egfr_calc + bmi_weight + serum_ap + serum_bun + (1|patientcode), data_sub_train_clean) summary(fit_mix) #stepwise regression step_mix <- stepcAIC(fit_mix, direction="both", groupCandidates = "patientcode", trace=TRUE) best_fit_mix <- lmer(log_nfl_serum ~ log_nfl_csf + age + bmi + bmi_height + estbloodvolume + serum_creatinine + serum_egfr_calc + bmi_weight + serum_ap + serum_bun + (1 | patientcode), data = data_sub_train_clean) summary(best_fit_mix) r.squaredGLMM(best_fit_mix) # predict serum NFL levels in the trainig cohort data_sub_train_clean$pred_serum_nfl_lm <- predict(mod_train_clean,newdata = data_sub_train_clean) data_sub_train_clean$pred_serum_nfl_mlm <- predict(best_fit,newdata = data_sub_train_clean) data_sub_train_clean$pred_serum_nfl_lmer <- predict(best_fit_mix,newdata = data_sub_train_clean) data_sub_train_clean$pred_serum_nfl_mlm_age_bmi <- predict(age_bmi_fit,newdata = data_sub_train_clean) # clean validation cohort: data_sub_val_clean <- data_sub_val data_sub_val_clean <- data_sub_val_clean[complete.cases(data_sub_val[confound]),] data_sub_val_clean$pred_serum_nfl_lm <- predict(mod_train_clean,newdata = data_sub_val_clean) data_sub_val_clean$pred_serum_nfl_mlm <- predict(best_fit,newdata = data_sub_val_clean) data_sub_val_clean$pred_serum_nfl_lmer <- predict(best_fit_mix,newdata = data_sub_val_clean,allow.new.levels=TRUE) data_sub_val_clean$pred_serum_nfl_mlm_age_bmi <- predict(age_bmi_fit,newdata = data_sub_val_clean) # clean validation cohort - first sample only: data_sub_val_clean_first <- data_sub_val %>% dplyr::select(patientcode,lpdate,diagnosis,log_nfl_csf,log_nfl_serum,all_of(confound),cel) data_sub_val_clean_first <- data_sub_val_clean_first[complete.cases(data_sub_val_clean_first[confound]),] data_sub_val_clean_first <- data_sub_val_clean_first %>% group_by(patientcode) %>% arrange(lpdate) %>% slice(1) data_sub_val_clean_first$pred_serum_nfl_lm <- predict(mod_train_clean,newdata = data_sub_val_clean_first) data_sub_val_clean_first$pred_serum_nfl_mlm <- predict(best_fit,newdata = data_sub_val_clean_first) data_sub_val_clean_first$pred_serum_nfl_lmer <- predict(best_fit_mix,newdata = data_sub_val_clean_first,allow.new.levels=TRUE) # clean validation cohort - median sample: data_sub_val_clean_med <- data_sub_val %>% dplyr::select(patientcode,lpdate,diagnosis,log_nfl_csf,log_nfl_serum,all_of(confound),cel) data_sub_val_clean_med <- data_sub_val_clean_med %>% group_by(patientcode) %>% mutate_at(.vars = c("log_nfl_csf","log_nfl_serum",all_of(confound)),.funs = median ,na.rm = TRUE) %>% slice(1) %>% ungroup() data_sub_val_clean_med <- data_sub_val_clean_med[complete.cases(data_sub_val_clean_med[confound]),] data_sub_val_clean_med$pred_serum_nfl_lm <- predict(mod_train_clean,newdata = data_sub_val_clean_med) data_sub_val_clean_med$pred_serum_nfl_mlm <- predict(best_fit,newdata = data_sub_val_clean_med) data_sub_val_clean_med$pred_serum_nfl_lmer <- predict(best_fit_mix,newdata = data_sub_val_clean_med,allow.new.levels=TRUE) # plot pred <- c("pred_serum_nfl_lm","pred_serum_nfl_mlm","pred_serum_nfl_lmer","pred_serum_nfl_mlm_age_bmi") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum","log_nfl_serum") pred_nice <- c("cNFL-predicted sNFL","cNFL-predicted sNFL","cNFL-predicted sNFL (LMER)","cNFL-predicted sNFL") obs_nice <- c("measured Log10 sNFL","measured Log10 sNFL","measured Log10 sNFL","measured Log10 sNFL") for(i in 1:length(pred)){ # i <-2 df <- data_sub_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[obs[i]])+.12*(max(df[obs[i]])-min(df[obs[i]])), size=4,parse = TRUE) + annotate("text",label = paste("CCC==", as.numeric(round(epi.ccc(as.numeric(df[[obs[i]]]),as.numeric(df[[pred[i]]]), ci = "z-transform",conf.level = 0.95)$rho.c[1], digits = 2)),sep=""), x=max(df[obs[i]])-.3*(max(df[obs[i]])-min(df[obs[i]])), y=max(df[obs[i]])-.01*(max(df[obs[i]])-min(df[obs[i]])), size=4,parse = TRUE,color="blue") + ylim(c(min(df[obs[i]]),max(df[obs[i]])))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_abline(slope = 1,intercept = 0,color="blue",size=0.5) + stat_cor(aes(label=..r.label..), size=4,color="#008F00",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#008F00",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#008F00",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#008F00") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#008F00")) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"),filename = "./output/LM_predicted_sNFL_training.png", width =3,height = 2.2,units = "in",dpi = 300) ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"),filename = "./output/MLM_predicted_sNFL_training.png", width =3,height = 2.2,units = "in",dpi = 300) ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"),filename = "./output/MLM_age_bmi_predicted_sNFL_training.png", width =3,height = 2.2,units = "in",dpi = 300) pred <- c("pred_serum_nfl_lm","pred_serum_nfl_mlm","pred_serum_nfl_lmer","pred_serum_nfl_mlm_age_bmi") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum","log_nfl_serum") pred_nice <- c("cNFL-predicted sNFL","cNFL-predicted sNFL","cNFL-predicted sNFL (LMER)","cNFL-predicted sNFL") obs_nice <- c("measured Log10 sNFL","measured Log10 sNFL","measured Log10 sNFL","measured Log10 sNFL") for(i in 1:length(pred)){ # i <- 1 df <- data_sub_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[obs[i]])+.12*(max(df[obs[i]])-min(df[obs[i]])), size=4,parse = TRUE) + annotate("text",label = paste("CCC==", as.numeric(round(epi.ccc(as.numeric(df[[obs[i]]]),as.numeric(df[[pred[i]]]), ci = "z-transform",conf.level = 0.95)$rho.c[1], digits = 2)),sep=""), x=max(df[obs[i]])-.3*(max(df[obs[i]])-min(df[obs[i]])), y=max(df[obs[i]])-.01*(max(df[obs[i]])-min(df[obs[i]])), size=4,parse = TRUE,color="blue") + # ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), # max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ ylim(c(min(df[obs[i]]),max(df[obs[i]])))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_abline(slope = 1,intercept = 0,color="blue",size=0.5) + stat_cor(aes(label=..r.label..), size=4,color="#008F00",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#008F00",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#008F00",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#008F00") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#008F00")) assign(paste("v",i,sep = ""),p) } ggsave(plot = ggarrange(v1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"),filename = "./output/LM_predicted_sNFL_validation.png", width =3,height = 2.2,units = "in",dpi = 300) ggsave(plot = ggarrange(v2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"),filename = "./output/MLM_predicted_sNFL_validation.png", width =3,height = 2.2,units = "in",dpi = 300) ggsave(plot = ggarrange(v4,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"),filename = "./output/MLM_age_bmi_predicted_sNFL_validation.png", width =3,height = 2.2,units = "in",dpi = 300) #plot firts LP pred <- c("pred_serum_nfl_lm","pred_serum_nfl_mlm","pred_serum_nfl_lmer") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum") pred_nice <- c("cNFL-predicted sNFL","cNFL-predicted sNFL","predicted sNFL (LMER)") obs_nice <- c("measured Log10 sNFL","measured Log10 sNFL","measured Log10 sNFL") for(i in 1:length(pred)){ # i <- 1 df <- data_sub_val_clean_first p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#008F00",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#008F00",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#008F00",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#008F00") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#008F00")) assign(paste("f",i,sep = ""),p) } ggsave(plot = ggarrange(v1,v2,f1,f2,ncol = 2,nrow = 2, common.legend=TRUE, legend="bottom"),filename = "./output/predicted_sNFL_validation_first_sample.png", width =6,height = 5,units = "in",dpi = 300) #plot median value LP pred <- c("pred_serum_nfl_lm","pred_serum_nfl_mlm","pred_serum_nfl_lmer") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum") pred_nice <- c("cNFL-predicted sNFL","cNFL-predicted sNFL","predicted sNFL (LMER)") obs_nice <- c("measured Log10 sNFL","measured Log10 sNFL","measured Log10 sNFL") for(i in 1:length(pred)){ # i <- 1 df <- data_sub_val_clean_med p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#008F00",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#008F00",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#008F00",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#008F00") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#008F00")) assign(paste("m",i,sep = ""),p) } ggsave(plot = ggarrange(m1,m2,ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"),filename = "./output/predicted_sNFL_validation_median_sample.png", width =6,height = 2.5,units = "in",dpi = 300) # # # # ## # # # ## # ######################################################################################################################################################## ######################################################################################################################################################## # # correlation with CEL - adjusted serum NFL # ######################################################################################################################################################## ######################################################################################################################################################## # predict csf NFL from serum NFL and cofounders # best_fit intercept <- as.numeric(best_fit$coefficients[1]) log_nfl_csf_slope <- as.numeric(best_fit$coefficients[2]) age_slope <- as.numeric(best_fit$coefficients[3]) bmi_weight_slope <- as.numeric(best_fit$coefficients[4]) serum_ap_slope <- as.numeric(best_fit$coefficients[5]) serum_bun_slope <- as.numeric(best_fit$coefficients[6]) serum_creatinine_slope <- as.numeric(best_fit$coefficients[7]) data_sub_train_clean$predicted_csf_nfl <- (data_sub_train_clean$log_nfl_serum - age_slope * data_sub_train_clean$age - bmi_weight_slope * data_sub_train_clean$bmi_weight - serum_ap_slope * data_sub_train_clean$serum_ap - serum_bun_slope * data_sub_train_clean$serum_bun - serum_creatinine_slope * data_sub_train_clean$serum_creatinine - intercept) / log_nfl_csf_slope data_sub_val_clean$predicted_csf_nfl <- (data_sub_val_clean$log_nfl_serum - age_slope * data_sub_val_clean$age - bmi_weight_slope * data_sub_val_clean$bmi_weight - serum_ap_slope * data_sub_val_clean$serum_ap - serum_bun_slope * data_sub_val_clean$serum_bun - serum_creatinine_slope * data_sub_val_clean$serum_creatinine - intercept) / log_nfl_csf_slope ###### # generate age-adjusted predicted_csf_nfl ###### # merge train and val data_sub_clean <- rbind(data_sub_train_clean[,c("sampleid","diagnosis","age","predicted_csf_nfl")], data_sub_val_clean[,c("sampleid","diagnosis","age","predicted_csf_nfl")]) data_sub_clean_hd <- data_sub_clean %>% filter(diagnosis=="Healthy Donor") # generate equation in HD hd_mod <- lm(predicted_csf_nfl~age,data_sub_clean_hd) summary(hd_mod) # predict age-adjusted cNFL data_sub_clean$predicted_csf_nfl_age_adj <- data_sub_clean$predicted_csf_nfl - predict(object = hd_mod,newdata = data_sub_clean) # merge with data_sub_train_clean and data_sub_val_clean data_sub_clean_to_merge <- data_sub_clean %>% select(sampleid,predicted_csf_nfl_age_adj) data_sub_train_clean <- merge(data_sub_train_clean,data_sub_clean_to_merge,by="sampleid",all.x = TRUE) data_sub_val_clean <- merge(data_sub_val_clean,data_sub_clean_to_merge,by="sampleid",all.x = TRUE) #plot CEL vs CSF NFL obs <- c("log_nfl_csf","log_nfl_csf","log_nfl_csf_age_adj") obs_nice <- c("Log10 cNFL","Log10 cNFL","Log10 age-adj cNFL") pred <- c("cel","t2_new_large","cel") pred_nice <- c("Ln(CEL+1)","Ln(#New+Enlarging T2 +1)","Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1.5, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm", se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_csf_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) # ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), # filename = "./output/T2NE_vs_csf_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_age_adj_csf_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs serum NFL obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum_age_adj") obs_nice <- c("Log10 sNFL","Log10 sNFL","Log age-adj sNFL") pred <- c("cel","t2_new_large","cel") pred_nice <- c("Ln(CEL+1)","Ln(#New+Enlarging T2 +1)","Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_serum_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) # ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), # filename = "./output/T2NE_vs_serum_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_age_adj_serum_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs predicted CSF NFL obs <- c("predicted_csf_nfl","predicted_csf_nfl","predicted_csf_nfl_age_adj") obs_nice <- c("sNFL-predicted cNFL","sNFL-predicted cNFL","age-adj sNFL-predicted cNFL") pred <- c("cel","t2_new_large","cel") pred_nice <- c("Ln(CEL+1)","Ln(#New+Enlarging T2 +1)","Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#008F00"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_predicted_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) # ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), # filename = "./output/T2NE_vs_predicted_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_age_adj_predicted_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) # # calculate residuals of serum vs CEL and adjusted serum vs CEL data <- data_sub_train_clean df <- data %>% filter(.,!is.na(data$cel) & !is.na(data$predicted_csf_nfl)) mod_cel_serum <- lm(cel~log_nfl_serum,df) summary(mod_cel_serum) df$serum_cel_res <- mod_cel_serum$residuals hist(mod_cel_serum$residuals) mod_cel_adj_serum <- lm(cel~predicted_csf_nfl,df) summary(mod_cel_adj_serum) df$adj_serum_cel_res <- mod_cel_adj_serum$residuals hist(df$adj_serum_cel_res) wilcox.test(df$serum_cel_res,df$adj_serum_cel_res,paired = TRUE, alternative = "two.sided") df_new <- tibble::enframe(df$serum_cel_res,name = NULL,value = "serum") df_new <- df_new %>% mutate(subclass=row_number()) %>% mutate(group="serum") df_new2 <- tibble::enframe(df$adj_serum_cel_res,name = NULL,value = "serum") df_new2 <- df_new2 %>% mutate(subclass=row_number()) %>% mutate(group="adj-serum") df_all <- rbind(df_new,df_new2) # plot obs <- c("group") obs_nice <- c("residuals") pred <- c("serum") pred_nice <- c("serum-CEL residuals") i <- 1 p4 <- df_all %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "group")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df_all[pred[i]])-0.25*(max(df_all[pred[i]])-min(df_all[pred[i]])),max(df_all[pred[i]])+0.2*(max(df_all[pred[i]])-min(df_all[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("adjusted serum", "measured serum"))+ scale_x_discrete(labels = c("adjusted serum", "measured serum")) + stat_compare_means(method = "t.test", paired = TRUE, label.x = "adj-serum", label.y = max(df_all[pred[i]])+0.1*(max(df_all[pred[i]])-min(df_all[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df_all[pred[i]])-0.05*(max(df_all[pred[i]])-min(df_all[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none")+ theme(axis.title.y = element_text( colour = "blue" )) ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1), filename = "./output/CEL_residuals_plot_t.test.png", width =2.7,height = 2.5,units = "in",dpi = 300) #### VALIDATION cohort #plot CEL vs CSF NFL obs <- c("log_nfl_csf","log_nfl_csf","log_nfl_csf_age_adj") obs_nice <- c("Log10 cNFL","Log10 cNFL","Log age-adj cNFL") pred <- c("cel","t2_new_large","cel") pred_nice <- c("Ln(CEL+1)","Ln(#New+Enlarging T2 +1)","Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_csf_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) # ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), # filename = "./output/T2NE_cel_vs_csf_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_age_adj_csf_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs serum NFL obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum_age_adj") obs_nice <- c("Log10 sNFL","Log10 sNFL","Log age-adj sNFL") pred <- c("cel","t2_new_large","cel") pred_nice <- c("Ln(CEL+1)","Ln(#New+Enlarging T2 +1)","Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_serum_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) # ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), # filename = "./output/T2NE_cel_vs_serum_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_age_adj_serum_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs predicted CSF NFL obs <- c("predicted_csf_nfl","predicted_csf_nfl","predicted_csf_nfl_age_adj") obs_nice <- c("sNFL-predicted cNFL","sNFL-predicted cNFL","age-adj sNFL-predicted cNFL") pred <- c("cel","t2_new_large","cel") pred_nice <- c("Ln(CEL+1)","Ln(#New+Enlarging T2 +1)","Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#008F00"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_predicted_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) # ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), # filename = "./output/T2NE_cel_vs_adjusted_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_age_adj_predicted_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) # try Poisson regression in training cohort summary(m1 <- glm(cel~log_nfl_csf, family = "poisson", data = data_sub_train_clean)) summary(m2 <- glm(cel~log_nfl_serum, family = "poisson", data = data_sub_train_clean)) summary(m3 <- glm(cel~predicted_csf_nfl, family = "poisson", data = data_sub_train_clean)) with(m1, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) with(m2, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) with(m3, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) data_sub_train_clean$poison_csf <- predict(object = m1,newdata = data_sub_train_clean) data_sub_train_clean$poison_serum <- predict(object = m2,newdata = data_sub_train_clean) data_sub_train_clean$poison_adj_serum <- predict(object = m3,newdata = data_sub_train_clean) poison_res <- tibble::enframe(m1$residuals,name = NULL,value = "csf_res") poison_res$serum_res <- m2$residuals poison_res$adj_serum_res <- m3$residuals wilcox.test(poison_res$serum_res,poison_res$adj_serum_res,paired = TRUE, alternative = "two.sided") # plot comparison between residuals df_new <- tibble::enframe(poison_res$serum_res,name = NULL,value = "serum") df_new <- df_new %>% mutate(subclass=row_number()) %>% mutate(group="serum") df_new2 <- tibble::enframe(poison_res$adj_serum_res,name = NULL,value = "serum") df_new2 <- df_new2 %>% mutate(subclass=row_number()) %>% mutate(group="adj-serum") df_all <- rbind(df_new,df_new2) # plot obs <- c("group") obs_nice <- c("residuals") pred <- c("serum") pred_nice <- c("serum-CEL residuals") i <- 1 p4 <- df_all %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "group")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df_all[pred[i]])-0.25*(max(df_all[pred[i]])-min(df_all[pred[i]])),max(df_all[pred[i]])+0.2*(max(df_all[pred[i]])-min(df_all[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("adjusted serum", "measured serum"))+ # scale_x_discrete(labels = c("adjusted serum", "measured serum")) + stat_compare_means(method = "wilcox.test", paired = TRUE, label.x = "adj-serum", label.y = max(df_all[pred[i]])+0.1*(max(df_all[pred[i]])-min(df_all[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df_all[pred[i]])-0.05*(max(df_all[pred[i]])-min(df_all[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none")+ theme(axis.title.y = element_text( colour = "blue" )) ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1), filename = "./output/Poison_CEL_residuals_plot_t.test.png", width =2.7,height = 2.5,units = "in",dpi = 300) # try poisson regression in validation cohort summary(m1 <- glm(cel~log_nfl_csf, family = "poisson", data = data_sub_val_clean)) summary(m2 <- glm(cel~log_nfl_serum, family = "poisson", data = data_sub_val_clean)) summary(m3 <- glm(cel~predicted_csf_nfl, family = "poisson", data = data_sub_val_clean)) with(m1, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) with(m2, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) with(m3, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) data_sub_val_clean$poison_csf <- predict(object = m1,newdata = data_sub_val_clean) data_sub_val_clean$poison_serum <- predict(object = m2,newdata = data_sub_val_clean) data_sub_val_clean$poison_adj_serum <- predict(object = m3,newdata = data_sub_val_clean) poison_res <- tibble::enframe(m1$residuals,name = NULL,value = "csf_res") poison_res$serum_res <- m2$residuals poison_res$adj_serum_res <- m3$residuals wilcox.test(poison_res$serum_res,poison_res$adj_serum_res,paired = TRUE, alternative = "two.sided") # plot comparison between residuals df_new <- tibble::enframe(poison_res$serum_res,name = NULL,value = "serum") df_new <- df_new %>% mutate(subclass=row_number()) %>% mutate(group="serum") df_new2 <- tibble::enframe(poison_res$adj_serum_res,name = NULL,value = "serum") df_new2 <- df_new2 %>% mutate(subclass=row_number()) %>% mutate(group="adj-serum") df_all <- rbind(df_new,df_new2) # plot obs <- c("group") obs_nice <- c("residuals") pred <- c("serum") pred_nice <- c("serum-CEL residuals") i <- 1 p4 <- df_all %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "group")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df_all[pred[i]])-0.25*(max(df_all[pred[i]])-min(df_all[pred[i]])),max(df_all[pred[i]])+0.2*(max(df_all[pred[i]])-min(df_all[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("adjusted serum", "measured serum"))+ # scale_x_discrete(labels = c("adjusted serum", "measured serum")) + stat_compare_means(method = "wilcox.test", paired = TRUE, label.x = "adj-serum", label.y = max(df_all[pred[i]])+0.1*(max(df_all[pred[i]])-min(df_all[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df_all[pred[i]])-0.05*(max(df_all[pred[i]])-min(df_all[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none")+ theme(axis.title.y = element_text( colour = "blue" )) p4 ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1), filename = "./output/Poison_CEL_residuals_plot_val.test.png", width =2.7,height = 2.5,units = "in",dpi = 300) # POISSON REGRESSION in combined training and validation cohort data_train <- data_sub_train_clean %>% select(patientcode,cel,log_nfl_csf,log_nfl_serum,predicted_csf_nfl) data_val <- data_sub_val_clean %>% select(patientcode,cel,log_nfl_csf,log_nfl_serum,predicted_csf_nfl) data_both <- rbind(data_train,data_val) summary(m1 <- glm(cel~log_nfl_csf, family = "poisson", data = data_both)) summary(m2 <- glm(cel~log_nfl_serum, family = "poisson", data = data_both)) summary(m3 <- glm(cel~predicted_csf_nfl, family = "poisson", data = data_both)) with(m1, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) with(m2, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) with(m3, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) poison_res <- tibble::enframe(m1$residuals,name = NULL,value = "csf_res") poison_res$serum_res <- m2$residuals poison_res$adj_serum_res <- m3$residuals wilcox.test(poison_res$serum_res,poison_res$adj_serum_res,paired = TRUE, alternative = "two.sided") # plot comparison between residuals df_new <- tibble::enframe(poison_res$serum_res,name = NULL,value = "serum") df_new <- df_new %>% mutate(subclass=row_number()) %>% mutate(group="serum") df_new2 <- tibble::enframe(poison_res$adj_serum_res,name = NULL,value = "serum") df_new2 <- df_new2 %>% mutate(subclass=row_number()) %>% mutate(group="adj-serum") df_all <- rbind(df_new,df_new2) # plot obs <- c("group") obs_nice <- c("residuals") pred <- c("serum") pred_nice <- c("serum-CEL residuals") i <- 1 p4 <- df_all %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "group")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df_all[pred[i]])-0.25*(max(df_all[pred[i]])-min(df_all[pred[i]])),max(df_all[pred[i]])+0.2*(max(df_all[pred[i]])-min(df_all[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("adjusted serum", "measured serum"))+ # scale_x_discrete(labels = c("adjusted serum", "measured serum")) + stat_compare_means(method = "wilcox.test", paired = TRUE, label.x = "adj-serum", label.y = max(df_all[pred[i]])+0.1*(max(df_all[pred[i]])-min(df_all[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df_all[pred[i]])-0.05*(max(df_all[pred[i]])-min(df_all[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none")+ theme(axis.title.y = element_text( colour = "blue" )) p4 ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1), filename = "./output/Poison_CEL_residuals_plot_train+val.test.png", width =2.7,height = 2.5,units = "in",dpi = 300) # try poisson regression for T2 and New lesions summary(m1 <- glm(t2_new_large~log_nfl_csf, family = "poisson", data = data_sub_train_clean)) summary(m2 <- glm(t2_new_large~log_nfl_serum, family = "poisson", data = data_sub_train_clean)) summary(m3 <- glm(t2_new_large~predicted_csf_nfl, family = "poisson", data = data_sub_train_clean)) with(m1, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) with(m2, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) with(m3, cbind(res.deviance = deviance, df=df.residual, p= pchisq(deviance, df.residual, lower.tail = FALSE))) data_sub_train_clean$poison_csf <- predict(object = m1,newdata = data_sub_train_clean) data_sub_train_clean$poison_serum <- predict(object = m2,newdata = data_sub_train_clean) data_sub_train_clean$poison_adj_serum <- predict(object = m3,newdata = data_sub_train_clean) poison_res <- tibble::enframe(m1$residuals,name = NULL,value = "csf_res") poison_res$serum_res <- m2$residuals poison_res$adj_serum_res <- m3$residuals wilcox.test(poison_res$serum_res,poison_res$adj_serum_res,paired = TRUE, alternative = "two.sided") test1 <- data_sub_train_clean %>% filter(!is.na(cel) & !is.na(t2_new_large)) %>% select(patientcode,log_nfl_csf,log_nfl_serum,predicted_csf_nfl,cel,t2_new_large) test2 <- data_sub_val_clean %>% filter(!is.na(cel) & !is.na(t2_new_large)) %>% select(patientcode,log_nfl_csf,log_nfl_serum,predicted_csf_nfl,cel,t2_new_large) test <- rbind(test1,test2) test$composite_cel <- test$cel + test$t2_new_large plot(composite_cel~predicted_csf_nfl,test) # plot obs <- c("log_nfl_csf","log_nfl_serum","predicted_csf_nfl") obs_nice <- c("measured cNFL","measured sNFL","adjusted sNFL") pred <- c("composite_cel","composite_cel","composite_cel") pred_nice <- c("CEL+T2 new&enlarge","CEL+T2 new&enlarge","CEL+T2 new&enlarge") for(i in 1:length(obs)){ # i <- 2 df <- test p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#008F00"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3,ncol = 3,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/CEL+T2NE_vs_nfl_train+val.png", width =7.5,height = 2.5,units = "in",dpi = 300) #################### #################### ## exponential CEL #################### #################### ######## TRAINING cohort ############ #plot CEL vs CSF NFL obs <- c("log_nfl_csf") obs_nice <- c("Log10 cNFL") pred <- c("cel") pred_nice <- c("Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 1 data <- data_sub_train_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) mod <- summary(lm(df[[pred[i]]]~exp(df[[obs[i]]]))) lb1 <- paste("R^2==", round(mod$r.squared,3)) lp1 <- paste("p=", format.pval(mod$coefficients[2,4])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1.5, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + annotate("text",label = lb1, x=min(df[obs[i]])+.2*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.3*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE,color="#521B93") + annotate("text",label = lp1, x=min(df[obs[i]])+.27*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.1*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = FALSE,color="#521B93") + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + # stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ # stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ # stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm", formula = y~exp(x), se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/EXP_cel_vs_csf_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs serum NFL obs <- c("log_nfl_serum") obs_nice <- c("Log10 sNFL") pred <- c("cel") pred_nice <- c("Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 1 data <- data_sub_train_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) mod <- summary(lm(df[[pred[i]]]~exp(df[[obs[i]]]))) lb1 <- paste("R^2==", round(mod$r.squared,3)) lp1 <- paste("p=", format.pval(mod$coefficients[2,4])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1.5, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + annotate("text",label = lb1, x=min(df[obs[i]])+.2*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.3*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE,color="#521B93") + annotate("text",label = lp1, x=min(df[obs[i]])+.27*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.1*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = FALSE,color="#521B93") + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + # stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ # stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ # stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm", formula = y~exp(x), se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/EXP_cel_vs_serum_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs predicted CSF NFL obs <- c("predicted_csf_nfl") obs_nice <- c("sNFL-predicted cNFL") pred <- c("cel") pred_nice <- c("Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 1 data <- data_sub_train_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) mod <- summary(lm(df[[pred[i]]]~exp(df[[obs[i]]]))) lb1 <- paste("R^2==", round(mod$r.squared,3)) lp1 <- paste("p=", format.pval(mod$coefficients[2,4])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1.5, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + annotate("text",label = lb1, x=min(df[obs[i]])+.2*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.3*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE,color="#521B93") + annotate("text",label = lp1, x=min(df[obs[i]])+.27*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.1*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = FALSE,color="#521B93") + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + # stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ # stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ # stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm", formula = y~exp(x), se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#008F00"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/EXP_cel_vs_predicted_nfl_training.png", width =2.5,height = 2.5,units = "in",dpi = 300) ############ VALIDATION COHORT ############################ #plot CEL vs CSF NFL obs <- c("log_nfl_csf") obs_nice <- c("Log10 cNFL") pred <- c("cel") pred_nice <- c("Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 1 data <- data_sub_val_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) mod <- summary(lm(df[[pred[i]]]~exp(df[[obs[i]]]))) lb1 <- paste("R^2==", round(mod$r.squared,3)) lp1 <- paste("p=", format.pval(mod$coefficients[2,4])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1.5, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + annotate("text",label = lb1, x=min(df[obs[i]])+.2*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.3*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE,color="#521B93") + annotate("text",label = lp1, x=min(df[obs[i]])+.27*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.1*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = FALSE,color="#521B93") + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + # stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ # stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ # stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm", formula = y~exp(x), se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/EXP_cel_vs_csf_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs serum NFL obs <- c("log_nfl_serum") obs_nice <- c("Log10 sNFL") pred <- c("cel") pred_nice <- c("Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 1 data <- data_sub_val_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) mod <- summary(lm(df[[pred[i]]]~exp(df[[obs[i]]]))) lb1 <- paste("R^2==", round(mod$r.squared,3)) lp1 <- paste("p=", format.pval(mod$coefficients[2,4])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1.5, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + annotate("text",label = lb1, x=min(df[obs[i]])+.2*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.3*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE,color="#521B93") + annotate("text",label = lp1, x=min(df[obs[i]])+.27*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.1*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = FALSE,color="#521B93") + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + # stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ # stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ # stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm", formula = y~exp(x), se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/EXP_cel_vs_serum_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs predicted CSF NFL obs <- c("predicted_csf_nfl") obs_nice <- c("sNFL-predicted cNFL") pred <- c("cel") pred_nice <- c("Ln(CEL+1)") for(i in 1:length(obs)){ # i <- 1 data <- data_sub_val_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) mod <- summary(lm(df[[pred[i]]]~exp(df[[obs[i]]]))) lb1 <- paste("R^2==", round(mod$r.squared,3)) lp1 <- paste("p=", format.pval(mod$coefficients[2,4])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1.5, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + annotate("text",label = lb1, x=min(df[obs[i]])+.2*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.3*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE,color="#521B93") + annotate("text",label = lp1, x=min(df[obs[i]])+.27*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.1*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = FALSE,color="#521B93") + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + # stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ # stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ # stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm", formula = y~exp(x), se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#008F00"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/EXP_cel_vs_predicted_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) # calculate residuals of serum vs CEL and adjusted serum vs CEL in the exponential models data <- data_sub_train_clean df <- data %>% filter(.,!is.na(data$cel) & !is.na(data$predicted_csf_nfl)) mod_cel_serum <- lm(cel~exp(log_nfl_serum),df) summary(mod_cel_serum) df$serum_cel_res <- mod_cel_serum$residuals hist(mod_cel_serum$residuals) mod_cel_adj_serum <- lm(cel~exp(predicted_csf_nfl),df) summary(mod_cel_adj_serum) df$adj_serum_cel_res <- mod_cel_adj_serum$residuals hist(df$adj_serum_cel_res) wilcox.test(df$serum_cel_res,df$adj_serum_cel_res,paired = TRUE, alternative = "two.sided") df_new <- tibble::enframe(df$serum_cel_res,name = NULL,value = "serum") df_new <- df_new %>% mutate(subclass=row_number()) %>% mutate(group="serum") df_new2 <- tibble::enframe(df$adj_serum_cel_res,name = NULL,value = "serum") df_new2 <- df_new2 %>% mutate(subclass=row_number()) %>% mutate(group="adj-serum") df_all <- rbind(df_new,df_new2) # plot obs <- c("group") obs_nice <- c("residuals") pred <- c("serum") pred_nice <- c("serum-CEL residuals") i <- 1 p4 <- df_all %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "group")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df_all[pred[i]])-0.25*(max(df_all[pred[i]])-min(df_all[pred[i]])),max(df_all[pred[i]])+0.2*(max(df_all[pred[i]])-min(df_all[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("adjusted serum", "measured serum"))+ scale_x_discrete(labels = c("adjusted serum", "measured serum")) + stat_compare_means(method = "t.test", paired = TRUE, label.x = "adj-serum", label.y = max(df_all[pred[i]])+0.1*(max(df_all[pred[i]])-min(df_all[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df_all[pred[i]])-0.05*(max(df_all[pred[i]])-min(df_all[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none")+ theme(axis.title.y = element_text( colour = "blue" )) ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1), filename = "./output/CEL_residuals_plot_t.test.png", width =2.7,height = 2.5,units = "in",dpi = 300) #### VALIDATION cohort #plot CEL vs CSF NFL obs <- c("log_nfl_csf","log_nfl_csf") obs_nice <- c("Log10 cNFL","Log10 cNFL") pred <- c("cel","t2_new_large") pred_nice <- c("Ln(CEL+1)","Ln(#New+Enlarging T2 +1)") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_csf_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/T2NE_cel_vs_csf_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs serum NFL obs <- c("log_nfl_serum","log_nfl_serum") obs_nice <- c("Log10 sNFL","Log10 sNFL") pred <- c("cel","t2_new_large") pred_nice <- c("Ln(CEL+1)","Ln(#New+Enlarging T2 +1)") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_serum_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/T2NE_cel_vs_serum_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) #plot CEL vs predicted CSF NFL obs <- c("predicted_csf_nfl","predicted_csf_nfl") obs_nice <- c("adjusted sNFL","adjusted sNFL") pred <- c("cel","t2_new_large") pred_nice <- c("Ln(CEL+1)","Ln(#New+Enlarging T2 +1)") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean df <- data %>% filter(.,!is.na(data[[pred[i]]]) & !is.na(data$predicted_csf_nfl)) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#008F00"), axis.title.y = element_text(colour = "#FF9300")) assign(paste("p",i,sep = ""),p) } # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/cel_vs_predicted_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/T2NE_cel_vs_adjusted_nfl_validation.png", width =2.5,height = 2.5,units = "in",dpi = 300) ######################################################################################################################################################## ######################################################################################################################################################## # logistic regression - predction of CEL status (yes/no) from sNFL, cNFL, pred cNFL ######################################################################################################################################################## ######################################################################################################################################################## # generate dichotomized CEL column data_sub_train_clean$cel_dich <- NA data_sub_train_clean$cel_dich[which(data_sub_train_clean$cel==0)] <- 0 data_sub_train_clean$cel_dich[which(data_sub_train_clean$cel>0)] <- 1 # logistic regression model 1 - using measured cNFL model1 <- glm(cel_dich~log_nfl_csf,family=binomial(link="logit"),data = data_sub_train_clean) # logistic regression model 2 - using measured sNFL model2 <- glm(cel_dich~log_nfl_serum,family=binomial(link="logit"),data = data_sub_train_clean) # logistic regression model 3 - using predicted cNFL model3 <- glm(cel_dich~predicted_csf_nfl,family=binomial(link="logit"),data = data_sub_train_clean) # calculate predicted probabilities of cCEL for each model data_sub_train_clean$glm_1 <- plogis(predict(model1,data_sub_train_clean)) data_sub_train_clean$glm_2 <- plogis(predict(model2,data_sub_train_clean)) data_sub_train_clean$glm_3 <- plogis(predict(model3,data_sub_train_clean)) plot.roc(roc(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_1,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) plot.roc(roc(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_2,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) plot.roc(roc(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_3,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) # generate cut-off for NFL that best differentiates CEL vs noCEL in training cohort cutoff_1 <- optimalCutoff(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_1)[1] cutoff_2 <- optimalCutoff(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_2)[1] cutoff_3 <- optimalCutoff(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_3)[1] ############################## # NFL cutoff for model 1 # identify the lowest NFL from category 1 test <- data_sub_train_clean %>% filter(glm_1>cutoff_1) min <- min(test$nfl_csf) # identify the highest NFL from category 0 test <- data_sub_train_clean %>% filter(glm_1% filter(glm_2>cutoff_2) min <- min(test$nfl_serum) # identify the highest NFL from category 0 test <- data_sub_train_clean %>% filter(glm_2% filter(glm_3>cutoff_3) min <- min(test$predicted_csf_nfl) # identify the highest NFL from category 0 test <- data_sub_train_clean %>% filter(glm_3 cutoff_1, 1,0) data_train_recoded$glm_2_recoded <- ifelse(data_train_recoded$glm_2 > cutoff_2, 1,0) data_train_recoded$glm_3_recoded <- ifelse(data_train_recoded$glm_3 > cutoff_3, 1,0) library(caret) gl1_confmx <- table(factor(data_train_recoded$glm_1_recoded, levels=c(1,0)),factor(data_train_recoded$cel_dich,levels = c(1,0))) gl1_sens <- round(caret::sensitivity(gl1_confmx)*100,digits = 1) gl1_spec <- round(caret::specificity(gl1_confmx)*100,digits = 1) gl2_confmx <- table(factor(data_train_recoded$glm_2_recoded,levels = c(1,0)),factor(data_train_recoded$cel_dich,levels = c(1,0))) gl2_sens <- round(caret::sensitivity(gl2_confmx)*100,digits = 1) gl2_spec <- round(caret::specificity(gl2_confmx)*100,digits = 1) gl3_confmx <- table(factor(data_train_recoded$glm_3_recoded,levels = c(1,0)),factor(data_train_recoded$cel_dich,levels = c(1,0))) gl3_sens <- round(caret::sensitivity(gl3_confmx)*100,digits = 1) gl3_spec <- round(caret::specificity(gl3_confmx)*100,digits = 1) p1 <- data_sub_train_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_1, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_1,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 78.4% \nSens = ",gl1_sens,"%"," Spec = ",gl1_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0",size=4)+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12), axis.title = element_text(size=14)) p1 p2 <- data_sub_train_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_2, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_2,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 61.8% \nSens = ",gl2_sens,"%"," Spec = ",gl2_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12), axis.title = element_text(size=14)) p2 p3 <- data_sub_train_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_3, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_3,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 69.2% \nSens = ",gl3_sens,"%"," Spec = ",gl3_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12), axis.title = element_text(size=14)) p3 # predict in validation cohort data_sub_val_clean$cel_dich <- ifelse(data_sub_val_clean$cel >0,1,0) # data_sub_val_clean$cel_dich[which(data_sub_val_clean$Clinical_Disease_Activity=="Yes")] <- 1 # calculate predicted probabilities of cCEL for each model data_sub_val_clean$glm_1 <- plogis(predict(model1,data_sub_val_clean)) data_sub_val_clean$glm_2 <- plogis(predict(model2,data_sub_val_clean)) data_sub_val_clean$glm_3 <- plogis(predict(model3,data_sub_val_clean)) plot.roc(roc(data_sub_val_clean$cel_dich,data_sub_val_clean$glm_1,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) plot.roc(roc(data_sub_val_clean$cel_dich,data_sub_val_clean$glm_2,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) plot.roc(roc(data_sub_val_clean$cel_dich,data_sub_val_clean$glm_3,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) #glm1 data_val_recoded <- data_sub_val_clean data_val_recoded$glm_1_recoded <- ifelse(data_val_recoded$glm_1 > cutoff_1, 1,0) data_val_recoded$glm_2_recoded <- ifelse(data_val_recoded$glm_2 > cutoff_2, 1,0) data_val_recoded$glm_3_recoded <- ifelse(data_val_recoded$glm_3 > cutoff_3, 1,0) gl1_confmx <- table(factor(data_val_recoded$glm_1_recoded,levels = c(1,0)),factor(data_val_recoded$cel_dich,levels = c(1,0))) gl1_sens <- round(caret::sensitivity(gl1_confmx)*100,digits = 1) gl1_spec <- round(caret::specificity(gl1_confmx)*100,digits = 1) gl2_confmx <- table(factor(data_val_recoded$glm_2_recoded,levels = c(1,0)),factor(data_val_recoded$cel_dich,levels = c(1,0))) gl2_sens <- round(caret::sensitivity(gl2_confmx)*100,digits = 1) gl2_spec <- round(caret::specificity(gl2_confmx)*100,digits = 1) gl3_confmx <- table(factor(data_val_recoded$glm_3_recoded,levels = c(1,0)),factor(data_val_recoded$cel_dich,levels = c(1,0))) gl3_sens <- round(caret::sensitivity(gl3_confmx)*100,digits = 1) gl3_spec <- round(caret::specificity(gl3_confmx)*100,digits = 1) p1_v <- data_sub_val_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_1, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_1,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 73.5% \nSens = ",gl1_sens,"%"," Spec = ",gl1_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12), axis.title = element_text(size=14)) p1_v p2_v <- data_sub_val_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_2, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_2,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 65.1% \nSens = ",gl2_sens,"%"," Spec = ",gl2_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12), axis.title = element_text(size=14)) p2_v p3_v <- data_sub_val_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_3, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_3,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 75.3% \nSens = ",gl3_sens,"%"," Spec = ",gl3_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12), axis.title = element_text(size=14)) p3_v ggsave(plot = ggarrange(p1,p2,p3,p1_v,p2_v,p3_v, ncol = 3,nrow = 2, common.legend=TRUE, legend="none"), filename = "./output/cel_dichotomized_train+val.png", width=15,height=10,units = "in",dpi = 150) ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_csf_nfl_training.png", width =3.5,height = 3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_serum_nfl_training.png", width =3.5,height = 3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_predicted_csf_nfl_training.png", width =3.5,height =3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p1_v,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_csf_nfl_validation.png", width =3.5,height = 3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p2_v,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_serum_nfl_validation.png", width =3.5,height = 3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3_v,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_predicted_csf_nfl_validation.png", width =3.5,height = 3.5,units = "in",dpi = 300) ######################################################################################################################################################## ######################################################################################################################################################## ######################################################################################################################################################## # logistic regression - predction of CEL status (yes/no) from HD age-adjusted sNFL, cNFL, pred cNFL ######################################################################################################################################################## ######################################################################################################################################################## ######################################################################################################################################################## # generate dichotomized CEL column data_sub_train_clean$cel_dich <- NA data_sub_train_clean$cel_dich[which(data_sub_train_clean$cel==0)] <- 0 data_sub_train_clean$cel_dich[which(data_sub_train_clean$cel>0)] <- 1 # logistic regression model 1 - using measured cNFL model1 <- glm(cel_dich~log_nfl_csf_age_adj,family=binomial(link="logit"),data = data_sub_train_clean) # logistic regression model 2 - using measured sNFL model2 <- glm(cel_dich~log_nfl_serum_age_adj,family=binomial(link="logit"),data = data_sub_train_clean) # logistic regression model 3 - using predicted cNFL model3 <- glm(cel_dich~predicted_csf_nfl_age_adj,family=binomial(link="logit"),data = data_sub_train_clean) # calculate predicted probabilities of cCEL for each model data_sub_train_clean$glm_1 <- plogis(predict(model1,data_sub_train_clean)) data_sub_train_clean$glm_2 <- plogis(predict(model2,data_sub_train_clean)) data_sub_train_clean$glm_3 <- plogis(predict(model3,data_sub_train_clean)) plot.roc(roc(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_1,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) plot.roc(roc(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_2,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) plot.roc(roc(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_3,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) # generate cut-off for NFL that best differentiates CEL vs noCEL in training cohort cutoff_1 <- optimalCutoff(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_1)[1] cutoff_2 <- optimalCutoff(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_2)[1] cutoff_3 <- optimalCutoff(data_sub_train_clean$cel_dich,data_sub_train_clean$glm_3)[1] ############################## # NFL cutoff for model 1 # identify the lowest NFL from category 1 test <- data_sub_train_clean %>% filter(glm_1>cutoff_1) min <- min(test$nfl_csf) # identify the highest NFL from category 0 test <- data_sub_train_clean %>% filter(glm_1% filter(glm_2>cutoff_2) min <- min(test$nfl_serum) # identify the highest NFL from category 0 test <- data_sub_train_clean %>% filter(glm_2% filter(glm_3>cutoff_3) min <- min(test$predicted_csf_nfl) # identify the highest NFL from category 0 test <- data_sub_train_clean %>% filter(glm_3 cutoff_1, 1,0) data_train_recoded$glm_2_recoded <- ifelse(data_train_recoded$glm_2 > cutoff_2, 1,0) data_train_recoded$glm_3_recoded <- ifelse(data_train_recoded$glm_3 > cutoff_3, 1,0) gl1_confmx <- table(factor(data_train_recoded$glm_1_recoded, levels=c(1,0)),factor(data_train_recoded$cel_dich,levels = c(1,0))) gl1_sens <- round(caret::sensitivity(gl1_confmx)*100,digits = 1) gl1_spec <- round(caret::specificity(gl1_confmx)*100,digits = 1) gl2_confmx <- table(factor(data_train_recoded$glm_2_recoded,levels = c(1,0)),factor(data_train_recoded$cel_dich,levels = c(1,0))) gl2_sens <- round(caret::sensitivity(gl2_confmx)*100,digits = 1) gl2_spec <- round(caret::specificity(gl2_confmx)*100,digits = 1) gl3_confmx <- table(factor(data_train_recoded$glm_3_recoded,levels = c(1,0)),factor(data_train_recoded$cel_dich,levels = c(1,0))) gl3_sens <- round(caret::sensitivity(gl3_confmx)*100,digits = 1) gl3_spec <- round(caret::specificity(gl3_confmx)*100,digits = 1) p1 <- data_sub_train_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_1, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_1,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("MRI CEL") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 82.7% Sens = ",gl1_sens,"%"," Spec = ",gl1_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12)) p1 p2 <- data_sub_train_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_2, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_2,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("MRI CEL") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 74.8% Sens = ",gl2_sens,"%"," Spec = ",gl2_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12)) p2 p3 <- data_sub_train_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_3, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_3,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("MRI CEL") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 78.4% Sens = ",gl3_sens,"%"," Spec = ",gl3_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12)) p3 # predict in validation cohort data_sub_val_clean$cel_dich <- ifelse(data_sub_val_clean$cel >0,1,0) # calculate predicted probabilities of cCEL for each model data_sub_val_clean$glm_1 <- plogis(predict(model1,data_sub_val_clean)) data_sub_val_clean$glm_2 <- plogis(predict(model2,data_sub_val_clean)) data_sub_val_clean$glm_3 <- plogis(predict(model3,data_sub_val_clean)) plot.roc(roc(data_sub_val_clean$cel_dich,data_sub_val_clean$glm_1,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) plot.roc(roc(data_sub_val_clean$cel_dich,data_sub_val_clean$glm_2,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) plot.roc(roc(data_sub_val_clean$cel_dich,data_sub_val_clean$glm_3,percent=TRUE, plot=FALSE, ci=TRUE),print.auc = TRUE) #glm1 data_val_recoded <- data_sub_val_clean data_val_recoded$glm_1_recoded <- ifelse(data_val_recoded$glm_1 > cutoff_1, 1,0) data_val_recoded$glm_2_recoded <- ifelse(data_val_recoded$glm_2 > cutoff_2, 1,0) data_val_recoded$glm_3_recoded <- ifelse(data_val_recoded$glm_3 > cutoff_3, 1,0) gl1_confmx <- table(factor(data_val_recoded$glm_1_recoded,levels = c(1,0)),factor(data_val_recoded$cel_dich,levels = c(1,0))) gl1_sens <- round(caret::sensitivity(gl1_confmx)*100,digits = 1) gl1_spec <- round(caret::specificity(gl1_confmx)*100,digits = 1) gl2_confmx <- table(factor(data_val_recoded$glm_2_recoded,levels = c(1,0)),factor(data_val_recoded$cel_dich,levels = c(1,0))) gl2_sens <- round(caret::sensitivity(gl2_confmx)*100,digits = 1) gl2_spec <- round(caret::specificity(gl2_confmx)*100,digits = 1) gl3_confmx <- table(factor(data_val_recoded$glm_3_recoded,levels = c(1,0)),factor(data_val_recoded$cel_dich,levels = c(1,0))) gl3_sens <- round(caret::sensitivity(gl3_confmx)*100,digits = 1) gl3_spec <- round(caret::specificity(gl3_confmx)*100,digits = 1) p1_v <- data_sub_val_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_1, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_1,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("MRI CEL") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 78.3% Sens = ",gl1_sens,"%"," Spec = ",gl1_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12)) p1_v p2_v <- data_sub_val_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_2, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_2,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("MRI CEL") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 78.6% Sens = ",gl2_sens,"%"," Spec = ",gl2_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12)) p2_v p3_v <- data_sub_val_clean %>% mutate_at(.,.vars="cel_dich",.funs = as.factor) %>% filter(!is.na(cel_dich)) %>% ggplot(aes(x=cel_dich, y=glm_3, color = cel_dich)) + geom_violin(color="black",alpha = 0.7,size=0.3,aes(fill=cel_dich)) + geom_jitter( shape = 21, size = 3,stroke = 0.6,position=position_jitter(0.07),color="black",aes(fill=cel_dich))+ scale_fill_manual(values = c("orange","red"),breaks = c(0,1))+ geom_hline(yintercept = cutoff_3,color="black",linetype="dotted",size=.5)+ # ggtitle("TRAINING cohort \npredictors: Age + Gender") + xlab("MRI CEL") + ylab("predicted probability of CEL") + ggtitle(paste0("AUC: 84.7% Sens = ",gl3_sens,"%"," Spec = ",gl3_spec,"%"))+ scale_y_continuous(limits = c(0,1.02),labels = scales::percent) + scale_x_discrete(label=c("No","Yes"))+ stat_summary(fun="median", geom = "crossbar",color="black",size=.5,width=0.5)+ stat_compare_means(method = "wilcox.test", paired = FALSE,label.x = "0")+ theme_classic2() + theme(legend.position = "none", plot.title = element_text(size=12), axis.text = element_text(size=12)) p3_v ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_csf_nfl_age_adj_training.png", width =4.5,height = 3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p2,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_serum_nfl_age_adj_training.png", width =4.5,height = 3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_predicted_csf_nfl_age_adj_training.png", width =4.5,height =3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p1_v,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_csf_nfl_age_adj_validation.png", width =4.5,height = 3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p2_v,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_serum_nfl_age_adj_validation.png", width =4.5,height = 3.5,units = "in",dpi = 300) ggsave(plot = ggarrange(p3_v,ncol = 1,nrow = 1, common.legend=TRUE, legend="none"), filename = "./output/dichotomized_cel_predicted_csf_nfl_age_adj_validation.png", width =4.5,height = 3.5,units = "in",dpi = 300) ######################################################################################################################################################## ######################################################################################################################################################## # plot SIMOA vs ELISA NFL ######################################################################################################################################################## ######################################################################################################################################################## simoa_data <- read_csv("./input/simoa_vs_elisa.csv") #plot CEL vs CSF NFL obs <- c("simoa") obs_nice <- c("cNFL (pg/ml) - SIMOA") pred <- c("elisa") pred_nice <- c("cNFL (pg/ml) - ELISA") for(i in 1:length(obs)){ # i <- 2 data <- simoa_data df <- data p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1.5, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="darkred",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="darkred",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="darkred",label.y.npc = .77)+ geom_smooth(method="lm", se=TRUE, size=0.75,color="darkred") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "darkblue")) assign(paste("p",i,sep = ""),p) } p1 # Save all plots 4x3 ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/simao_vs_elisa.png", width =3,height = 2.5,units = "in",dpi = 300) ######################################################################################################################################################## ######################################################################################################################################################## # plot sNFL-predicted cNFL with diagnoses distinguished in the validation cohort ######################################################################################################################################################## ######################################################################################################################################################## # plot residuals #plot CSF vs Serum NFL pred <- c("log_nfl_serum","predicted_csf_nfl") obs <- c("log_nfl_csf","log_nfl_csf") pred_nice <- c("Log10 sNFL","sNFL-predicted cNFL") obs_nice <- c("Log10 cNFL", "Log10 cNFL") col <- c("#C00000","blue") for(i in 1:length(pred)){ # i <- 2 df <- data_sub_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=diagnosis_simple),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_color_manual(breaks = c("MS","non-MS"), values = c("orange","darkgray"))+ # stat_cor(aes(label=..r.label..), size=4,color="black",label.y.npc = 0.99)+ stat_cor(aes(label=..rr.label..,color=diagnosis_simple), size=4,label.y.npc = .99,label.x.npc = 0)+ stat_cor(aes(label=..p.label..,color=diagnosis_simple), size=4,label.y.npc = .99, label.x.npc = 0.4)+ geom_smooth(aes(color=diagnosis_simple),method="lm",se=FALSE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = col[i]), legend.position = "none") assign(paste("p",i,sep = ""),p) } p1 p2 ggsave(plot = p1,filename = "./output/validation_csf_vs_serum_nfl.png", width =3,height = 3,units = "in",dpi = 300) ggsave(plot = p2,filename = "./output/validation_csf_vs_serum_predicted_cNFL_nfl.png", width =3,height = 3,units = "in",dpi = 300) pred <- c("log_nfl_serum","predicted_csf_nfl") obs <- c("log_nfl_csf","log_nfl_csf") pred_nice <- c("Log10 sNFL","sNFL-predicted cNFL") obs_nice <- c("Log10 cNFL", "Log10 cNFL") for(i in 1:length(pred)){ # i <- 2 df <- data_sub_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="black",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + # stat_cor(aes(label=..r.label..), size=4,color="black",label.y.npc = 0.99)+ stat_cor(aes(label=..rr.label..),color="brown", size=4,label.y.npc = .99,label.x.npc = 0)+ stat_cor(aes(label=..p.label..),color="brown", size=4,label.y.npc = .99, label.x.npc = 0.4)+ geom_smooth(color="brown",method="lm",se=FALSE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = col[i])) assign(paste("p",i,sep = ""),p) } p1 p2 ggsave(plot = p1,filename = "./output/validation_csf_vs_serum_nfl_all.png", width =3,height = 3,units = "in",dpi = 300) ggsave(plot = p2,filename = "./output/validation_csf_vs_serum_predicted_cNFL_nfl_all.png", width =3,height = 3,units = "in",dpi = 300) ######################################################################################################################################################## ######################################################################################################################################################## # correlation with severity outcomes in TRAINING cohort ######################################################################################################################################################## ######################################################################################################################################################## pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_csf","log_nfl_csf","log_nfl_csf") obs_nice <- c("Log10 cNFL","Log10 cNFL","Log10 cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) # %>% # group_by(patientcode) %>% # arrange(lpdate) %>% # slice_head() %>% # ungroup() # p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/csf_nfl_vs_severity_outcomes_training.png", width=2.5,height=7.5,units = "in",dpi = 150) # serum NFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum") obs_nice <- c("Log10 sNFL","Log10 sNFL","Log10 sNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean%>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) # %>% # group_by(patientcode) %>% # arrange(lpdate) %>% # slice_head() %>% # ungroup() p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/serum_nfl_vs_severity_outcomes_training.png", width=2.5,height=7.5,units = "in",dpi = 150) # predicted cNFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("predicted_csf_nfl","predicted_csf_nfl","predicted_csf_nfl") obs_nice <- c("sNFL-predicted cNFL","sNFL-predicted cNFL","sNFL-predicted cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) # %>% # group_by(patientcode) %>% # arrange(lpdate) %>% # slice_head() %>% # ungroup() p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/predicted_csf_nfl_vs_severity_outcomes_training.png", width=2.5,height=7.5,units = "in",dpi = 150) # age-adjusted cNFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_csf_age_adj","log_nfl_csf_age_adj","log_nfl_csf_age_adj") obs_nice <- c("age-adjusted cNFL","age-adjusted cNFL","age-adjusted cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/age-adjusted_csf_nfl_vs_severity_outcomes_training.png", width=2.5,height=7.5,units = "in",dpi = 150) # age-adjusted sNFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_serum_age_adj","log_nfl_serum_age_adj","log_nfl_serum_age_adj") obs_nice <- c("age-adjusted cNFL","age-adjusted cNFL","age-adjusted cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/age-adjusted_serum_nfl_vs_severity_outcomes_training.png", width=2.5,height=7.5,units = "in",dpi = 150) # age-adjusted sNFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("predicted_csf_nfl_age_adj","predicted_csf_nfl_age_adj","predicted_csf_nfl_age_adj") obs_nice <- c("age-adj sNFL-predicted cNFL","age-adj sNFL-predicted cNFL","age-adj sNFL-predicted cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/age-adjusted_snfl_predicted_cNFL_vs_severity_outcomes_training.png", width=2.5,height=7.5,units = "in",dpi = 150) ################################### # # with diagnosis # ################################### pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_csf","log_nfl_csf","log_nfl_csf") obs_nice <- c("Log10 cNFL","Log10 cNFL","Log10 cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "PP-MS",replacement = "P-MS") data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "SP-MS",replacement = "P-MS") df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=diagnosis),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.28*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_color_manual(breaks = c("P-MS","RR-MS"), values = c("darkblue","orange"))+ # stat_cor(aes(label=..r.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..rr.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..p.label..,color=diagnosis), size=4,label.x.npc = .35, label.y.npc = 1)+ geom_smooth(aes(color=diagnosis),method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="none"), filename = "./output/csf_nfl_vs_severity_outcomes_training_with_diagnosis.png", width=3.5,height=10,units = "in",dpi = 150) # serum NFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum") obs_nice <- c("Log10 sNFL","Log10 sNFL","Log10 sNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "PP-MS",replacement = "P-MS") data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "SP-MS",replacement = "P-MS") df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=diagnosis),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.28*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_color_manual(breaks = c("P-MS","RR-MS"), values = c("darkblue","orange"))+ # stat_cor(aes(label=..r.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..rr.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..p.label..,color=diagnosis), size=4,label.x.npc = .35, label.y.npc = 1)+ geom_smooth(aes(color=diagnosis),method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="none"), filename = "./output/serum_nfl_vs_severity_outcomes_training_with_diagnosis.png", width=3.5,height=10,units = "in",dpi = 150) # predicted cNFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("predicted_csf_nfl","predicted_csf_nfl","predicted_csf_nfl") obs_nice <- c("predicted cNFL","predicted cNFL","predicted cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "PP-MS",replacement = "P-MS") data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "SP-MS",replacement = "P-MS") df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=diagnosis),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.28*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_color_manual(breaks = c("P-MS","RR-MS"), values = c("darkblue","orange"))+ # stat_cor(aes(label=..r.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..rr.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..p.label..,color=diagnosis), size=4,label.x.npc = .35, label.y.npc = 1)+ geom_smooth(aes(color=diagnosis),method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="none"), filename = "./output/predicted_csf_nfl_vs_severity_outcomes_training_with_diagnosis.png", width=3.5,height=10,units = "in",dpi = 150) # serum NFL - SPMS only pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum") obs_nice <- c("Log10 sNFL","Log10 sNFL","Log10 sNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) # %>% # filter(log_nfl_serum<2) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=diagnosis),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..rr.label..,color=diagnosis), size=4,label.x.npc = .25, label.y.npc = 1)+ stat_cor(aes(label=..p.label..,color=diagnosis), size=4,label.x.npc = .5, label.y.npc = 1)+ geom_smooth(aes(color=diagnosis),method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } # removed top three sNFL samples for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train_clean %>% filter(diagnosis %in% c("SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) %>% filter(log_nfl_serum<2 & msdss<5) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=diagnosis),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..rr.label..,color=diagnosis), size=4,label.x.npc = .25, label.y.npc = 1)+ stat_cor(aes(label=..p.label..,color=diagnosis), size=4,label.x.npc = .5, label.y.npc = 1)+ geom_smooth(aes(color=diagnosis),method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("q",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/serum_nfl_vs_severity_outcomes_training_with_diagnosis_SPMS_only.png", width=5,height=15,units = "in",dpi = 150) ggsave(plot = ggarrange(q1,q2,q3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/serum_nfl_vs_severity_outcomes_training_with_diagnosis_SPMS_only_noout.png", width=5,height=15,units = "in",dpi = 150) # corelation of severity outcomes with Upper SC atrophy pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("medulla_ll_atrophy","medulla_ll_atrophy","medulla_ll_atrophy") obs_nice <- c("Medulla LL&atrophy","Medulla LL&atrophy","Medulla LL&atrophy") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/medulla_atrophy_vs_severity_outcomes_training.png", width=2.5,height=7.5,units = "in",dpi = 150) ######################################################################################################################################################## ######################################################################################################################################################## # correlation with severity outcomes in VALIDATION cohort ######################################################################################################################################################## ######################################################################################################################################################## pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_csf","log_nfl_csf","log_nfl_csf") obs_nice <- c("Log10 cNFL","Log10 cNFL","Log10 cNFL") for(i in 1:length(obs)){ # i <- 1 data <- data_sub_val_clean%>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) # %>% # group_by(patientcode) %>% # arrange(lpdate) %>% # slice_head() %>% # ungroup() p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/csf_nfl_vs_severity_outcomes_validation.png", width=2.5,height=7.5,units = "in",dpi = 150) # serum NFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum") obs_nice <- c("Log10 sNFL","Log10 sNFL","Log10 sNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean%>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) # %>% # group_by(patientcode) %>% # arrange(lpdate) %>% # slice_head() %>% # ungroup() p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/serum_nfl_vs_severity_outcomes_validation.png", width=2.5,height=7.5,units = "in",dpi = 150) # predicted cNFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("predicted_csf_nfl","predicted_csf_nfl","predicted_csf_nfl") obs_nice <- c("predicted cNFL","predicted cNFL","predicted cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean%>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) # %>% # group_by(patientcode) %>% # arrange(lpdate) %>% # slice_head() %>% # ungroup() # p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/predicted_csf_nfl_vs_severity_outcomes_validation.png", width=2.5,height=7.5,units = "in",dpi = 150) ################################### # # with diagnosis # ################################### pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_csf","log_nfl_csf","log_nfl_csf") obs_nice <- c("Log10 cNFL","Log10 cNFL","Log10 cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "PP-MS",replacement = "P-MS") data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "SP-MS",replacement = "P-MS") df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=diagnosis),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.28*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_color_manual(breaks = c("P-MS","RR-MS"), values = c("darkblue","orange"))+ # stat_cor(aes(label=..r.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..rr.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..p.label..,color=diagnosis), size=4,label.x.npc = .35, label.y.npc = 1)+ geom_smooth(aes(color=diagnosis),method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="none"), filename = "./output/csf_nfl_vs_severity_outcomes_validation_with_diagnosis.png", width=3.5,height=10,units = "in",dpi = 150) # serum NFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum") obs_nice <- c("Log10 sNFL","Log10 sNFL","Log10 sNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "PP-MS",replacement = "P-MS") data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "SP-MS",replacement = "P-MS") df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=diagnosis),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.28*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_color_manual(breaks = c("P-MS","RR-MS"), values = c("darkblue","orange"))+ # stat_cor(aes(label=..r.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..rr.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..p.label..,color=diagnosis), size=4,label.x.npc = .35, label.y.npc = 1)+ geom_smooth(aes(color=diagnosis),method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="none"), filename = "./output/serum_nfl_vs_severity_outcomes_validation_with_diagnosis.png", width=3.5,height=10,units = "in",dpi = 150) # predicted cNFL pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("predicted_csf_nfl","predicted_csf_nfl","predicted_csf_nfl") obs_nice <- c("predicted cNFL","predicted cNFL","predicted cNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val_clean %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "PP-MS",replacement = "P-MS") data$diagnosis <- str_replace_all(string = data$diagnosis,pattern = "SP-MS",replacement = "P-MS") df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(aes(color=diagnosis),alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.28*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_color_manual(breaks = c("P-MS","RR-MS"), values = c("darkblue","orange"))+ # stat_cor(aes(label=..r.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..rr.label..,color=diagnosis), size=4,label.x.npc = 0, label.y.npc = 1)+ stat_cor(aes(label=..p.label..,color=diagnosis), size=4,label.x.npc = .35, label.y.npc = 1)+ geom_smooth(aes(color=diagnosis),method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="none"), filename = "./output/predicted_csf_nfl_vs_severity_outcomes_validation_with_diagnosis.png", width=3.5,height=10,units = "in",dpi = 150) # corelation of severity outcomes with Upper SC atrophy pred <- c("msdss","msss","armss") pred_nice <- c("MS-DSS","MSSS","ARMSS") obs <- c("medulla_ll_atrophy","medulla_ll_atrophy","medulla_ll_atrophy") obs_nice <- c("Medulla LL&atrophy","Medulla LL&atrophy","Medulla LL&atrophy") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_val%>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS")) df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3, ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/medulla_atrophy_vs_severity_outcomes_validation.png", width=2.5,height=7.5,units = "in",dpi = 150) ################################################################################ ################################################################################ ## ## ## MLR model prediciting MS severity from sNFL ## ## ################################################################################ ################################################################################ ####### #MSDSS ####### severity_data_msdss_train <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msdss)) severity_data_msdss_train_clean <- severity_data_msdss_train[complete.cases(severity_data_msdss_train[confound]),] #build the model: fit <- lm(msdss~log_nfl_serum + age + bmi + bmi_weight + serum_ap + serum_bun + serum_creatinine, severity_data_msdss_train_clean) fit_msdss <- lm(msdss~log_nfl_serum, severity_data_msdss_train_clean) plot(msdss~log_nfl_serum, severity_data_msdss_train_clean) abline(lm(msdss~log_nfl_serum, severity_data_msdss_train_clean)) summary(fit_msdss) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit_msdss <- lm(msdss ~ log_nfl_serum + age + bmi + bmi_weight + serum_ap, severity_data_msdss_train_clean) summary(best_fit_msdss) # predict msdss from LM and MLM severity_data_msdss_train_clean$msdss_lm <- fit_msdss$fitted.values severity_data_msdss_train_clean$msdss_mlm <- predict(object = best_fit_msdss,newdata = severity_data_msdss_train_clean) # plot pred <- c("msdss_lm","msdss_mlm") pred_nice <- c("sNFL-predicted MS-DSS","sNFL-predicted MS-DSS") obs <- c("msdss","msdss") obs_nice <- c("measured MS-DSS","measured MS-DSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msdss_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msdss_LM_vs_MLM_prediction_training.png", width=6,height=2.5,units = "in",dpi = 150) #VALIDATION cohort severity_data_msdss_val <- data_sub_val %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msdss)) severity_data_msdss_val_clean <- severity_data_msdss_val[complete.cases(severity_data_msdss_val[confound]),] # predict msdss from LM and MLM in the VALIDATION cohort severity_data_msdss_val_clean$msdss_lm <- predict(object = fit_msdss,newdata = severity_data_msdss_val_clean) severity_data_msdss_val_clean$msdss_mlm <- predict(object = best_fit_msdss,newdata = severity_data_msdss_val_clean) # plot pred <- c("msdss_lm","msdss_mlm") pred_nice <- c("sNFL-predicted MS-DSS","sNFL-predicted MS-DSS") obs <- c("msdss","msdss") obs_nice <- c("measured MS-DSS","measured MS-DSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msdss_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msdss_LM_vs_MLM_prediction_validation.png", width=6,height=2.5,units = "in",dpi = 150) ######## # MSSS ######## severity_data_msss_train <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msss)) severity_data_msss_train_clean <- severity_data_msss_train[complete.cases(severity_data_msss_train[confound]),] #build the model: fit <- lm(msss~log_nfl_serum + age + bmi + bmi_weight + serum_ap + serum_bun + serum_creatinine , severity_data_msss_train_clean) fit_msss <- lm(msss~log_nfl_serum, severity_data_msss_train_clean) plot(msss~log_nfl_serum, severity_data_msss_train_clean) abline(lm(msss~log_nfl_serum, severity_data_msss_train_clean)) summary(fit_msss) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit_msss <- lm(msss ~ log_nfl_serum + bmi_weight + serum_ap + serum_bun + serum_creatinine, severity_data_msss_train_clean) summary(best_fit_msss) # predict msss from LM and MLM severity_data_msss_train_clean$msss_lm <- fit_msss$fitted.values severity_data_msss_train_clean$msss_mlm <- predict(object = best_fit_msss,newdata = severity_data_msss_train_clean) # plot pred <- c("msss_lm","msss_mlm") pred_nice <- c("sNFL-predicted MSSS","sNFL-predicted MSSS") obs <- c("msss","msss") obs_nice <- c("measured MSSS","measured MSSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msss_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msss_LM_vs_MLM_prediction_training.png", width=6,height=2.5,units = "in",dpi = 150) #VALIDATION cohort severity_data_msss_val <- data_sub_val %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msss)) severity_data_msss_val_clean <- severity_data_msss_val[complete.cases(severity_data_msss_val[confound]),] # predict msss from LM and MLM in the VALIDATION cohort severity_data_msss_val_clean$msss_lm <- predict(object = fit_msss,newdata = severity_data_msss_val_clean) severity_data_msss_val_clean$msss_mlm <- predict(object = best_fit_msss,newdata = severity_data_msss_val_clean) # plot pred <- c("msss_lm","msss_mlm") pred_nice <- c("sNFL-predicted MSSS","sNFL-predicted MSSS") obs <- c("msss","msss") obs_nice <- c("measured MSSS","measured MSSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msss_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msss_LM_vs_MLM_prediction_validation.png", width=6,height=2.5,units = "in",dpi = 150) ######## # ARMSS ######## severity_data_armss_train <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(armss)) severity_data_armss_train_clean <- severity_data_armss_train[complete.cases(severity_data_armss_train[confound]),] #build the model: fit <- lm(armss~log_nfl_serum + age + bmi + bmi_weight + serum_ap + serum_bun + serum_creatinine, severity_data_armss_train_clean) fit_armss <- lm(armss~log_nfl_serum, severity_data_armss_train_clean) plot(armss~log_nfl_serum, severity_data_armss_train_clean) abline(lm(armss~log_nfl_serum, severity_data_armss_train_clean)) summary(fit_armss) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit_armss <- lm(armss ~ log_nfl_serum + age + bmi + bmi_weight + serum_ap + serum_bun + serum_creatinine, severity_data_armss_train_clean) summary(best_fit_armss) # predict armss from LM and MLM severity_data_armss_train_clean$armss_lm <- fit_armss$fitted.values severity_data_armss_train_clean$armss_mlm <- predict(object = best_fit_armss,newdata = severity_data_armss_train_clean) # plot pred <- c("armss_lm","armss_mlm") pred_nice <- c("sNFL-predicted ARMSS","sNFL-predicted ARMSS") obs <- c("armss","armss") obs_nice <- c("measured ARMSS","measured ARMSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_armss_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/armss_LM_vs_MLM_prediction_training.png", width=6,height=2.5,units = "in",dpi = 150) #VALIDATION cohort severity_data_armss_val <- data_sub_val %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(armss)) severity_data_armss_val_clean <- severity_data_armss_val[complete.cases(severity_data_armss_val[confound]),] # predict armss from LM and MLM in the VALIDATION cohort severity_data_armss_val_clean$armss_lm <- predict(object = fit_armss,newdata = severity_data_armss_val_clean) severity_data_armss_val_clean$armss_mlm <- predict(object = best_fit_armss,newdata = severity_data_armss_val_clean) # plot pred <- c("armss_lm","armss_mlm") pred_nice <- c("sNFL-predicted ARMSS","sNFL-predicted ARMSS") obs <- c("armss","armss") obs_nice <- c("measured ARMSS","measured ARMSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_armss_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/armss_LM_vs_MLM_prediction_validation.png", width=6,height=2.5,units = "in",dpi = 150) ########################### # add PNS variables ####### #MSDSS ####### severity_data_msdss_train <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msdss)) confound_pns <- c(confound,"panel_11_17","medulla_ll_atrophy") severity_data_msdss_train_clean <- severity_data_msdss_train[complete.cases(severity_data_msdss_train[confound_pns]),] #build the model: fit <- lm(msdss~log_nfl_serum + age + bmi + bmi_weight + + serum_ap + serum_bun + serum_creatinine + panel_11_17 + medulla_ll_atrophy, severity_data_msdss_train_clean) fit_msdss <- lm(msdss~log_nfl_serum, severity_data_msdss_train_clean) plot(msdss~log_nfl_serum, severity_data_msdss_train_clean) abline(lm(msdss~log_nfl_serum, severity_data_msdss_train_clean)) summary(fit_msdss) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit_msdss_pns <- lm(msdss ~ log_nfl_serum + age + bmi + bmi_weight + serum_ap + medulla_ll_atrophy, severity_data_msdss_train_clean) summary(best_fit_msdss_pns) # predict msdss from LM and MLM severity_data_msdss_train_clean$msdss_lm <- fit_msdss$fitted.values severity_data_msdss_train_clean$msdss_mlm <- predict(object = best_fit_msdss_pns,newdata = severity_data_msdss_train_clean) # plot pred <- c("msdss_lm","msdss_mlm") pred_nice <- c("sNFL-predicted MS-DSS","sNFL-predicted MS-DSS") obs <- c("msdss","msdss") obs_nice <- c("measured MS-DSS","measured MS-DSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msdss_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msdss_LM_vs_MLM_prediction_wPNS_training.png", width=6,height=2.5,units = "in",dpi = 150) #VALIDATION cohort severity_data_msdss_val <- data_sub_val %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msdss)) severity_data_msdss_val_clean <- severity_data_msdss_val[complete.cases(severity_data_msdss_val[confound_pns]),] # predict msdss from LM and MLM in the VALIDATION cohort severity_data_msdss_val_clean$msdss_lm <- predict(object = fit_msdss,newdata = severity_data_msdss_val_clean) severity_data_msdss_val_clean$msdss_mlm <- predict(object = best_fit_msdss_pns,newdata = severity_data_msdss_val_clean) # plot pred <- c("msdss_lm","msdss_mlm") pred_nice <- c("sNFL-predicted MS-DSS","sNFL-predicted MS-DSS") obs <- c("msdss","msdss") obs_nice <- c("measured MS-DSS","measured MS-DSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msdss_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msdss_LM_vs_MLM_prediction_wPNS_validation.png", width=6,height=2.5,units = "in",dpi = 150) ######## # MSSS ######## severity_data_msss_train <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msss)) severity_data_msss_train_clean <- severity_data_msss_train[complete.cases(severity_data_msss_train[confound_pns]),] #build the model: fit <- lm(msss~log_nfl_serum + age + bmi + bmi_weight + serum_ap + serum_bun + serum_creatinine + panel_11_17 + medulla_ll_atrophy, severity_data_msss_train_clean) fit_msss <- lm(msss~log_nfl_serum, severity_data_msss_train_clean) plot(msss~log_nfl_serum, severity_data_msss_train_clean) abline(lm(msss~log_nfl_serum, severity_data_msss_train_clean)) summary(fit_msss) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit_msss_pns <- lm(msss ~ log_nfl_serum + bmi_weight + serum_ap + serum_bun + serum_creatinine + panel_11_17 + medulla_ll_atrophy, severity_data_msss_train_clean) summary(best_fit_msss_pns) # predict msss from LM and MLM severity_data_msss_train_clean$msss_lm <- fit_msss$fitted.values severity_data_msss_train_clean$msss_mlm <- predict(object = best_fit_msss_pns,newdata = severity_data_msss_train_clean) # plot pred <- c("msss_lm","msss_mlm") pred_nice <- c("sNFL-predicted MSSS","sNFL-predicted MSSS") obs <- c("msss","msss") obs_nice <- c("measured MSSS","measured MSSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msss_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msss_LM_vs_MLM_prediction_wPNS_training.png", width=6,height=2.5,units = "in",dpi = 150) #VALIDATION cohort severity_data_msss_val <- data_sub_val %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msss)) severity_data_msss_val_clean <- severity_data_msss_val[complete.cases(severity_data_msss_val[confound_pns]),] # predict msss from LM and MLM in the VALIDATION cohort severity_data_msss_val_clean$msss_lm <- predict(object = fit_msss,newdata = severity_data_msss_val_clean) severity_data_msss_val_clean$msss_mlm <- predict(object = best_fit_msss_pns,newdata = severity_data_msss_val_clean) # plot pred <- c("msss_lm","msss_mlm") pred_nice <- c("sNFL-predicted MSSS","sNFL-predicted MSSS") obs <- c("msss","msss") obs_nice <- c("measured MSSS","measured MSSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msss_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msss_LM_vs_MLM_prediction_wPNS_validation.png", width=6,height=2.5,units = "in",dpi = 150) ######## # ARMSS ######## severity_data_armss_train <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(armss)) severity_data_armss_train_clean <- severity_data_armss_train[complete.cases(severity_data_armss_train[confound_pns]),] #build the model: fit <- lm(armss~log_nfl_serum + age + bmi + bmi_weight + serum_ap + serum_bun + serum_creatinine + panel_11_17 + medulla_ll_atrophy, severity_data_armss_train_clean) fit_armss <- lm(armss~log_nfl_serum, severity_data_armss_train_clean) plot(armss~log_nfl_serum, severity_data_armss_train_clean) abline(lm(armss~log_nfl_serum, severity_data_armss_train_clean)) summary(fit_armss) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit_armss_pns <- lm(armss ~ log_nfl_serum + age + bmi_weight + serum_ap + serum_creatinine + panel_11_17 + medulla_ll_atrophy, severity_data_armss_train_clean) summary(best_fit_armss_pns) # predict armss from LM and MLM severity_data_armss_train_clean$armss_lm <- fit_armss$fitted.values severity_data_armss_train_clean$armss_mlm <- predict(object = best_fit_armss_pns,newdata = severity_data_armss_train_clean) # plot pred <- c("armss_lm","armss_mlm") pred_nice <- c("sNFL-predicted ARMSS","sNFL-predicted ARMSS") obs <- c("armss","armss") obs_nice <- c("measured ARMSS","measured ARMSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_armss_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/armss_LM_vs_MLM_prediction_wPNS_training.png", width=6,height=2.5,units = "in",dpi = 150) #VALIDATION cohort severity_data_armss_val <- data_sub_val %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(armss)) severity_data_armss_val_clean <- severity_data_armss_val[complete.cases(severity_data_armss_val[confound_pns]),] # predict armss from LM and MLM in the VALIDATION cohort severity_data_armss_val_clean$armss_lm <- predict(object = fit_armss,newdata = severity_data_armss_val_clean) severity_data_armss_val_clean$armss_mlm <- predict(object = best_fit_armss_pns,newdata = severity_data_armss_val_clean) # plot pred <- c("armss_lm","armss_mlm") pred_nice <- c("sNFL-predicted ARMSS","sNFL-predicted ARMSS") obs <- c("armss","armss") obs_nice <- c("measured ARMSS","measured ARMSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_armss_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/armss_LM_vs_MLM_prediction_wPNS_validation.png", width=6,height=2.5,units = "in",dpi = 150) ############################################### ############## lolipop plots of t statistics ############################################### # get Tstat from the MLM model # start by all variables tidy_fit <- broom::tidy(fit) tidy_vars <- tibble::enframe(tidy_fit$term,name = NULL,value = "term") # MSDSS no PNS tidy_fit <- broom::tidy(best_fit_msdss) tidy_fit <- tidy_fit[,c(1,4)] names(tidy_fit)[2] <- "msdss_nopns" tidy_vars <- merge(tidy_vars,tidy_fit,by="term",all.x = TRUE) # MSDSS w PNS tidy_fit <- broom::tidy(best_fit_msdss_pns) tidy_fit <- tidy_fit[,c(1,4)] names(tidy_fit)[2] <- "msdss_wpns" tidy_vars <- merge(tidy_vars,tidy_fit,by="term",all.x = TRUE) # MSSS no PNS tidy_fit <- broom::tidy(best_fit_msss) tidy_fit <- tidy_fit[,c(1,4)] names(tidy_fit)[2] <- "msss_nopns" tidy_vars <- merge(tidy_vars,tidy_fit,by="term",all.x = TRUE) # MSSS w PNS tidy_fit <- broom::tidy(best_fit_msss_pns) tidy_fit <- tidy_fit[,c(1,4)] names(tidy_fit)[2] <- "msss_wpns" tidy_vars <- merge(tidy_vars,tidy_fit,by="term",all.x = TRUE) # ARMSS no PNS tidy_fit <- broom::tidy(best_fit_armss) tidy_fit <- tidy_fit[,c(1,4)] names(tidy_fit)[2] <- "armss_nopns" tidy_vars <- merge(tidy_vars,tidy_fit,by="term",all.x = TRUE) # ARMSS w PNS tidy_fit <- broom::tidy(best_fit_armss_pns) tidy_fit <- tidy_fit[,c(1,4)] names(tidy_fit)[2] <- "armss_wpns" tidy_vars <- merge(tidy_vars,tidy_fit,by="term",all.x = TRUE) sev_outcomes <- names(tidy_vars)[-1] terms <- tidy_vars$term # lolipop plotplot(# Horizontal version for(i in 1:length(sev_outcomes)){ # i <- 1 z <- ggplot(tidy_vars,aes_string(y=sev_outcomes[i], x="terms")) + geom_segment( aes_string(y=0, yend=sev_outcomes[i],x="terms", xend="terms"), color="skyblue") + geom_point( color="blue", size=4, alpha=0.6) + theme_light() + scale_x_discrete(limits = terms) + geom_hline(yintercept = 0,color = "black", linetype="dashed",size=.25) + coord_flip() + ggtitle(sev_outcomes[i])+ ylab("t-statistics") + xlab("")+ ylim(c(-8,15))+ theme( panel.grid.major.y = element_blank(), panel.border = element_blank(), axis.ticks.y = element_blank() ) assign(paste("z",i,sep = ""),z) } ggsave(plot = ggarrange(z1,z3,z5,ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/vars_severity_predicitions_noPNS.png", width =5,height = 9,units = "in",dpi = 300) ggsave(plot = ggarrange(z2,z4,z6,ncol = 1,nrow = 3, common.legend=TRUE, legend="bottom"), filename = "./output/vars_severity_predicitions_wPNS.png", width =5,height = 9,units = "in",dpi = 300) ########################### # use PNS variables only to predict severity ####### #MSDSS ####### severity_data_msdss_train <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msdss)) confound_pns <- c("panel_11_17","medulla_ll_atrophy") severity_data_msdss_train_clean <- severity_data_msdss_train[complete.cases(severity_data_msdss_train[confound_pns]),] #build the model: fit <- lm(msdss~panel_11_17 + medulla_ll_atrophy, severity_data_msdss_train_clean) fit_msdss <- lm(msdss~log_nfl_serum, severity_data_msdss_train_clean) plot(msdss~log_nfl_serum, severity_data_msdss_train_clean) abline(lm(msdss~log_nfl_serum, severity_data_msdss_train_clean)) summary(fit_msdss) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit_msdss_pns <- lm(msdss ~ panel_11_17 + medulla_ll_atrophy, severity_data_msdss_train_clean) summary(best_fit_msdss_pns) # predict msdss from LM and MLM severity_data_msdss_train_clean$msdss_lm <- fit_msdss$fitted.values severity_data_msdss_train_clean$msdss_mlm <- predict(object = best_fit_msdss_pns,newdata = severity_data_msdss_train_clean) # plot pred <- c("msdss_lm","msdss_mlm") pred_nice <- c("sNFL-predicted MS-DSS","sNFL-predicted MS-DSS") obs <- c("msdss","msdss") obs_nice <- c("measured MS-DSS","measured MS-DSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msdss_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msdss_LM_vs_MLM_prediction_PNSonly_training.png", width=6,height=2.5,units = "in",dpi = 150) #VALIDATION cohort severity_data_msdss_val <- data_sub_val %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msdss)) severity_data_msdss_val_clean <- severity_data_msdss_val[complete.cases(severity_data_msdss_val[confound_pns]),] # predict msdss from LM and MLM in the VALIDATION cohort severity_data_msdss_val_clean$msdss_lm <- predict(object = fit_msdss,newdata = severity_data_msdss_val_clean) severity_data_msdss_val_clean$msdss_mlm <- predict(object = best_fit_msdss_pns,newdata = severity_data_msdss_val_clean) # plot pred <- c("msdss_lm","msdss_mlm") pred_nice <- c("sNFL-predicted MS-DSS","sNFL-predicted MS-DSS") obs <- c("msdss","msdss") obs_nice <- c("measured MS-DSS","measured MS-DSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msdss_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msdss_LM_vs_MLM_prediction_PNSonly_validation.png", width=6,height=2.5,units = "in",dpi = 150) ######## # MSSS ######## severity_data_msss_train <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msss)) severity_data_msss_train_clean <- severity_data_msss_train[complete.cases(severity_data_msss_train[confound_pns]),] #build the model: fit <- lm(msss~ panel_11_17 + medulla_ll_atrophy, severity_data_msss_train_clean) fit_msss <- lm(msss~log_nfl_serum, severity_data_msss_train_clean) plot(msss~log_nfl_serum, severity_data_msss_train_clean) abline(lm(msss~log_nfl_serum, severity_data_msss_train_clean)) summary(fit_msss) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit_msss_pns <- lm(msss ~ panel_11_17 + medulla_ll_atrophy, severity_data_msss_train_clean) summary(best_fit_msss_pns) # predict msss from LM and MLM severity_data_msss_train_clean$msss_lm <- fit_msss$fitted.values severity_data_msss_train_clean$msss_mlm <- predict(object = best_fit_msss_pns,newdata = severity_data_msss_train_clean) # plot pred <- c("msss_lm","msss_mlm") pred_nice <- c("sNFL-predicted MSSS","sNFL-predicted MSSS") obs <- c("msss","msss") obs_nice <- c("measured MSSS","measured MSSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msss_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msss_LM_vs_MLM_prediction_PNSonly_training.png", width=6,height=2.5,units = "in",dpi = 150) #VALIDATION cohort severity_data_msss_val <- data_sub_val %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(msss)) severity_data_msss_val_clean <- severity_data_msss_val[complete.cases(severity_data_msss_val[confound_pns]),] # predict msss from LM and MLM in the VALIDATION cohort severity_data_msss_val_clean$msss_lm <- predict(object = fit_msss,newdata = severity_data_msss_val_clean) severity_data_msss_val_clean$msss_mlm <- predict(object = best_fit_msss_pns,newdata = severity_data_msss_val_clean) # plot pred <- c("msss_lm","msss_mlm") pred_nice <- c("sNFL-predicted MSSS","sNFL-predicted MSSS") obs <- c("msss","msss") obs_nice <- c("measured MSSS","measured MSSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_msss_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/msss_LM_vs_MLM_prediction_PNSonly_validation.png", width=6,height=2.5,units = "in",dpi = 150) ######## # ARMSS ######## severity_data_armss_train <- data_sub_train %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(armss)) severity_data_armss_train_clean <- severity_data_armss_train[complete.cases(severity_data_armss_train[confound_pns]),] #build the model: fit <- lm(armss~ panel_11_17 + medulla_ll_atrophy, severity_data_armss_train_clean) fit_armss <- lm(armss~log_nfl_serum, severity_data_armss_train_clean) plot(armss~log_nfl_serum, severity_data_armss_train_clean) abline(lm(armss~log_nfl_serum, severity_data_armss_train_clean)) summary(fit_armss) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit_armss_pns <- lm(armss ~ panel_11_17 + medulla_ll_atrophy, severity_data_armss_train_clean) summary(best_fit_armss_pns) # predict armss from LM and MLM severity_data_armss_train_clean$armss_lm <- fit_armss$fitted.values severity_data_armss_train_clean$armss_mlm <- predict(object = best_fit_armss_pns,newdata = severity_data_armss_train_clean) # plot pred <- c("armss_lm","armss_mlm") pred_nice <- c("sNFL-predicted ARMSS","sNFL-predicted ARMSS") obs <- c("armss","armss") obs_nice <- c("measured ARMSS","measured ARMSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_armss_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/armss_LM_vs_MLM_prediction_PNSonly_training.png", width=6,height=2.5,units = "in",dpi = 150) #VALIDATION cohort severity_data_armss_val <- data_sub_val %>% filter(diagnosis %in% c("RR-MS","PP-MS","SP-MS"))%>% filter(!is.na(armss)) severity_data_armss_val_clean <- severity_data_armss_val[complete.cases(severity_data_armss_val[confound_pns]),] # predict armss from LM and MLM in the VALIDATION cohort severity_data_armss_val_clean$armss_lm <- predict(object = fit_armss,newdata = severity_data_armss_val_clean) severity_data_armss_val_clean$armss_mlm <- predict(object = best_fit_armss_pns,newdata = severity_data_armss_val_clean) # plot pred <- c("armss_lm","armss_mlm") pred_nice <- c("sNFL-predicted ARMSS","sNFL-predicted ARMSS") obs <- c("armss","armss") obs_nice <- c("measured ARMSS","measured ARMSS") for(i in 1:length(obs)){ # i <- 2 df <- severity_data_armss_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.15*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+1.2*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]), max(df[pred[i]])+0.35*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="#521B93",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#521B93",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#521B93",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#521B93") + theme_bw(base_size = 12) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2, ncol = 2,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/armss_LM_vs_MLM_prediction_PNSonly_validation.png", width=6,height=2.5,units = "in",dpi = 150) ######################################################################################################################################################## ######################################################################################################################################################## # # PROPENSITY score matching for brain atrophy # ######################################################################################################################################################## ######################################################################################################################################################## ##################################### # isolate training cohort data_sub_train <- data_sub %>% filter(cohort=="train")%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) # generate lm model in the training cohort mod_train <- lm(log_nfl_csf~log_nfl_serum,data_sub_train) # get residuals in the whole cohort data_sub$serum_csf_nfl_res <- data_sub$log_nfl_csf - predict(mod_train,newdata = data_sub) # isolate training cohort again data_sub_train <- data_sub %>% filter(cohort=="train")%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) # isolate validation cohort data_sub_val <- data_sub %>% filter(cohort=="val")%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) #plot CSF vs Serum CSF residualsNFL in the training and validation cohort pred <- c("log_nfl_csf") obs <- c("log_nfl_serum") pred_nice <- c("Log10 cNFL") obs_nice <- c("Log10 sNFL") for(i in 1:length(obs)){ # i <- 2 data <- data_sub_train df <- data %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="blue",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="blue",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="blue",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#0171C0")) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/propensity_csf_vs_serum_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # get quartiles data <- data_sub_train # calculate quartiles quart <- (quantile(data$serum_csf_nfl_res)) # identify first quartile values q1_ind <- which(data$serum_csf_nfl_res <= quart[2]) #identify third quartile values q3_ind <- which(data$serum_csf_nfl_res >= quart[4]) #create a new column for quartiless data$quart <- NA data$quart[q1_ind] <- "q1" data$quart[q3_ind] <- "q3" data$placeholder <- "A" # plot q1 and q3 nly #make sure this plot is made right after the previous one - otherwise the y-axis limits will not work. # WITHOUT IQR points p <- data %>% filter(!is.na(quart)) %>% ggplot(aes(x=log_nfl_serum, y= log_nfl_csf, color=quart)) + geom_point(alpha = 0.6, shape = 1, size = 2,stroke = 0.6) + ylab("Log10 cNFL ") + xlab("Log10 sNFL") + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ theme_bw(base_size = 12) + geom_smooth(method = "lm",mapping = aes(x=log_nfl_serum, y= log_nfl_csf),color="blue",se=TRUE,data = data_sub_train ) + theme(axis.title.y = element_text(colour = "#0171C0"), axis.title.x = element_text( colour = "#C00000" ), legend.position = "none") ggsave(plot = ggarrange(p,ncol = 1,nrow = 1), filename = "./output/geom_point_propensity_csf_vs_serum_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) #WITH IQR POINTS p <- data %>% ggplot(aes(x=log_nfl_serum, y= log_nfl_csf, color=quart)) + geom_point(alpha = 0.6, shape = 1, size = 2,stroke = 0.6) + ylab("Log10 cNFL ") + xlab("Log10 sNFL") + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + theme_bw(base_size = 12) + stat_cor(aes(label=..r.label..), size=4,color="black",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="black",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="black",label.y.npc = .77)+ geom_smooth(method = "lm",mapping = aes(x=log_nfl_serum, y= log_nfl_csf),color="black",size = 0.5,se=FALSE,data = data_sub_train ) + theme(axis.title.y = element_text(colour = "#0171C0"), axis.title.x = element_text( colour = "#C00000" ), legend.position = "none") ggsave(plot = ggarrange(p,ncol = 1,nrow = 1), filename = "./output/geom_point_wIQR_propensity_csf_vs_serum_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) #BOXPLOT p1 <- data %>% ggplot(aes(x=factor(0),y=serum_csf_nfl_res)) + geom_jitter(alpha = 0.6, shape = 1, size = 1.5,stroke = 0.4,position=position_jitter(0.3),aes(color=quart))+ geom_boxplot(outlier.colour = "transparent",fill="transparent") + theme_bw()+ theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = "none",panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_blank()) ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/boxplot_wIQR_propensity_csf_vs_serum_training.png", width =1.3,height = 1.3,units = "in",dpi = 300) # boxplots of clean quartiles data_clean <- data %>% filter(!is.na(quart)) df <- data_clean obs <- c("quart") obs_nice <- c("NFL Residual Quartiles") pred <- c("log_nfl_serum") pred_nice <- c("Log10 sNFL") for(i in 1:length(obs)){ p1 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(0.07)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "t.test", paired = FALSE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'mean =', format(round(mean(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme(axis.title.y = element_text( colour = "#C00000" ), axis.title.x = element_text( colour = "black" ), legend.position = "none") } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/serum_propensity_csf_vs_serum_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # residuals df <- data_clean obs <- c("quart") obs_nice <- c("NFL Residual Quartiles") pred <- c("serum_csf_nfl_res") pred_nice <- c("NFL residuals") for(i in 1:length(obs)){ p1 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(0.07)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "t.test", paired = FALSE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'mean =', format(round(mean(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(axis.title.y = element_text( colour = "#C55A11" ), axis.title.x = element_text( colour = "black" ), legend.position = "none") } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/residulas_propensity_csf_vs_serum_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # # #### #### correlations of residuals with different outcomes in the training cohort #### # # obs <- c("quart","quart","quart","quart") obs_nice <- c("NFL Residual Quartiles","NFL Residual Quartiles","NFL Residual Quartiles","NFL Residual Quartiles") pred <- c("bpfr","atrophy_severity_score","sdmt_age","sdmt") pred_nice <- c("Brain Parenchymal Fraction","Total Brain Atrophy","SDMT/Age","SDMT") cohorts <- c("train","train","train","train") cohort_nice <- c("TRAINING cohort","TRAINING cohort","TRAINING cohort","TRAINING cohort") # isolate samples with outcome in training cohort for(i in 1:length(pred)){ # i <-2 data <- data_sub %>% dplyr::filter(cohort %in% cohorts[i]) data <- data %>% filter(.,!is.na(data[pred[i]]))%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) # calculate quartiles quart <- (quantile(data$serum_csf_nfl_res)) # identify first quartile values q1_ind <- which(data$serum_csf_nfl_res <= quart[2]) #identify third quartile values q3_ind <- which(data$serum_csf_nfl_res >= quart[4]) #create a new column for quartiless data$quart <- NA data$quart[q1_ind] <- "q1" data$quart[q3_ind] <- "q3" data_clean <- data %>% filter(!is.na(quart)) df <- data_clean p3 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "wilcox.test", paired = FALSE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none") + theme(axis.title.y = element_text( colour = "blue" )) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1), filename = paste("./output/",pred[i],"_final_plot_",cohorts[i],".png",sep=""), width =2.7,height = 2.5,units = "in",dpi = 300) df <- data_clean df$group <- df$quart df$group <- df$group=="q1" df <- df %>% filter(!is.na(df[pred[i]])) set.seed(1234) match.it <- matchit(group~log_nfl_serum,data = df, method="full") a <- summary(match.it) plot(match.it) df.match <- match.data(match.it) df_q1 <- df.match %>% dplyr::group_by(subclass) %>% arrange(desc(abs(serum_csf_nfl_res))) %>% arrange(weights) %>% filter(quart=="q1") %>% filter(row_number()==1) df_q3 <- df.match %>% dplyr::group_by(subclass) %>% arrange(desc(abs(serum_csf_nfl_res))) %>% arrange(weights) %>% filter(quart=="q3") %>% filter(row_number()==1) df <- rbind(df_q1,df_q3) df <- df %>% dplyr::select(subclass,quart,log_nfl_serum,serum_csf_nfl_res,pred[i]) %>% dplyr::arrange(.,subclass) #plot serum NFL in two quartile groups p4 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "wilcox.test", paired = TRUE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none")+ theme(axis.title.y = element_text( colour = "blue" )) ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1), filename = paste("./output/",pred[i],"_final_plot_propensity_matched_",cohorts[i],".png",sep=""), width =2.7,height = 2.5,units = "in",dpi = 300) } # boxplots of matched quartiles obs <- c("quart") obs_nice <- c("NFL Residual Quartiles") pred <- c("log_nfl_serum") pred_nice <- c("Log10 sNFL") for(i in 1:length(obs)){ p1 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(0.07)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "t.test", paired = TRUE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'mean =', format(round(mean(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme(axis.title.y = element_text( colour = "#C00000" ), axis.title.x = element_text( colour = "black" ), legend.position = "none") } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/serum_propensity_matched_csf_vs_serum_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # residuals obs <- c("quart") obs_nice <- c("NFL Residual Quartiles") pred <- c("serum_csf_nfl_res") pred_nice <- c("NFL residuals") for(i in 1:length(obs)){ p1 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(0.07)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + stat_compare_means(method = "t.test", paired = TRUE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'mean =', format(round(mean(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(axis.title.y = element_text( colour = "#C55A11" ), axis.title.x = element_text( colour = "black" ), legend.position = "none") } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/residulas_propensity_matched_csf_vs_serum_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # isolate samples with outcome in validation cohort # VALIDATION!!!!!!!! # # #### #### correlations of residuals with different outcomes in the validation cohort #### # # obs <- c("quart","quart") obs_nice <- c("NFL Residual Quartiles","NFL Residual Quartiles") pred <- c("bpfr","atrophy_severity_score") pred_nice <- c("Brain Parenchymal Fraction","Total Brain Atrophy") cohorts <- c("val","val") cohort_nice <- c("VALIDATION cohort","VALIDATION cohort") # isolate samples with outcome in training cohort for(i in 1:length(pred)){ # i <-1 data <- data_sub %>% dplyr::filter(cohort %in% cohorts[i]) data <- data %>% filter(.,!is.na(data[pred[i]]))%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) # calculate quartiles quart <- (quantile(data$serum_csf_nfl_res)) # identify first quartile values q1_ind <- which(data$serum_csf_nfl_res <= quart[2]) #identify third quartile values q3_ind <- which(data$serum_csf_nfl_res >= quart[4]) #create a new column for quartiless data$quart <- NA data$quart[q1_ind] <- "q1" data$quart[q3_ind] <- "q3" data_clean <- data %>% filter(!is.na(quart)) df <- data_clean p3 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "wilcox.test", paired = FALSE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none") ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1), filename = paste("./output/",pred[i],"_final_plot_",cohorts[i],".png",sep=""), width =2.7,height = 2.5,units = "in",dpi = 300) df <- data_clean df$group <- df$quart df$group <- df$group=="q1" df <- df %>% filter(!is.na(df[pred[i]])) set.seed(1234) match.it <- matchit(group~log_nfl_serum,data = df, method="full") a <- summary(match.it) plot(match.it) df.match <- match.data(match.it) df_q1 <- df.match %>% dplyr::group_by(subclass) %>% arrange(desc(abs(serum_csf_nfl_res))) %>% arrange(weights) %>% filter(quart=="q1") %>% filter(row_number()==1) df_q3 <- df.match %>% dplyr::group_by(subclass) %>% arrange(desc(abs(serum_csf_nfl_res))) %>% arrange(weights) %>% filter(quart=="q3") %>% filter(row_number()==1) df <- rbind(df_q1,df_q3) df <- df %>% dplyr::select(subclass,quart,log_nfl_serum,serum_csf_nfl_res,pred[i]) %>% dplyr::arrange(.,subclass) #plot serum NFL in two quartile groups p4 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "wilcox.test", paired = TRUE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none") ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1), filename = paste("./output/",pred[i],"_final_plot_propensity_matched_",cohorts[i],".png",sep=""), width =2.7,height = 2.5,units = "in",dpi = 300) } ######################################################################################################################################################## ######################################################################################################################################################## # # PROPENSITY score matching for PNS injury # ######################################################################################################################################################## ######################################################################################################################################################## ##################################### # isolate training cohort data_sub_train <- data_sub %>% filter(cohort=="train")%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) # generate lm model in the training cohort mod_train <- lm(log_nfl_serum~log_nfl_csf,data_sub_train) # get residuals in the whole cohort data_sub$serum_csf_nfl_res <- data_sub$log_nfl_serum - predict(mod_train,newdata = data_sub) # isolate training cohort again data_sub_train <- data_sub %>% filter(cohort=="train")%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) # isolate validation cohort data_sub_val <- data_sub %>% filter(cohort=="val")%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) ###################### # get quartiles ###################### data <- data_sub_train # calculate quartiles quart <- (quantile(data$serum_csf_nfl_res)) # identify first quartile values q1_ind <- which(data$serum_csf_nfl_res <= quart[2]) #identify third quartile values q3_ind <- which(data$serum_csf_nfl_res >= quart[4]) #create a new column for quartiless data$quart <- NA data$quart[q1_ind] <- "q1" data$quart[q3_ind] <- "q3" data$placeholder <- "A" #plot Serum vs CSF NFL in the training and validation cohort pred <- c("log_nfl_serum") obs <- c("log_nfl_csf") pred_nice <- c("Log10 sNFL") obs_nice <- c("Log10 cNFL") for(i in 1:length(obs)){ # i <- 2 df <- data_sub_train %>% filter(.,!is.na(data[[pred[i]]])) p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + stat_cor(aes(label=..r.label..), size=4,color="blue",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="blue",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="blue",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1) + theme_bw(base_size = 12) + theme(axis.title.y = element_text(colour = "#C00000"), axis.title.x = element_text(colour = "#0171C0")) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/propensity_serum_vs_csf_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # plot q1 and q3 nly p <- data %>% filter(!is.na(quart)) %>% ggplot(aes(y=log_nfl_serum, x= log_nfl_csf, color=quart)) + geom_point(alpha = 0.6, shape = 1, size = 2,stroke = 0.6) + ylab("Log10 sNFL ") + xlab("Log10 cNFL") + theme_bw(base_size = 12) + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ geom_smooth(method = "lm",mapping = aes(y=log_nfl_serum, x= log_nfl_csf),color="blue",se=TRUE,data = data_sub_train ) + theme(axis.title.x = element_text(colour = "#0171C0"), axis.title.y = element_text(colour = "#C00000"), legend.position = "none") ggsave(plot = ggarrange(p,ncol = 1,nrow = 1), filename = "./output/geom_point_propensity_csf_vs_serum_PNS_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) #WITH IQR POINTS p <- data %>% ggplot(aes(y=log_nfl_serum, x= log_nfl_csf,color=quart)) + geom_point(alpha = 0.4, shape = 1, size = 2,stroke = 0.6)+ xlab("Log10 cNFL ") + ylab("Log10 sNFL") + ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[pred[i]])+.03*(max(df[pred[i]])-min(df[pred[i]])), size=4,parse = TRUE) + theme_bw(base_size = 12) + stat_cor(aes(label=..r.label..), size=4,color="black",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="black",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="black",label.y.npc = .77)+ geom_smooth(method = "lm",mapping = aes(y=log_nfl_serum, x= log_nfl_csf),color="black",size = 0.5,se=FALSE,data = data ) + theme(axis.title.y = element_text(colour = "#C00000"), axis.title.x = element_text(colour = "#0171C0"), legend.position = "none") ggsave(plot = ggarrange(p,ncol = 1,nrow = 1), filename = "./output/geom_point_wIQR_propensity_serum_vs_csf_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) #BOXPLOT p1 <- data %>% ggplot(aes(x=factor(0),y=serum_csf_nfl_res)) + geom_jitter(alpha = 0.6, shape = 1, size = 1.5,stroke = 0.4,position=position_jitter(0.3),aes(color=quart))+ geom_boxplot(outlier.colour = "transparent",fill="transparent") + theme_bw()+ theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = "none",panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_blank()) ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/boxplot_wIQR_propensity_serum_vs_csf_training.png", width =1.3,height = 1.3,units = "in",dpi = 300) # boxplots of clean quartiles data_clean <- data %>% filter(!is.na(quart)) df <- data_clean obs <- c("quart") obs_nice <- c("NFL Residual Quartiles") pred <- c("log_nfl_csf") pred_nice <- c("Log10 cNFL") for(i in 1:length(obs)){ p1 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(0.07)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "t.test", paired = FALSE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'mean =', format(round(mean(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme(axis.title.y = element_text( colour = "#0171C0" ), axis.title.x = element_text( colour = "black" ), legend.position = "none") } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/serum_propensity_csf_vs_serum_PNS_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # residuals df <- data_clean obs <- c("quart") obs_nice <- c("NFL Residual Quartiles") pred <- c("serum_csf_nfl_res") pred_nice <- c("NFL residuals") for(i in 1:length(obs)){ p1 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(0.07)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "t.test", paired = FALSE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'mean =', format(round(mean(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(axis.title.y = element_text( colour = "#C55A11" ), axis.title.x = element_text( colour = "black" ), legend.position = "none") } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/residulas_propensity_csf_vs_serum_PNS_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # # #### #### correlations of residuals with different outcomes in the training cohort #### # # obs <- c("quart","quart","quart","quart") obs_nice <- c("NFL Residual Quartiles","NFL Residual Quartiles","NFL Residual Quartiles","NFL Residual Quartiles") pred <- c("medulla_ll_atrophy","panel_11_17","panel_11","panel_17") pred_nice <- c("LL + Atrophy \nof Medulla & Upper CS", "NeurEx - \nMuscle Atrophy + BBSA", "NeurEx - \nMuscle Atrophy", "NeurEx - \nBBSA") cohorts <- c("train","train","train","train") cohort_nice <- c("TRAINING cohort","TRAINING cohort","TRAINING cohort","TRAINING cohort") # isolate samples with outcome in training cohort for(i in 1:length(pred)){ # i <-1 data <- data_sub %>% dplyr::filter(cohort %in% cohorts[i]) data <- data %>% filter(.,!is.na(data[pred[i]]))%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) # calculate quartiles quart <- (quantile(data$serum_csf_nfl_res)) # identify first quartile values q1_ind <- which(data$serum_csf_nfl_res <= quart[2]) #identify third quartile values q3_ind <- which(data$serum_csf_nfl_res >= quart[4]) #create a new column for quartiless data$quart <- NA data$quart[q1_ind] <- "q1" data$quart[q3_ind] <- "q3" data_clean <- data %>% filter(!is.na(quart)) df <- data_clean p3 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "wilcox.test", paired = FALSE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none")+ theme(axis.title.y = element_text( colour = "blue" )) ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1), filename = paste("./output/",pred[i],"_final_plot_",cohorts[i],".png",sep=""), width =2.7,height = 2.5,units = "in",dpi = 300) df <- data_clean df$group <- df$quart df$group <- df$group=="q1" df <- df %>% filter(!is.na(df[pred[i]])) set.seed(1234) match.it <- matchit(group~log_nfl_csf,data = df, method="full") a <- summary(match.it) plot(match.it) df.match <- match.data(match.it) df_q1 <- df.match %>% dplyr::group_by(subclass) %>% arrange(desc(abs(serum_csf_nfl_res))) %>% arrange(weights) %>% filter(quart=="q1") %>% filter(row_number()==1) df_q3 <- df.match %>% dplyr::group_by(subclass) %>% arrange(desc(abs(serum_csf_nfl_res))) %>% arrange(weights) %>% filter(quart=="q3") %>% filter(row_number()==1) df <- rbind(df_q1,df_q3) df <- df %>% dplyr::select(subclass,quart,log_nfl_csf,serum_csf_nfl_res,pred[i]) %>% dplyr::arrange(.,subclass) #plot serum NFL in two quartile groups p4 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "wilcox.test", paired = TRUE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none")+ theme(axis.title.y = element_text( colour = "blue" )) ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1), filename = paste("./output/",pred[i],"_final_plot_propensity_matched_",cohorts[i],".png",sep=""), width =2.7,height = 2.5,units = "in",dpi = 300) } # MATCHED boxplots of clean quartiles obs <- c("quart") obs_nice <- c("NFL Residual Quartiles") pred <- c("log_nfl_csf") pred_nice <- c("Log10 cNFL") for(i in 1:length(obs)){ p1 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(0.07)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "t.test", paired = TRUE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'mean =', format(round(mean(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme(axis.title.y = element_text( colour = "#0171C0" ), axis.title.x = element_text( colour = "black" ), legend.position = "none") } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/serum_propensity_matched_csf_vs_serum_PNS_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # residuals obs <- c("quart") obs_nice <- c("NFL Residual Quartiles") pred <- c("serum_csf_nfl_res") pred_nice <- c("NFL residuals") for(i in 1:length(obs)){ p1 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(0.07)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "t.test", paired = TRUE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'mean =', format(round(mean(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(axis.title.y = element_text( colour = "#C55A11" ), axis.title.x = element_text( colour = "black" ), legend.position = "none") } ggsave(plot = ggarrange(p1,ncol = 1,nrow = 1), filename = "./output/residulas_propensity_matched_csf_vs_serum_PNS_training.png", width =2.7,height = 2.5,units = "in",dpi = 300) # isolate samples with outcome in validation cohort # VALIDATION!!!!!!!! # # #### #### correlations of residuals with different outcomes in the validation cohort #### # # obs <- c("quart","quart","quart","quart") obs_nice <- c("NFL Residual Quartiles","NFL Residual Quartiles","NFL Residual Quartiles","NFL Residual Quartiles") pred <- c("medulla_ll_atrophy","panel_11_17","panel_11","panel_17") pred_nice <- c("LL + Atrophy \nof Medulla & Upper CS", "NeurEx - \nMuscle Atrophy + BBSA", "NeurEx - \nMuscle Atrophy", "NeurEx - \nBBSA") cohorts <- c("val","val","val","val") cohort_nice <- c("VALIDATION cohort","VALIDATION cohort","VALIDATION cohort","VALIDATION cohort") # isolate samples with outcome in training cohort for(i in 1:length(pred)){ # i <-1 data <- data_sub %>% dplyr::filter(cohort %in% cohorts[i]) data <- data %>% filter(.,!is.na(data[pred[i]]))%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS")) # calculate quartiles quart <- (quantile(data$serum_csf_nfl_res)) # identify first quartile values q1_ind <- which(data$serum_csf_nfl_res <= quart[2]) #identify third quartile values q3_ind <- which(data$serum_csf_nfl_res >= quart[4]) #create a new column for quartiless data$quart <- NA data$quart[q1_ind] <- "q1" data$quart[q3_ind] <- "q3" data_clean <- data %>% filter(!is.na(quart)) df <- data_clean p3 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + # geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "wilcox.test", paired = FALSE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none") ggsave(plot = ggarrange(p3,ncol = 1,nrow = 1), filename = paste("./output/",pred[i],"_final_plot_",cohorts[i],".png",sep=""), width =2.7,height = 2.5,units = "in",dpi = 300) df <- data_clean df$group <- df$quart df$group <- df$group=="q1" df <- df %>% filter(!is.na(df[pred[i]])) set.seed(1234) match.it <- matchit(group~log_nfl_csf,data = df, method="full") a <- summary(match.it) plot(match.it) df.match <- match.data(match.it) df_q1 <- df.match %>% dplyr::group_by(subclass) %>% arrange(desc(abs(serum_csf_nfl_res))) %>% arrange(weights) %>% filter(quart=="q1") %>% filter(row_number()==1) df_q3 <- df.match %>% dplyr::group_by(subclass) %>% arrange(desc(abs(serum_csf_nfl_res))) %>% arrange(weights) %>% filter(quart=="q3") %>% filter(row_number()==1) df <- rbind(df_q1,df_q3) df <- df %>% dplyr::select(subclass,quart,log_nfl_serum,serum_csf_nfl_res,pred[i]) %>% dplyr::arrange(.,subclass) #plot serum NFL in two quartile groups p4 <- df %>% ggplot(aes_string(x=obs[i], y=pred[i], color = "quart")) + geom_boxplot(outlier.colour = "transparent") + geom_jitter(alpha = 0.6, shape = 1, size = 2,stroke = 0.6,position=position_jitter(width = 0.07,height=0)) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + ylim(c(min(df[pred[i]])-0.25*(max(df[pred[i]])-min(df[pred[i]])),max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_line(aes(group = subclass),color="gray", alpha = 0.6,size=0.3) + scale_colour_discrete(labels = c("Q3"))+ scale_x_discrete(labels = c("Q3")) + stat_compare_means(method = "wilcox.test", paired = TRUE, label.x = "q1", label.y = max(df[pred[i]])+0.1*(max(df[pred[i]])-min(df[pred[i]])) )+ theme_bw(base_size = 12) + stat_summary(fun.data = function(x) { return( data.frame( y = min(df[pred[i]])-0.05*(max(df[pred[i]])-min(df[pred[i]])), label = paste('n =', format(length(x), big.mark = ",", decimal.mark = ".", scientific = FALSE), '\n', 'median =', format(round(median(x), 2), big.mark = ",", decimal.mark = ".", scientific = FALSE)) ) ) },geom = "text", hjust = 0.5, vjust = 1, size =3) + theme_bw(base_size = 12) + theme(legend.position = "none") ggsave(plot = ggarrange(p4,ncol = 1,nrow = 1), filename = paste("./output/",pred[i],"_final_plot_propensity_matched_",cohorts[i],".png",sep=""), width =2.7,height = 2.5,units = "in",dpi = 300) } # # # ######################################################################################################################################################## ######################################################################################################################################################## # # check the if incorporation of PNS injury variable decreases variance explained by patientcode # ######################################################################################################################################################## ######################################################################################################################################################## ##################################### # isolate training cohort vars_model <- c("log_nfl_csf","log_nfl_serum","age","bmi_weight", "serum_creatinine","serum_ap","serum_bun","medulla_ll_atrophy","panel_11_17") confound <- c("age","bmi_weight", "serum_creatinine","serum_ap","serum_bun","medulla_ll_atrophy","panel_11_17") data_sub_train <- data_sub %>% filter(cohort=="train")%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS"))%>% select(patientcode,lpdate,all_of(vars_model)) data_sub_train_clean <- data_sub_train[complete.cases(data_sub_train[confound]),] # generate lm model in the training cohort mod_train <- lm(log_nfl_serum~log_nfl_csf,data_sub_train_clean) summary(mod_train) #MLM #build the model: fit <- lm(log_nfl_serum~log_nfl_csf + age + serum_creatinine + bmi_weight + serum_ap + serum_bun, data_sub_train_clean) #stepwise regression step <- stepAIC(fit, direction="both") step$anova # display results best_fit <- lm(log_nfl_serum ~ log_nfl_csf + age + serum_creatinine + bmi_weight + serum_ap + serum_bun, data_sub_train_clean) summary(best_fit) ######################### ##### add PNS variables ######################### #MLM #build the model: fit_pns <- lm(log_nfl_serum~log_nfl_csf + age + serum_creatinine + bmi_weight + serum_ap + serum_bun + medulla_ll_atrophy + panel_11_17, data_sub_train_clean) #stepwise regression step <- stepAIC(fit_pns, direction="both") step$anova # display results best_fit_pns <- lm(log_nfl_serum ~ log_nfl_csf + age + serum_creatinine + bmi_weight + serum_ap + serum_bun + medulla_ll_atrophy + panel_11_17, data_sub_train_clean) summary(best_fit_pns) ##################################### # isolate validation cohort data_sub_val <- data_sub %>% filter(cohort=="val")%>% filter(diagnosis %in% c("RR-MS","SP-MS","PP-MS"))%>% select(patientcode,lpdate,all_of(vars_model)) data_sub_val_clean <- data_sub_val[complete.cases(data_sub_val[confound]),] ################################################################################################ #### predict and plot ################################################################################################ #### TRAINING COHORT #predict from simple LM data_sub_train_clean$lm_pred_sNFL <- predict(mod_train,data_sub_train_clean) #predict from MLM data_sub_train_clean$mlm_pred_sNFL <- predict(best_fit,data_sub_train_clean) #predict from PNS-MLM data_sub_train_clean$mlm_pns_pred_sNFL <- predict(best_fit_pns,data_sub_train_clean) # plot pred <- c("lm_pred_sNFL","mlm_pred_sNFL","mlm_pns_pred_sNFL") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum") pred_nice <- c("cNFL-predicted sNFL","cNFL-predicted sNFL","cNFL-predicted sNFL") obs_nice <- c("measured Log10 sNFL","measured Log10 sNFL","measured Log10 sNFL") for(i in 1:length(pred)){ # i <-2 df <- data_sub_train_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[obs[i]])+.12*(max(df[obs[i]])-min(df[obs[i]])), size=4,parse = TRUE) + annotate("text",label = paste("CCC==", as.numeric(round(epi.ccc(as.numeric(df[[obs[i]]]),as.numeric(df[[pred[i]]]), ci = "z-transform",conf.level = 0.95)$rho.c[1], digits = 2)),sep=""), x=max(df[obs[i]])-.3*(max(df[obs[i]])-min(df[obs[i]])), y=max(df[obs[i]])-.01*(max(df[obs[i]])-min(df[obs[i]])), size=4,parse = TRUE,color="blue") + # ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), # max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ ylim(c(min(df[obs[i]]),max(df[obs[i]])))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_abline(slope = 1,intercept = 0,color="blue",size=0.5) + stat_cor(aes(label=..r.label..), size=4,color="#008F00",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#008F00",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#008F00",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#008F00") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#008F00")) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3,ncol = 3,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/predicted_sNFL_wPNS_training.png", width =9,height = 2.2,units = "in",dpi = 300) ############################################################### #### VALIDATION COHORT ############################################################### #predict from simple LM data_sub_val_clean$lm_pred_sNFL <- predict(mod_train,data_sub_val_clean) #predict from MLM data_sub_val_clean$mlm_pred_sNFL <- predict(best_fit,data_sub_val_clean) #predict from PNS-MLM data_sub_val_clean$mlm_pns_pred_sNFL <- predict(best_fit_pns,data_sub_val_clean) # plot pred <- c("lm_pred_sNFL","mlm_pred_sNFL","mlm_pns_pred_sNFL") obs <- c("log_nfl_serum","log_nfl_serum","log_nfl_serum") pred_nice <- c("cNFL-predicted sNFL","cNFL-predicted sNFL","cNFL-predicted sNFL") obs_nice <- c("measured Log10 sNFL","measured Log10 sNFL","measured Log10 sNFL") for(i in 1:length(pred)){ # i <-2 df <- data_sub_val_clean p <- df %>% ggplot(aes_string(x=obs[i], y=pred[i])) + geom_point(color="#001321",alpha = 0.4, shape = 1, size = 2,stroke = 0.6) + annotate("text",label = paste0('atop(n[s] ==', nrow(df[obs[i]]), ', n[p] ==', length(unique(df$patientcode)), ')'), x=max(df[obs[i]])-.1*(max(df[obs[i]])-min(df[obs[i]])), y=min(df[obs[i]])+.12*(max(df[obs[i]])-min(df[obs[i]])), size=4,parse = TRUE) + annotate("text",label = paste("CCC==", as.numeric(round(epi.ccc(as.numeric(df[[obs[i]]]),as.numeric(df[[pred[i]]]), ci = "z-transform",conf.level = 0.95)$rho.c[1], digits = 2)),sep=""), x=max(df[obs[i]])-.3*(max(df[obs[i]])-min(df[obs[i]])), y=max(df[obs[i]])-.01*(max(df[obs[i]])-min(df[obs[i]])), size=4,parse = TRUE,color="blue") + # ylim(c(min(df[pred[i]]-0.1*(max(df[pred[i]])-min(df[pred[i]]))), # max(df[pred[i]])+0.2*(max(df[pred[i]])-min(df[pred[i]]))))+ ylim(c(min(df[obs[i]]),max(df[obs[i]])))+ xlab(obs_nice[i]) + ylab(pred_nice[i]) + geom_abline(slope = 1,intercept = 0,color="blue",size=0.5) + stat_cor(aes(label=..r.label..), size=4,color="#008F00",label.y.npc = 1)+ stat_cor(aes(label=..rr.label..), size=4,color="#008F00",label.y.npc = .90)+ stat_cor(aes(label=..p.label..), size=4,color="#008F00",label.y.npc = .77)+ geom_smooth(method="lm",se=TRUE, size=1,color="#008F00") + theme_bw(base_size = 12) + theme(axis.title.x = element_text(colour = "#C00000"), axis.title.y = element_text(colour = "#008F00")) assign(paste("p",i,sep = ""),p) } ggsave(plot = ggarrange(p1,p2,p3,ncol = 3,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/predicted_sNFL_wPNS_validation.png", width =9,height = 2.2,units = "in",dpi = 300) summary(mod_train) summary(best_fit) summary(best_fit_pns) #plot lolipop plots for t-values variables # define dataset: data <- tibble::enframe(confound,name = NULL,value = "term") data$term_nice <- c("Age","Weight","Creatinine","AP","BUN","MRI SC Atrophy","Muscle Atrophy+BBSA") # get Tstat from the MLM model tidy_fit <- broom::tidy(best_fit) tidy_fit <- tidy_fit[-c(1:2),] tidy_fit$statistic_mlm <- tidy_fit$statistic tidy_fit <- tidy_fit %>% select(term,statistic_mlm) #merge with data data <- merge(data,tidy_fit,by="term",all.x = TRUE) # get Tstat from the MLM_PNS model tidy_fit <- broom::tidy(best_fit_pns) tidy_fit <- tidy_fit[-c(1:2),] tidy_fit$statistic_mlm_pns <- tidy_fit$statistic tidy_fit <- tidy_fit %>% select(term,statistic_mlm_pns) #merge with data data <- merge(data,tidy_fit,by="term",all.x = TRUE) data$statistic <- NA # lolipop plotplot(# Horizontal version z <- ggplot(data, aes(y=statistic_mlm, x=term_nice)) + geom_segment( aes(y=0, yend=statistic_mlm,x=term_nice, xend=term_nice), color="skyblue") + geom_point( color="blue", size=4, alpha=0.6) + theme_light() + scale_x_discrete(limits = c("Weight","Muscle Atrophy+BBSA","MRI SC Atrophy","Creatinine","BUN","AP","Age")) + geom_hline(yintercept = 0,color = "black", linetype="dashed",size=.25) + coord_flip() + ylab("t-statistics") + xlab("")+ ylim(c(-7.5,9.5))+ theme( panel.grid.major.y = element_blank(), panel.border = element_blank(), axis.ticks.y = element_blank() ) ggsave(plot = ggarrange(z,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/vars_lolipop_MLM.png", width =3.5,height = 2.2,units = "in",dpi = 300) #lolipop for PNS version q <- ggplot(data, aes(y=statistic_mlm_pns, x=term_nice)) + geom_segment( aes(y=0, yend=statistic_mlm_pns,x=term_nice, xend=term_nice), color="pink") + geom_point( color="red", size=4, alpha=0.6) + theme_light() + scale_x_discrete(limits = c("Weight","Muscle Atrophy+BBSA","MRI SC Atrophy","Creatinine","BUN","AP","Age")) + geom_hline(yintercept = 0,color = "black", linetype="dashed",size=.25) + coord_flip() + ylab("t-statistics") + xlab("")+ ylim(c(-7.5,9.5))+ theme( panel.grid.major.y = element_blank(), panel.border = element_blank(), axis.ticks.y = element_blank() ) ggsave(plot = ggarrange(q,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/vars_lolipop_MLM_PNS.png", width =3.5,height = 2.2,units = "in",dpi = 300) # lolipop for LM version #lolipop for PNS version t <- ggplot(data, aes(y=statistic_mlm_pns, x=term_nice)) + geom_segment( aes(y=0, yend=statistic_mlm_pns,x=term_nice, xend=term_nice), color="transparent") + geom_point( color="transparent", size=4, alpha=0.6) + theme_light() + scale_x_discrete(limits = c("Weight","Muscle Atrophy+BBSA","MRI SC Atrophy","Creatinine","BUN","AP","Age")) + geom_hline(yintercept = 0,color = "black", linetype="dashed",size=.25) + coord_flip() + ylab("t-statistics") + xlab("")+ ylim(c(-7.5,9.5))+ theme( panel.grid.major.y = element_blank(), panel.border = element_blank(), axis.ticks.y = element_blank() ) ggsave(plot = ggarrange(t,ncol = 1,nrow = 1, common.legend=TRUE, legend="bottom"), filename = "./output/vars_lolipop_LM.png", width =3.5,height = 2.2,units = "in",dpi = 300) summary(mod_train)