library(tidyverse)
library(lubridate)
library(survival)
library(survminer)
library(broom)
library(car)

# End of the observation period. Patients without a recorded death date are
# followed up to (and censored at) this date.
FOLLOW_UP_END <- dmy('01.08.2023')

# Load the cohort table and derive all analysis variables. The result `a` has
# one row per row of the input file.
a <- read_tsv("230921.tsv") %>%
  # Flag columns: a cell holding "x" becomes TRUE, everything else
  # (including missing cells) becomes FALSE.
  mutate(across(starts_with("therapy"), ~ (!is.na(.x) & .x == "x"))) %>%
  mutate(across(starts_with("gene_"), ~ (!is.na(.x) & .x == "x"))) %>%
  # Free-text columns: any non-missing, non-empty entry counts as TRUE.
  mutate(across(starts_with("genes_"), ~ (!is.na(.x) & .x != ""))) %>%
  # IHC columns: "na" -> NA, "+" -> TRUE, everything else (including missing
  # cells, which match neither test) -> FALSE.
  mutate(across(
    starts_with("ihc"),
    ~ (case_when(.x == "na" ~ NA, .x == "+" ~ TRUE, TRUE ~ FALSE))
  )) %>%
  # Dates are stored as day-month-year text.
  mutate(across(starts_with("date"), ~ dmy(.x))) %>%
  mutate(across(starts_with("imm"), ~ as.numeric(.x))) %>%
  mutate(
    marker_mgmt_methylated = case_when(
      marker_mgmt_methylated == "meth" ~ TRUE,
      marker_mgmt_methylated == "unmeth" ~ FALSE,
      TRUE ~ NA
    ),
    # "MSI" is recoded to "high"; all other values are kept unchanged.
    marker_msi = case_when(marker_msi == "MSI" ~ "high", TRUE ~ marker_msi),
    marker_karnofsky = as.numeric(marker_karnofsky),
    marker_tmb = as.numeric(marker_tmb),
    # Coerced like marker_tmb so that the `>= 30` threshold below is a numeric
    # comparison even if the column was read as text (no-op when it is
    # already numeric).
    marker_hrd = as.numeric(marker_hrd),
    current_state = as.factor(current_state),
    sex = as.factor(sex),
    date_follow_up_end = FOLLOW_UP_END
  ) %>%
  mutate(
    # Survival times run until death or, if no death is recorded, until the
    # end of follow-up. "months" are lubridate duration months (dmonths).
    survival_to_followup_days =
      interval(date_first_diagnosis, coalesce(date_death, date_follow_up_end)) / ddays(1),
    survival_to_followup_months =
      interval(date_first_diagnosis, coalesce(date_death, date_follow_up_end)) / dmonths(1),
    survival_from_vaccination_to_followup_days =
      interval(date_first_vaccination, coalesce(date_death, date_follow_up_end)) / ddays(1),
    survival_from_vaccination_to_followup_months =
      interval(date_first_vaccination, coalesce(date_death, date_follow_up_end)) / dmonths(1),
    # Event indicator for all survival models: TRUE when a death date exists.
    has_died = !is.na(date_death),
    # Note: ages and time_to_vaccination are expressed in days.
    age_at_first_diagnosis = interval(date_birth, date_first_diagnosis) / ddays(1),
    time_to_vaccination = interval(date_first_diagnosis, date_first_vaccination) / ddays(1),
    # TRUE when checkpoint therapy ended later than 30 days before the first
    # vaccination; FALSE when no checkpoint end date is recorded.
    checkpoint_during_vaccination =
      !is.na(date_checkpoint_end) & date_checkpoint_end - date_first_vaccination > -30,
    # NA when no checkpoint start date is recorded.
    checkpoint_before_start = date_first_vaccination - date_checkpoint_start > 0,
    age_at_death = interval(date_birth, date_death) / ddays(1),
    tmb_high = marker_tmb >= 10,
    hrd_high = marker_hrd >= 30,
    has_msi = (marker_msi == "high" | marker_msi == "low"),
    imm_positive = imm_v7_positive_pools > 0,
    imm_positive_nonlow = imm_v7_positive_pools_normalized >= 0.1
  )


## Some basic stats
## Each call below is evaluated at top level, so its Kaplan-Meier summary is
## printed directly.

# Overall survival of the whole cohort, measured from first diagnosis
survfit(
  formula = Surv(time = survival_to_followup_months, event = has_died) ~ 1,
  data = a
)

# On-treatment overall survival, measured from the first vaccination
survfit(
  formula = Surv(time = survival_from_vaccination_to_followup_months, event = has_died) ~ 1,
  data = a
)

# Overall survival from first diagnosis, split by bevacizumab therapy
survfit(
  formula = Surv(time = survival_to_followup_months, event = has_died) ~ therapy_bevacizumab,
  data = a
)

## Univariate stats
# Every covariate is tested on its own in a single-predictor Cox model, once
# for survival from first vaccination and once for survival from first
# diagnosis. The combined table is written to "univariate_tests.tsv".
covariates <- c(
  "age_at_first_diagnosis", "sex", "marker_karnofsky",
  "tmb_high", "marker_tmb", "hrd_high", "marker_hrd", "has_msi",
  "ihc_pdl1",
  "imm_positive", "imm_positive_nonlow",
  "therapy_checkpoint", "therapy_optune", "marker_mgmt_methylated", "therapy_temozolomid", "therapy_radio",
  "therapy_immunosuppressant", "therapy_bevacizumab", "therapy_osimertinib", "therapy_lomustin",
  "genes_relevant_germline_mutations", "genes_pharmacogenetics",
  "gene_pten_inactivated", "gene_pik3_akt_mtor_activated", "gene_egfr_activated", "gene_cdkn2a_cdkn2b_loss",
  "gene_cdk4_cd6", "gene_rb1", "gene_cellcycle", "gene_nfe1", "gene_tert", "gene_tp53", "gene_mdms", "gene_rtk",
  "time_to_vaccination"
)

# Fit one Cox model per covariate against the survival time in `time_column`
# (event indicator: has_died) and summarise the fits.
#
# time_column: name of the survival-time column in `data`.
# label:       prefix for the three result column names.
# covariates:  character vector of predictor column names.
# data:        data frame holding the survival and predictor columns.
#
# Returns a tibble with one row per covariate (in the order given) and three
# character columns: hazard ratio with 95% CI, score (log-rank) test
# statistic, and the p-value of that test, each rounded to 2 significant
# digits. Stops if a model does not have exactly one coefficient, because the
# table has room for a single hazard ratio per covariate.
univariate_cox_table <- function(time_column, label, covariates, data) {
  result_names <- paste0(
    label, c(": HR (95% CI for HR)", ": log-rank test", ": p.value")
  )
  rows <- lapply(covariates, function(covariate) {
    model_formula <- as.formula(
      paste0("Surv(time=", time_column, ",event=has_died)~ ", covariate)
    )
    model_summary <- summary(coxph(model_formula, data = data))
    n_coefficients <- nrow(model_summary$coefficients)
    if (n_coefficients != 1L) {
      stop(
        "Covariate '", covariate, "' yields ", n_coefficients,
        " coefficients; exactly one is required for the univariate table.",
        call. = FALSE
      )
    }
    # Columns are addressed by name rather than by position, so the hazard
    # ratio is always taken from "exp(coef)".
    hazard_ratio <- signif(model_summary$coefficients[, "exp(coef)"], digits = 2)
    ci_lower <- signif(model_summary$conf.int[, "lower .95"], digits = 2)
    ci_upper <- signif(model_summary$conf.int[, "upper .95"], digits = 2)
    setNames(
      c(
        paste0(hazard_ratio, " (", ci_lower, "-", ci_upper, ")"),
        signif(model_summary$sctest["test"], digits = 2),
        signif(model_summary$sctest["pvalue"], digits = 2)
      ),
      result_names
    )
  })
  # Stack the per-covariate vectors into a character matrix, one row each.
  as_tibble(do.call(rbind, rows))
}

univ_results <- bind_cols(
  tibble(covariate = covariates),
  univariate_cox_table(
    "survival_from_vaccination_to_followup_months", "1st vacc", covariates, a
  ),
  univariate_cox_table(
    "survival_to_followup_months", "1st diag", covariates, a
  )
)

# Only the result table is kept in the workspace.
rm(covariates, univariate_cox_table)
write_tsv(univ_results, "univariate_tests.tsv", col_names = TRUE)
univ_results


# Load propensity matching dataset

# Control patients from the matched data set: `group` 0 is labelled "Public",
# `group` 1 "Praxis"; only the "Public" rows are kept here.
# imm_positive_nonlow is set to FALSE for all of them.
matched_cohort <-
  read_csv("propensity_score_matching/matched_datasets_TTV.csv") %>%
  mutate(patient_group = c("Public", "Praxis")[group + 1]) %>%
  filter(patient_group == "Public") %>%
  select(id, survival_months, vital_status, patient_group) %>%
  mutate(
    imm_positive_nonlow = FALSE,
    id = as.numeric(id)
  )

# Controls stacked on top of our own cohort (`a`, labelled "Praxis"), brought
# to the same columns; survival is counted from first diagnosis.
matched_cohorts <- bind_rows(
  matched_cohort,
  a %>%
    mutate(
      vital_status = ifelse(has_died, "Dead", "Alive"),
      patient_group = "Praxis"
    ) %>%
    select(
      id,
      survival_months = survival_to_followup_months,
      vital_status,
      patient_group,
      imm_positive_nonlow
    )
)


## Figure 1A: Compare survival from first diagnosis with/without progress
## (Kaplan-Meier curves stratified by current_state)
fig1a.fit <- survfit(
  Surv(time = survival_to_followup_months, event = has_died) ~ current_state,
  data = a
)
fig1a.pval <- surv_pvalue(fig1a.fit)$pval.txt

fig1a <- ggsurvplot(
  fig1a.fit,
  data = a,
  # curves
  conf.int = FALSE,
  pval = fig1a.pval,
  pval.size = 3.5, # used by ggsurvplot_core
  censor.shape = "|",
  surv.median.line = "hv",
  palette = c("#FF0000", "#000000"),
  # axes
  xlab = "Overall survival from diagnosis [months]",
  break.time.by = 3,
  # legend; labels are applied in the level order of current_state
  # NOTE(review): presumably the first level is the recurrent group - confirm
  legend = "bottom",
  legend.title = element_blank(),
  legend.labs = c("recurrent", "primary"),
  # number-at-risk table
  risk.table = TRUE,
  risk.table.y.text = FALSE,
  tables.height = .20,
  tables.y.text = "",
  tables.theme = theme_cleantable(),
  fontsize = 3.5, # used by ggrisktable
  # overall look
  ggtheme = theme_classic2(base_size = 12),
  font.family = "Helvetica"
) + ggtitle("A. OS from initial GBM diagnosis")

# Set the remaining text sizes directly on the stored themes
fig1a$plot$theme$legend.text$size <- 12
fig1a$table$theme$text$size <- 12
fig1a$table$theme$plot.title$size <- 12
fig1a

ggsave(
  "final_figure1A.pdf",
  plot = print(fig1a),
  width = 88, height = 58, units = "mm",
  dpi = 300, scale = 2
)


## Figure 1B: Compare survival from first vaccination with/without progress
## (Kaplan-Meier curves stratified by current_state)
fig1b.fit <- survfit(
  Surv(time = survival_from_vaccination_to_followup_months, event = has_died) ~ current_state,
  data = a
)
fig1b.pval <- surv_pvalue(fig1b.fit)$pval.txt

fig1b <- ggsurvplot(
  fig1b.fit,
  data = a,
  # curves
  conf.int = FALSE,
  pval = fig1b.pval,
  pval.size = 3.5, # used by ggsurvplot_core
  censor.shape = "|",
  surv.median.line = "hv",
  palette = c("#FF0000", "#000000"),
  # axes
  xlab = "Overall survival from 1st vaccination [months]",
  break.time.by = 3,
  # legend; labels are applied in the level order of current_state
  # NOTE(review): presumably the first level is the recurrent group - confirm
  legend = "bottom",
  legend.title = element_blank(),
  legend.labs = c("recurrent", "primary"),
  # number-at-risk table
  risk.table = TRUE,
  risk.table.y.text = FALSE,
  tables.height = .20,
  tables.y.text = "",
  tables.theme = theme_cleantable(),
  fontsize = 3.5, # used by ggrisktable
  # overall look
  ggtheme = theme_classic2(base_size = 12),
  font.family = "Helvetica"
) + ggtitle("B. On-treatment survival (from initiation of vaccine therapy)")

# Set the remaining text sizes directly on the stored themes
fig1b$plot$theme$legend.text$size <- 12
fig1b$table$theme$text$size <- 12
fig1b$table$theme$plot.title$size <- 12
fig1b

ggsave(
  "final_figure1B.pdf",
  plot = print(fig1b),
  width = 88, height = 58, units = "mm",
  dpi = 300, scale = 2
)


## Figure 2: Complete cohort, restricted to >18y & has mgmt, compared to the matched controls
## The restriction keeps rows whose id is missing or is listed in the matched
## data set file.
matched_cohorts_limited <- matched_cohorts %>%
  filter(
    is.na(id) |
      id %in% read_csv("propensity_score_matching/matched_datasets_TTV.csv")$id
  )

fig2.fit <- survfit(
  Surv(survival_months, vital_status == "Dead") ~ patient_group,
  data = matched_cohorts_limited
)
fig2.pval <- surv_pvalue(fig2.fit)$pval.txt

fig2 <- ggsurvplot(
  fig2.fit,
  data = matched_cohorts_limited,
  # curves
  conf.int = TRUE,
  pval = fig2.pval,
  pval.size = 3.5, # used by ggsurvplot_core
  censor.shape = "|",
  surv.median.line = "hv",
  palette = c("#FF0000", "#000000"),
  # axes
  xlab = "Overall survival from diagnosis* [months]",
  break.time.by = 6,
  # legend; strata are ordered "Praxis" (our cohort), then "Public"
  legend = "bottom",
  legend.title = element_blank(),
  legend.labs = c("our data", "publication data, propensity score matched"),
  # number-at-risk table
  risk.table = TRUE,
  risk.table.y.text = FALSE,
  tables.height = .20,
  tables.y.text = "",
  tables.theme = theme_cleantable(),
  fontsize = 3.5, # used by ggrisktable
  # overall look
  ggtheme = theme_classic2(base_size = 12),
  font.family = "Helvetica"
)
# The title is only added to this on-screen print; the stored (and saved)
# figure stays untitled.
fig2 + ggtitle("Figure 2: Overall survival for matched cohorts")
fig2$plot$theme$legend.text$size <- 12
fig2$table$theme$text$size <- 12
fig2$table$theme$plot.title$size <- 12

ggsave(
  "final_figure2.pdf",
  plot = print(fig2),
  width = 88, height = 58, units = "mm",
  dpi = 300, scale = 2
)


## Figure 3A: Compare survival from first vaccination between patients with a
## positive, non-low immune-monitoring result (imm_positive_nonlow == TRUE)
## and those without one or with a low one (FALSE)
fig3a.fit <- survfit(
  Surv(time = survival_from_vaccination_to_followup_months, event = has_died) ~ imm_positive_nonlow,
  data = a
)
# A stray trailing `$` used to follow `pval.txt`; it made the parser merge this
# statement with the ggsurvplot() assignment below, so neither object was
# created.
fig3a.pval <- surv_pvalue(fig3a.fit)$pval.txt

fig3a <- ggsurvplot(
  fig3a.fit,
  data = a,
  # curves
  conf.int = FALSE,
  pval = fig3a.pval,
  pval.size = 3.5, # used by ggsurvplot_core
  censor.shape = "|",
  surv.median.line = "hv",
  palette = c("#FF0000", "#000000"),
  # axes
  xlab = "Overall survival from 1st vaccination [months]",
  break.time.by = 3,
  # legend; strata are ordered FALSE (non-responder), then TRUE (responder)
  legend = "bottom",
  legend.title = element_blank(),
  legend.labs = c("our data, immunological Non-Responder", "our data, immunological Responder"),
  # number-at-risk table
  risk.table = TRUE,
  risk.table.y.text = FALSE,
  tables.height = .20,
  tables.y.text = "",
  tables.theme = theme_cleantable(),
  fontsize = 3.5, # used by ggrisktable
  # overall look
  ggtheme = theme_classic2(base_size = 12),
  font.family = "Helvetica"
) + ggtitle("A.")

# Set the remaining text sizes directly on the stored themes
fig3a$table$theme$text$size <- 12
fig3a$table$theme$plot.title$size <- 12
fig3a$plot$theme$legend.text$size <- 12

fig3a
ggsave(
  "final_figure3a.pdf",
  plot = print(fig3a),
  width = 88, height = 58, units = "mm",
  dpi = 300, scale = 2
)


## Figure 3B: Complete cohort - split by immune status, compared to the matched controls
fig3b.fit <- survfit(
  Surv(survival_months, vital_status == "Dead") ~ patient_group + imm_positive_nonlow,
  data = matched_cohorts
)

fig3b <- ggsurvplot(
  fig3b.fit,
  data = matched_cohorts,
  # curves; an empty string is passed as the p-value text
  conf.int = FALSE,
  pval = "",
  pval.size = 3.5, # used by ggsurvplot_core
  censor.shape = "|",
  surv.median.line = "hv",
  # axes
  xlab = "Overall survival from diagnosis* [months]",
  break.time.by = 6,
  # legend
  legend = "bottom",
  legend.title = element_blank(),
  legend.labs = c("our data, imm. Non-Responder", "our data, imm. Responder", "publication data"),
  # number-at-risk table
  risk.table = TRUE,
  risk.table.y.text = FALSE,
  tables.height = .20,
  tables.y.text = "",
  tables.theme = theme_cleantable(),
  fontsize = 3.5, # used by ggrisktable
  # overall look
  ggtheme = theme_classic2(base_size = 12),
  font.family = "Helvetica"
) + ggtitle("B. ")

# Set the remaining text sizes directly on the stored themes
fig3b$plot$theme$legend.text$size <- 12
fig3b$table$theme$text$size <- 12
fig3b$table$theme$plot.title$size <- 12

fig3b

ggsave(
  "final_figure3b.pdf",
  plot = print(fig3b),
  width = 88, height = 58, units = "mm",
  dpi = 300, scale = 2
)


## Figure 3C: IMM positive/nonlow, compared to matched controls
## Keeps our patients with imm_positive_nonlow == TRUE plus all "Public" rows.
matched_cohorts_positive <- matched_cohorts %>%
  filter(imm_positive_nonlow == TRUE | patient_group == "Public")
fig3c.fit <- survfit(
  Surv(survival_months, vital_status == "Dead") ~ patient_group,
  data = matched_cohorts_positive
)
fig3c.pval <- surv_pvalue(fig3c.fit)$pval.txt

fig3c <- ggsurvplot(
  fig3c.fit,
  data = matched_cohorts_positive,
  # curves
  conf.int = TRUE,
  pval = fig3c.pval,
  pval.size = 3.5, # used by ggsurvplot_core
  censor.shape = "|",
  surv.median.line = "hv",
  palette = c("#FF0000", "#000000"),
  # axes
  xlab = "Overall survival from diagnosis* [months]",
  break.time.by = 6,
  # legend; strata are ordered "Praxis" (our cohort), then "Public"
  legend = "bottom",
  legend.title = element_blank(),
  legend.labs = c("our data, immunological Responder", "publication data"),
  # number-at-risk table
  risk.table = TRUE,
  risk.table.y.text = FALSE,
  tables.height = .20,
  tables.y.text = "",
  tables.theme = theme_cleantable(),
  fontsize = 3.5, # used by ggrisktable
  # overall look
  ggtheme = theme_classic2(base_size = 12),
  font.family = "Helvetica"
) + ggtitle("C. ")

# Set the remaining text sizes directly on the stored themes
fig3c$plot$theme$legend.text$size <- 12
fig3c$table$theme$text$size <- 12
fig3c$table$theme$plot.title$size <- 12

# The long title is only used for this on-screen print; the saved figure keeps
# the short "C. " title.
fig3c + ggtitle("Figure 3c: Overall survival for matched cohorts, only positive")

ggsave(
  "final_figure3c.pdf",
  plot = print(fig3c),
  width = 88, height = 58, units = "mm",
  dpi = 300, scale = 2
)