############################################################################################################
# External validation and recalibration of the ERASL risk scores
#
# BR Beumer; K Takagi; B Vervoort; S Buettner; Y Umeda; T Yagi; T Fujiwara; EW Steyerberg; JNM IJzermans*
#
# Affiliations:
# * Department of Surgery, Division of HPB & Transplant Surgery, Erasmus MC, University Medical Centre
#   Rotterdam, Rotterdam, The Netherlands
#   Erasmus MC Rotterdam
#
# Corresponding author: Prof. Dr. J.N.M. IJzermans,
# Email: j.ijzermans@erasmusmc.nl
############################################################################################################

# Packages are attached in this exact order; later packages mask earlier ones,
# so the order is left untouched.
library(readxl)
library(tableone)
library(dplyr)
library(tidyr)
library(mice)
library(VIM)
library(survival) # KM plots, baseline haz
library(rms) # cph, validate, rcorr.cens
library(boot) # generic nonparametric bootstrap
library(survAUC) # GHCI,tdAUC
# survcomp install
#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("survcomp")
library(survcomp) # Rd
library(SurvRegCensCov)

# R.Version()$version.string
# NOTE(review): machine-specific working directory; the data file below is read
# from an absolute path, so this call may not be needed - confirm before removing.
setwd("C:\\Users\\beren\\Dropbox\\Current projects\\9. External Validation johnson\\src")
# Previous (commented-out) data path; its remainder follows in the next comment.
#path = "E:\\7. 
# (tail of the commented-out previous data path:)
#   ERASL validation\\data\\external\\HCC_EMC_JP(17-02-2020).xlsx"
path <- "C:\\Users\\062010\\Desktop\\ERASL_val\\HCC_EMC_JP(18-02-2021).xlsx"
df <- read_excel(path, sheet="Source data")

#---------- Renaming and date format ----------------------------------------
# Rename the spreadsheet headers to syntactic column names. Columns that are
# absent from the sheet are silently skipped (no match -> nothing renamed).
old <- c("Patient ID", "Birth date", "Date of surgery","Child-pugh score", "Child-pugh class",
         "Total bilirubin", "Tumor size (cm)", "Tumor number", "Microvascular invasion")
new <- c("Patient_ID", "Birth_date", "Date_of_surgery","Child_pugh_score", "Child_pug_class",
         "Total_bilirubin", "Tumor_size", "Tumor_number", "Microvascular_invasion")
# seq_along() instead of 1:length(): safe if the vectors are ever empty.
for (i in seq_along(old)) {
  names(df)[names(df) == old[i]] <- new[i]
}
#df <- rename(df, D_r = T_r)
#df <- rename(df, D_d = T_d)

# date transformation
df$Birth_date <- as.Date(df$Birth_date)
df$Date_of_surgery <- as.Date(df$Date_of_surgery)
df$D_d <- as.Date(df$D_d)
df$D_r <- as.Date(df$D_r)

# mutations
# AFP of exactly 0 is replaced by a small positive value (log(AFP) is taken later).
df$AFP <- recode(df$AFP, '0' = 0.0001)
df$Age <- as.numeric((df$Date_of_surgery - df$Birth_date)/365.25)
# Times since surgery (Date differences, i.e. in days).
df$T_r <- df$D_r - df$Date_of_surgery
df$T_d <- df$D_d - df$Date_of_surgery
df <- mutate(df, ALBI_score = -0.085 *Albumin + 0.66*log10(Total_bilirubin))
df$ALBI_grade <- cut(df$ALBI_score, breaks= c(-500, -2.60,-1.39,500), labels= c(1,2,3))
# solitary tumor
df <- mutate(df, Solitary = as.numeric(Tumor_number==1))
# recurrence <2yr
# The whole conjunction is wrapped in as.numeric() so that Early is a 0/1
# indicator like Solitary; previously the parenthesis closed before the `&`,
# which left Early as a logical column.
df <- mutate(df, Early = as.numeric((T_r < 365.25*2) & (E_r == 1)))

#---------- Inspection and selection -----------------------------------------
#head(df)
#names(df)
#summary(df)
#str(df)

# Nr patients per center
table(df$Center)
# Nr patients total and unique
length(df$Patient_ID)
length(unique(df$Patient_ID))

# Select patients before 05/02/2018 to assure FU >= 2yr
df_sel <- filter(df, Date_of_surgery < as.Date("2018-02-05"))
table(df_sel$Center) # nr patients per center

# Select relevant columns
col_sel <- c("Patient_ID", "Sex", "Age","Birth_date", "Date_of_surgery", "HBV", "HCV",
             "Child_pugh_score", "Child_pug_class",
             "Albumin", "Total_bilirubin", "ALBI_score", "ALBI_grade", "AFP",
             "Tumor_size", "Tumor_number", "Solitary", "Microvascular_invasion", "Differentiation",
             "D_r", "T_r", "E_r", "Early", "D_d", "T_d", "E_d", "Center",
             "Extended hemihepatectomie rechts", "Extended hemihepatectomie links",
             "Hemihepatectomie rechts" , "Hemihepatectomie links",
             "Segment resectie", "Wigresectie", "Open_RFA", "Resection_margin", "Adjuvant_chemotherapy",
             "Intrahepatic_recurrence", "Extrahepatic_recurrence",
             "Reoperation", "Salvage_LT", "Ablation", "TACE", "RTx", "Systemic_chemotherapy",
             "Best_supportive_care"
)
df_sel <- df_sel[, col_sel]

#---------- Baseline Table ------------------------------------------------
#### complete case #############################################
# Keep only patients with all pre-operative predictors and the recurrence outcome.
col_pre <- c('Sex', 'Albumin', 'Total_bilirubin', 'AFP', 'Tumor_size', 'Tumor_number', 'D_r', 'E_r')
df_sel <- df_sel[complete.cases(df_sel[,col_pre]), ]
table(df_sel$Center)
#
# not checked in R for tumor number 0 / 99
# not checked in R for tumor size > 0
#### #############################################

df_emc_baseline <- filter(df_sel, Center=='ErasmusMC')
df_jp_baseline <- filter(df_sel, Center=='Okayama')

# Number cases
print(paste('Totaal aantal cases :', dim(df_sel)[1]))
table(df_sel$Center)

# Gender
print('Gender - ErasmusMC')
t <- table(recode(df_emc_baseline$Sex, '0'='Male', '1'='Female'))
t
print(round(t[2]/(t[1] + t[2]),2))
print('Gender - Okayama')
t <- table(recode(df_jp_baseline$Sex, '0'='Male', '1'='Female'))
t
print(round(t[2]/(t[1] + t[2]),2))

# Age
print('Age - ErasmusMC')
round(as.numeric(mean(df_emc_baseline$Age, na.rm=TRUE)))
round(sd(df_emc_baseline$Age, na.rm=TRUE))
print('Age - Okayama')
round(mean(df_jp_baseline$Age, na.rm=TRUE))
round(sd(df_jp_baseline$Age, na.rm=TRUE))

# Hep B
print('Hep B - ErasmusMC')
t <- table(df_emc_baseline$HBV)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
print('Hep B - Okayama')
t <- table(df_jp_baseline$HBV)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
#
# Hep C
# Pattern used throughout this section: tabulate a 0/1 variable per centre,
# show the counts, then the proportion of the second level.
print('Hep C - ErasmusMC')
t <- table(df_emc_baseline$HCV)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
print('Hep C - Okayama')
t <- table(df_jp_baseline$HCV)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

# Child pugh
print('Child pugh - ErasmusMC')
table(df_emc_baseline$Child_pug_class)
print('Child pugh - Okayama')
table(df_jp_baseline$Child_pug_class)

# Albi grade
print('ALBI grade - ErasmusMC')
table(df_emc_baseline$ALBI_grade)
print('ALBI grade - Okayama')
table(df_jp_baseline$ALBI_grade)

#Albumin
print('Albumin - Erasmus MC')
round(mean(df_emc_baseline$Albumin, na.rm=TRUE))
round(sd(df_emc_baseline$Albumin, na.rm=TRUE), 1)
print('Albumin - Okayama')
round(mean(df_jp_baseline$Albumin, na.rm=TRUE))
round(sd(df_jp_baseline$Albumin, na.rm=TRUE), 1)

#Bilirubin (median and quartiles: elements 2 and 4 of quantile() are Q1 and Q3)
print('Bilirubin - Erasmus MC')
round(median(df_emc_baseline$Total_bilirubin, na.rm=TRUE))
round(quantile(df_emc_baseline$Total_bilirubin, na.rm=TRUE))[c(2,4)]
print('Bilirubin - Okayama')
round(median(df_jp_baseline$Total_bilirubin, na.rm=TRUE))
round(quantile(df_jp_baseline$Total_bilirubin, na.rm=TRUE))[c(2,4)]

#AFP
print('AFP - Erasmus MC')
round(median(df_emc_baseline$AFP, na.rm=TRUE))
round(quantile(df_emc_baseline$AFP, na.rm=TRUE))[c(2,4)]
print('AFP - Okayama')
round(median(df_jp_baseline$AFP, na.rm=TRUE))
round(quantile(df_jp_baseline$AFP, na.rm=TRUE))[c(2,4)]

#Tumor size (the column comes from "Tumor size (cm)"; * 10 reports it in mm)
print('Tumor size - Erasmus MC')
round(median(df_emc_baseline$Tumor_size, na.rm=TRUE) * 10)
round(quantile(df_emc_baseline$Tumor_size, na.rm=TRUE) * 10)[c(2,4)]
print('Tumor size - Okayama')
round(median(df_jp_baseline$Tumor_size, na.rm=TRUE) * 10)
round(quantile(df_jp_baseline$Tumor_size, na.rm=TRUE) * 10)[c(2,4)]

#Solitary tumor
print('Solitary tumor - Erasmus MC')
t <- table(df_emc_baseline$Solitary)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
print('Solitary tumor - Okayama')
t <- table(df_jp_baseline$Solitary)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

#Microvascular invasion
print('MVI - Erasmus MC')
t <- table(df_emc_baseline$Microvascular_invasion)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
print('MVI - Okayama')
t <- table(df_jp_baseline$Microvascular_invasion)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

#Recurrence <2 yr
print('rec <2yr - Erasmus MC') ### deviating
t <- table(df_emc_baseline$Early)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
print('rec <2yr - Okayama')
t <- table(df_jp_baseline$Early)
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

#Median recurrence free survival (time converted from days to months)
print('Median RFS - Erasmus MC')
surv.obj.emc <- Surv(time = (df_emc_baseline$T_r/365.25*12), event = df_emc_baseline$E_r)
fit <- survfit(surv.obj.emc ~ 1, data=df_emc_baseline)
fit
print('Median RFS - Japan')
surv.obj.jp <- Surv(time = (df_jp_baseline$T_r/365.25*12), event = df_jp_baseline$E_r)
fit <- survfit(surv.obj.jp ~ 1, data=df_jp_baseline)
fit

# Median followup
# Reverse Kaplan-Meier: the event indicator is inverted (event = !E_r), so the
# fitted curve describes follow-up time, not recurrence-free survival. The
# printed labels now say so, and days are converted to months with 365.25 like
# everywhere else in this script (was 365).
print('Median follow-up in months - Erasmus MC (pre)')
surv.obj.emc <- Surv(time = (df_emc_baseline$T_r/365.25*12), event = !df_emc_baseline$E_r)
fit <- survfit(surv.obj.emc ~ 1, data=df_emc_baseline)
summary(fit)
print('Median follow-up in months - Erasmus MC (post)')
# NOTE(review): this uses the same baseline cohort as "(pre)"; a separate
# post-operative cohort (complete MVI) is only built further down - confirm intent.
surv.obj.emc.post <- Surv(time = (df_emc_baseline$T_r/365.25*12), event = !df_emc_baseline$E_r)
fit <- survfit(surv.obj.emc.post ~ 1, data=df_emc_baseline) # previously fitted surv.obj.emc, leaving this object unused
summary(fit)
print('Median follow-up in months - Okayama')
surv.obj.jp <- Surv(time = (df_jp_baseline$T_r/365.25*12), event = !df_jp_baseline$E_r)
fit <- survfit(surv.obj.jp ~ 1, data=df_jp_baseline)
summary(fit)

#"Extended hemihepatectomie rechts", "Extended hemihepatectomie links", "Hemihepatectomie rechts" , "Hemihepatectomie links",
#"Segment resectie", "Wigresectie", "Open_RFA", "Resection_margin", "Adjuvant_chemotherapy",
#"Intrahepatic_recurrence", "Extrahepatic_recurrence",
#"Reoperation", "Salvage_LT", "Ablation", "TACE", "RTx", "Systemic_chemotherapy", "Best_supportive_care",

## Type of resection (counts only; first table is ErasmusMC, second Okayama)
print('Extended hemi right (EMC-JP)')
t <- table(df_emc_baseline["Extended hemihepatectomie rechts"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Extended hemihepatectomie rechts"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Extended hemi left (EMC-JP)')
t <- table(df_emc_baseline["Extended hemihepatectomie links"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Extended hemihepatectomie links"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Hemi right (EMC-JP)')
t <- table(df_emc_baseline["Hemihepatectomie rechts"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Hemihepatectomie rechts"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Hemi left (EMC-JP)')
t <- table(df_emc_baseline["Hemihepatectomie links"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Hemihepatectomie links"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)

print('segment resectie (EMC-JP)')
t <- table(df_emc_baseline["Segment resectie"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Segment resectie"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Wigresectie(EMC-JP)')
t <- table(df_emc_baseline["Wigresectie"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Wigresectie"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Open_RFA (EMC-JP)')
t <- table(df_emc_baseline["Open_RFA"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Open_RFA"])
t
#round(as.numeric(t[2]/(t[1] + t[2])),2)

## Margin
print('Margin status (EMC-JP')
t <- table(df_emc_baseline["Resection_margin"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Resection_margin"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

## Type of recurrence
print('Intrahepatic recurrence (EMC-JP')
t <- table(df_emc_baseline["Intrahepatic_recurrence"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Intrahepatic_recurrence"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Extrahepatic recurrence (EMC-JP')
t <- table(df_emc_baseline["Extrahepatic_recurrence"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Extrahepatic_recurrence"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

sum(df_emc_baseline$E_r)
# View() opens a data viewer and is only meaningful in an interactive session;
# guarded so that batch runs (Rscript) do not stop here.
if (interactive()) {
  View(df_emc_baseline %>% filter(E_r == 1) %>% select(c(Patient_ID, Extrahepatic_recurrence, Intrahepatic_recurrence)))
}

## treatment of recurrence
print('reoperation (EMC-JP')
t <- table(df_emc_baseline["Reoperation"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Reoperation"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Salvage_LT (EMC-JP')
t <- table(df_emc_baseline["Salvage_LT"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Salvage_LT"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Ablation (EMC-JP')
t <- table(df_emc_baseline["Ablation"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Ablation"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

print('TACE (EMC-JP')
t <- table(df_emc_baseline["TACE"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["TACE"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

print('RTx (EMC-JP')
t <- table(df_emc_baseline["RTx"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["RTx"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Chemo (EMC-JP')
t <- table(df_emc_baseline["Systemic_chemotherapy"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Systemic_chemotherapy"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

print('Bests supportive care (EMC-JP')
t <- table(df_emc_baseline["Best_supportive_care"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)
t <- table(df_jp_baseline["Best_supportive_care"])
t
round(as.numeric(t[2]/(t[1] + t[2])),2)

## complete case analysis ###
# df_sel was already restricted to these complete cases above; the filter is
# repeated here so that df_complete is well defined on its own.
col_pre <- c('Sex', 'Albumin', 'Total_bilirubin', 'AFP', 'Tumor_size', 'Tumor_number', 'D_r', 'E_r')
df_complete <- df_sel[complete.cases(df_sel[,col_pre]), ]
# nr per center
table(df_complete$Center)

#---------- Calculate ERASL scores
#---------------------------------------------
# reformat data
# Build the ERASL predictors from the raw columns:
#   gender       1 = male, 0 = otherwise
#   albigrade_1  1 = ALBI grade 2 or 3, 0 = ALBI grade 1
#   log_afp      natural log of AFP
#   log_diamet   natural log of tumor size
# Sex is accepted both as 'M' and as 0: the baseline table in this script
# labels Sex 0 as 'Male' and an earlier version of this line used Sex == 0.
# Matching only 'M' would silently set gender to 0 for everybody if the sheet
# uses the numeric coding.
# NOTE(review): confirm the Sex coding of the current data file.
df_complete <- mutate(df_complete,
                      gender = as.numeric(Sex %in% c('M', '0')),
                      albigrade_1 = as.numeric(as.numeric(ALBI_grade) >1),
                      log_afp = log(AFP),
                      log_diamet = log(Tumor_size))

# Tumor number category: 0 = fewer than 2, 1 = 2 or 3, 2 = 4 or more.
# Values matching none of the conditions (e.g. NA) stay NA.
df_complete$aantaltum_23 <- case_when(
  df_complete$Tumor_number >= 4 ~ 2,
  df_complete$Tumor_number %in% c(2, 3) ~ 1,
  df_complete$Tumor_number < 2 ~ 0
)

# ERASL pre: linear predictor and risk groups (cut points 2.558 and 3.521)
df_complete <- mutate(df_complete,
                      pre_score = 0.818*gender + 0.447*albigrade_1 + 0.100*log_afp + 0.580*log_diamet + 0.492*aantaltum_23)
df_complete$pre_risk <- cut(df_complete$pre_score, breaks= c(-500, 2.558, 3.521,500), labels= c('low', 'intermediate', 'high'))
df_complete$pre_risk_n <- cut(df_complete$pre_score, breaks= c(-500, 2.558, 3.521,500), labels= c(1, 2, 3))
# 0/1 dummies per risk group (used for the hazard ratios between groups)
df_complete <- mutate(df_complete,
                      pre_low = as.numeric(pre_risk_n==1),
                      pre_intermediate = as.numeric(pre_risk_n==2),
                      pre_high = as.numeric(pre_risk_n==3)
)

# ERASL post: adds microvascular invasion (cut points 2.332 and 3.445).
# post_score is NA whenever Microvascular_invasion is missing.
df_complete <- mutate(df_complete,
                      post_score = 0.677*gender + 0.458*albigrade_1 + 0.082*log_afp + 0.451*log_diamet + 0.379*aantaltum_23 + 0.661*Microvascular_invasion)
df_complete$post_risk <- cut(df_complete$post_score, breaks= c(-500, 2.332, 3.445,500), labels= c('low', 'intermediate', 'high'))
df_complete$post_risk_n <- cut(df_complete$post_score, breaks= c(-500, 2.332, 3.445,500), labels= c(1, 2, 3))
df_complete <- mutate(df_complete,
                      post_low = as.numeric(post_risk_n==1),
                      post_intermediate = as.numeric(post_risk_n==2),
                      post_high = as.numeric(post_risk_n==3)
)

# split emc and jp describe erasl pre_risk and pre_scores (pooling?)
# Describe the ERASL scores and risk groups per centre.
df_emc_erasl <- filter(df_complete, Center == 'ErasmusMC')
print('ERASL pre - Erasmus MC')
mean(df_emc_erasl$pre_score, na.rm=TRUE)
sd(df_emc_erasl$pre_score, na.rm=TRUE)
table(df_emc_erasl$pre_risk)
print('ERASL post - Erasmus MC')
mean(df_emc_erasl$post_score, na.rm=TRUE)
sd(df_emc_erasl$post_score, na.rm=TRUE)
table(df_emc_erasl$post_risk)

df_jp_erasl <- filter(df_complete, Center == 'Okayama')
print('ERASL pre - Okayama')
mean(df_jp_erasl$pre_score, na.rm=TRUE)
sd(df_jp_erasl$pre_score, na.rm=TRUE)
table(df_jp_erasl$pre_risk)
print('ERASL post - Okayama')
mean(df_jp_erasl$post_score, na.rm=TRUE)
sd(df_jp_erasl$post_score, na.rm=TRUE)
table(df_jp_erasl$post_risk)

#---------- Survival analysis ERASL risk groups --------------------
# Kaplan-Meier estimates per risk group on the untruncated follow-up (months).
# The "post" sets only keep rows where every selected column is present.
col <- c('Patient_ID', 'T_r', 'E_r', 'pre_score', 'pre_risk', 'pre_risk_n', 'post_score', 'post_risk', 'post_risk_n', 'Center')
df_tmp <- df_complete[, col]
df_tmp_emc_pre <- subset(df_tmp, df_tmp$Center == "ErasmusMC")
df_tmp_emc_post <- subset(df_tmp_emc_pre, complete.cases(df_tmp_emc_pre))
df_tmp_jp_pre <- subset(df_tmp, df_tmp$Center == "Okayama")
df_tmp_jp_post <- subset(df_tmp_jp_pre, complete.cases(df_tmp_jp_pre))

print('emc pre')
fit <- survfit(Surv(time = df_tmp_emc_pre$T_r/365.25*12, event = df_tmp_emc_pre$E_r) ~ df_tmp_emc_pre$pre_risk)
fit
print('jp pre')
fit <- survfit(Surv(time = df_tmp_jp_pre$T_r/365.25*12, event = df_tmp_jp_pre$E_r) ~ df_tmp_jp_pre$pre_risk)
fit
print('emc post')
fit <- survfit(Surv(time = df_tmp_emc_post$T_r/365.25*12, event = df_tmp_emc_post$E_r) ~ df_tmp_emc_post$post_risk)
fit
print('jp post')
fit <- survfit(Surv(time = df_tmp_jp_post$T_r/365.25*12, event = df_tmp_jp_post$E_r) ~ df_tmp_jp_post$post_risk)
fit

#---------- FU truncation after 24 months --------------------------
# set E for ( E = 1 and T > 24) --> E = 0
# set T for (T> 24) --> T = 24
# The "beyond two years" mask is computed once, before T_r is overwritten, and
# the condition is written out explicitly instead of relying on `>` binding
# tighter than `&`.
df_compl_trunc <- df_complete
beyond_2yr <- df_compl_trunc$T_r > 2*365
df_compl_trunc$E_r[(df_compl_trunc$E_r == TRUE) & beyond_2yr] <- 0
df_compl_trunc$T_r[beyond_2yr] <- 365*2
df_compl_trunc$T_r <- as.numeric(df_compl_trunc$T_r) # days as plain numbers from here on

col_pre <- c('Patient_ID', 'pre_score', 'pre_risk', 'pre_risk_n', 'pre_low', 'pre_intermediate', 'pre_high', 'T_r', 'E_r',
             'gender', 'albigrade_1', 'log_afp', 'log_diamet', 'aantaltum_23', 'HCV', 'HBV')
col_post <- c('Patient_ID','post_score', 'post_risk', 'post_risk_n','post_low', 'post_intermediate', 'post_high', 'T_r', 'E_r',
              'gender', 'albigrade_1', 'log_afp', 'log_diamet', 'aantaltum_23', 'Microvascular_invasion', 'HCV', 'HBV')

df_pre_emc <- filter(df_compl_trunc, df_compl_trunc$Center == 'ErasmusMC')[,col_pre]
df_post_emc <- filter(df_compl_trunc, df_compl_trunc$Center == 'ErasmusMC')[,col_post]
df_post_emc <- df_post_emc[complete.cases(df_post_emc[, col_post]), ]
dim(df_post_emc)

df_pre_jp <- filter(df_compl_trunc, df_compl_trunc$Center == 'Okayama')[,col_pre]
df_post_jp <- filter(df_compl_trunc, df_compl_trunc$Center == 'Okayama')[,col_post]
# Same complete-case restriction as for ErasmusMC. Without it, rows with a
# missing post_score would reach the bootstrap statistics further down, which
# receive the raw columns and do not drop NAs themselves. No-op when the
# Okayama post columns are already complete.
df_post_jp <- df_post_jp[complete.cases(df_post_jp[, col_post]), ]

# Export the truncated analysis sets (read again by the accompanying Python code).
write.csv(df_pre_emc, "E:\\7. ERASL validation\\data\\intrim\\pre_ERASL_emc.csv")
write.csv(df_pre_jp, "E:\\7. ERASL validation\\data\\intrim\\pre_ERASL_jp.csv")
write.csv(df_post_emc, "E:\\7. ERASL validation\\data\\intrim\\post_ERASL_emc.csv")
write.csv(df_post_jp, "E:\\7. ERASL validation\\data\\intrim\\post_ERASL_jp.csv")

# Survival objects on the truncated data, time in months.
pre.surv.obj.emc <- Surv(time = df_pre_emc$T_r/365.25*12, event = df_pre_emc$E_r)
post.surv.obj.emc <- Surv(time = df_post_emc$T_r/365.25*12, event = df_post_emc$E_r)
pre.surv.obj.jp <- Surv(time = df_pre_jp$T_r/365.25*12, event = df_pre_jp$E_r)
post.surv.obj.jp <- Surv(time = df_post_jp$T_r/365.25*12, event = df_post_jp$E_r)

#---------- Misspecification ------------------------------------
# offset regressions
# The published score enters as an offset (coefficient fixed at 1); the fitted
# coefficients are therefore deviations from the published predictor effects.
pre.refit <- coxph(pre.surv.obj.emc~gender+albigrade_1+log_afp+log_diamet+aantaltum_23+offset(pre_score), data=df_pre_emc)
print('EMC pre')
pre.refit
pre.refit <- coxph(pre.surv.obj.jp~gender+albigrade_1+log_afp+log_diamet+aantaltum_23+offset(pre_score), data=df_pre_jp)
print('Japan pre')
pre.refit
post.refit <- coxph(post.surv.obj.emc~gender+albigrade_1+log_afp+log_diamet+aantaltum_23+Microvascular_invasion+offset(post_score), data=df_post_emc)
print('emc post')
post.refit
post.refit <- coxph(post.surv.obj.jp~gender+albigrade_1+log_afp+log_diamet+aantaltum_23+Microvascular_invasion+offset(post_score), data=df_post_jp)
print('jp post')
post.refit

# Inspection of proportional hazards assumption
# emc pre
pre.refit <- coxph(pre.surv.obj.emc~pre_score, data=df_pre_emc)
tmp <- cox.zph(pre.refit, transform="identity", global=TRUE) # transform='km'
print(tmp)
plot(tmp)
abline(h=0, lty=3)
pre.refit <- coxph(pre.surv.obj.emc~gender+albigrade_1+log_afp+log_diamet+aantaltum_23, data=df_pre_emc)
tmp <- cox.zph(pre.refit, transform="identity", global=TRUE)
print(tmp)
plot(tmp)

# jp pre
pre.refit <- coxph(pre.surv.obj.jp~pre_score, data=df_pre_jp)
tmp <- cox.zph(pre.refit, transform="identity", global=TRUE) #transform='km'
print(tmp)
plot(tmp)
abline(h=0, lty=3)
pre.refit <- coxph(pre.surv.obj.jp~gender+albigrade_1+log_afp+log_diamet+aantaltum_23, data=df_pre_jp)
tmp <- cox.zph(pre.refit, transform="identity", global=TRUE)
print(tmp)
plot(tmp)

# emc post
# Fit the ErasmusMC post-score model first. Without this line the test below
# ran on whatever post.refit last held, i.e. the Okayama offset regression.
post.refit <- coxph(post.surv.obj.emc~post_score, data=df_post_emc)
tmp <- cox.zph(post.refit, transform="identity", global=TRUE)
print(tmp)
plot(tmp)
abline(h=0, lty=3)

# Time-varying effect of the post score (ErasmusMC).
# The observed follow-up time may not be used as an ordinary covariate (it is
# part of the outcome); a score-by-time interaction is specified through the
# time-transform mechanism of coxph instead. `t` is the analysis time of the
# Surv object, i.e. months.
post.refit <- coxph(post.surv.obj.emc ~ post_score + tt(post_score),
                    data = df_post_emc,
                    tt = function(x, t, ...) x * t)
post.refit
post.refit <- coxph(post.surv.obj.emc~gender+albigrade_1+log_afp+log_diamet+aantaltum_23+Microvascular_invasion, data=df_post_emc)
tmp <- cox.zph(post.refit, transform="identity", global=TRUE)
print(tmp)
plot(tmp)

# jp post
post.refit <- coxph(post.surv.obj.jp~gender+albigrade_1+log_afp+log_diamet+aantaltum_23+Microvascular_invasion, data=df_post_jp)
tmp <- cox.zph(post.refit, transform="identity", global=TRUE)
print(tmp)
plot(tmp)

#---------- Discrimination ---------------------------------
# Calibration slope - Harrel's C
### documentation
#cph: https://www.rdocumentation.org/packages/rms/versions/5.1-3.1/topics/cph
#validate: https://www.rdocumentation.org/packages/rms/versions/5.1-3.1/topics/validate.cph

# Build a bootstrap statistic for boot(): Harrell's C of one score column.
#   score_col  name of the column holding the risk score
# Returns function(data, indices) giving C = 0.5 - Dxy/2 on the resampled rows.
# The sign is flipped because rcorr.cens() measures concordance between the
# score and survival time, and a higher score means earlier recurrence.
make_harrel_c <- function(score_col) {
  force(score_col)
  function(data, indices) {
    d <- data[indices, ]
    boot.surv <- Surv(time = d$T_r, event = d$E_r)
    Dxy <- unname(rcorr.cens(x = d[[score_col]], S = boot.surv)[2])
    -Dxy/2 + 0.5
  }
}

# validate() returns a matrix (rows Dxy, R2, Slope, ...; columns index.orig,
# training, test, optimism, index.corrected, n). Cells are addressed by name;
# these are the same cells the former linear indices [29] and [1] pointed at.

#### pre operative risk score ErasmusMC
print("ERASMUSMC")
print("calibration slope")
# regression on LP
cox.mod.pre <- cph(pre.surv.obj.emc ~ pre_score, data = df_pre_emc, init = 1, x=TRUE, y=TRUE, surv = TRUE)
cox.mod.pre
print("optimism corrected harrel's C")
set.seed(1)
Dxy <- validate(cox.mod.pre, method='boot', B=200)["Dxy", "index.corrected"]
Hc <- Dxy/2 + 0.5; print(Hc)
print("uncorrected harrel's C")
Dxy <- validate(cox.mod.pre, method='boot', B=200)["Dxy", "index.orig"]
Hc <- Dxy/2 + 0.5; print(Hc)

harrel_c <- make_harrel_c("pre_score")
# ordinary non parametric bootstrap
# note no optimism correction
boot.obj <- boot(data=df_pre_emc, statistic=harrel_c, R=200)
print(boot.obj)

#### pre operative risk score Okayama
print("OKAYAMA")
print("calibration slope")
cox.mod.pre <- cph(pre.surv.obj.jp ~ pre_score, data = df_pre_jp, init = 1, x=TRUE, y=TRUE, surv = TRUE)
cox.mod.pre
print("optimism corrected harrel's C")
set.seed(1)
Dxy <- validate(cox.mod.pre, method='boot', B=200)["Dxy", "index.corrected"]
Hc <- Dxy/2 + 0.5; print(Hc)
print("uncorrected harrel's C")
Dxy <- validate(cox.mod.pre, method='boot', B=200)["Dxy", "index.orig"]
Hc <- Dxy/2 + 0.5; print(Hc)

harrel_c <- make_harrel_c("pre_score")
# ordinary non parametric bootstrap
# note no optimism correction
boot.obj <- boot(data=df_pre_jp, statistic=harrel_c, R=200)
print(boot.obj)

#### post operative risk score ErasmusMC
print("calibration slope ErasmusMC")
cox.mod.post <- cph(post.surv.obj.emc ~ post_score, data = df_post_emc, init = 1, x=TRUE, y=TRUE, surv = TRUE)
cox.mod.post
print("optimism corrected harrel's C")
set.seed(1)
Dxy <- validate(cox.mod.post, method='boot', B=200)["Dxy", "index.corrected"] # "index.orig" gives the uncorrected value
# Stored under its own name; it used to overwrite the harrel_c function.
Hc_post <- Dxy/2 + 0.5; print(Hc_post)

harrel_c <- make_harrel_c("post_score")
# ordinary non parametric bootstrap
# note no optimism correction
boot.obj <- boot(data=df_post_emc, statistic=harrel_c, R=200)
print(boot.obj)

#### post operative risk score Okayama
print("calibration slope Okayama")
cox.mod.post <- cph(post.surv.obj.jp ~ post_score, data = df_post_jp, init = 1, x=TRUE, y=TRUE, surv = TRUE)
cox.mod.post
print("optimism corrected harrel's C")
set.seed(1)
Dxy <- validate(cox.mod.post, method='boot', B=200)["Dxy", "index.corrected"] # "index.orig" gives the uncorrected value
Hc_post <- Dxy/2 + 0.5; print(Hc_post)

# harrel_c stays bound to the post-score statistic from here on.
harrel_c <- make_harrel_c("post_score")
# ordinary non parametric bootstrap
# note no optimism correction
boot.obj <- boot(data=df_post_jp, statistic=harrel_c, R=200)
print(boot.obj)

# Gonen and heller concordance index
# Bootstrap statistic: `data` is a plain vector of linear predictors, so the
# resample is taken with single-bracket vector indexing.
gh <- function(data, indices){
  d <- data[indices]
  gh_stat <- GHCI(d)
  return(gh_stat)
}
# Bootstrap of the Gonen & Heller index; the data argument is the score vector.
print('ErasmusMC pre')
boot.gh.pre <- boot(data=df_pre_emc$pre_score, statistic=gh, R=200)
print(boot.gh.pre)
print('Okayama Pre')
boot.gh.pre <- boot(data=df_pre_jp$pre_score, statistic=gh, R=200)
print(boot.gh.pre)
print('ErasmusMC Post')
boot.gh.post <- boot(data=df_post_emc$post_score, statistic=gh, R=200) ## NA/NaN/Inf in foreign function call (arg1)
print(boot.gh.post)
print('Okayama Post')
boot.gh.post <- boot(data=df_post_jp$post_score, statistic=gh, R=200)
print(boot.gh.post)

## Royston and Sauerbrei's D
# Constants of the R^2_D transformation. NOTE: `sd` shadows the name of
# stats::sd as a variable; calls such as sd(x) still resolve to the function.
# The name is kept because the statistics below look it up globally.
kap <- sqrt(8/pi)
sd <- pi^2/6

# Build a bootstrap statistic for boot(): R^2_D of one score column, derived
# from the coefficient returned by D.index() via
#   R^2_D = (D^2 / kap^2) / (sd + D^2 / kap^2)
# kap and sd are read from the global environment when the statistic runs.
make_rd <- function(score_col) {
  force(score_col)
  function(data, indices) {
    d <- data[indices, ]
    D <- D.index(d[[score_col]], d$T_r, d$E_r)$coef
    (D^2)/(kap^2)/(sd + (D^2/(kap^2)))
  }
}

Rd_pre <- make_rd("pre_score")
# ErasmusMC pre
boot.Rd.pre <- boot(data=df_pre_emc, statistic=Rd_pre, R=200)
print('ErasmusMC pre')
print(boot.Rd.pre)
# Okayama pre
boot.Rd.pre <- boot(data=df_pre_jp, statistic=Rd_pre, R=200)
print('Okayama pre')
print(boot.Rd.pre)

Rd <- make_rd("post_score")
# ErasmusMC post
boot.Rd.post <- boot(data=df_post_emc, statistic=Rd, R=200)
print('ErasmusMC post')
print(boot.Rd.post)
# Okayama post
boot.Rd.post <- boot(data=df_post_jp, statistic=Rd, R=200)
print('Okayama post')
print(boot.Rd.post)

## Cumulative/dynamic tdAUC
# Build a bootstrap statistic for boot(): integrated cumulative/dynamic AUC of
# one score column, evaluated at 0, 1 and 2 years (times in days, like T_r).
# The resample serves as both training and test data.
make_tdauc <- function(score_col) {
  force(score_col)
  function(data, indices) {
    d <- data[indices, ]
    d.surv.obj <- Surv(time = d$T_r, event = d$E_r)
    res.AUC.cd <- AUC.cd(Surv.rsp = d.surv.obj, Surv.rsp.new = d.surv.obj,
                         lp = d[[score_col]], lpnew = d[[score_col]],
                         times = 0:2 * 365.25)
    res.AUC.cd$iauc
  }
}

tdauc <- make_tdauc("pre_score")
boot.tdauc.pre <- boot(data=df_pre_emc, statistic=tdauc, R=200)
print(boot.tdauc.pre)
boot.tdauc.pre <- boot(data=df_pre_jp, statistic=tdauc, R=200)
print(boot.tdauc.pre)

# tdauc stays bound to the post-score statistic from here on.
tdauc <- make_tdauc("post_score")
boot.tdauc.post <- boot(data=df_post_emc, statistic=tdauc, R=200)
print(boot.tdauc.post)
boot.tdauc.post <- boot(data=df_post_jp, statistic=tdauc, R=200)
print(boot.tdauc.post)

## kaplan meier survival plots grouped by risk group
# The Surv objects are expressed in months (T_r / 365.25 * 12), so the x axis
# is labelled in months (it used to say days).
# ErasmusMC pre
#tmp <- df_pre_emc[df_pre_emc$pre_risk_n %in% c(1,2),]
#tmp.surv.obj <- Surv(time = tmp$T, event = tmp$E)
fit <- survfit(pre.surv.obj.emc ~ pre_risk_n, data=df_pre_emc)
plot(fit, col=c("green", "orange", "red"), xlab = "time in months", ylab="Survival rate",
     main="Progression free survival ERASL-pre score (EMC)", las=1)
legend("topright", legend=c("low", "intermediate", "high"), col=c("green", "orange", "red"), lwd=2)

# Okayama pre
fit <- survfit(pre.surv.obj.jp ~ pre_risk_n, data=df_pre_jp)
plot(fit, col=c("green", "orange", "red"), xlab = "time in months", ylab="Survival rate",
     main="Progression free survival ERASL-pre score (JP)", las=1)
legend("topright", legend=c("low", "intermediate", "high"), col=c("green", "orange", "red"), lwd=2)

# ErasmusMC post
fit <- survfit(post.surv.obj.emc ~ post_risk_n, data=df_post_emc)
plot(fit, col=c("green", "orange", "red"), xlab = "time in months", ylab="Survival rate",
     main="Progression free survival ERASL-post score (EMC)", las=1)
legend("topright", legend=c("low", "intermediate", "high"), col=c("green", "orange", "red"), lwd=2)

# Okayama post
fit <- survfit(post.surv.obj.jp ~ post_risk_n, data=df_post_jp)
plot(fit, col=c("green", "orange", "red"), xlab = "time in months", ylab="Survival rate",
     main="Progression free survival ERASL-post score (JP)", las=1)
legend("topright", legend=c("low", "intermediate", "high"), col=c("green", "orange", "red"), lwd=2)

## Hazard Ratio between risk groups (low risk is the reference)
# Erasmus MC pre
cox.mod <- coxph(pre.surv.obj.emc ~ pre_intermediate + pre_high, data=df_pre_emc)
print('Erasmus mc - pre')
cox.mod ## se(coef) still has to be transformed
round(exp(confint(cox.mod, level = 0.95)), 2)

# Okayama pre
cox.mod <- coxph(pre.surv.obj.jp ~ pre_intermediate + pre_high, data=df_pre_jp)
print('Okayama - pre')
cox.mod
round(exp(confint(cox.mod, level = 0.95)), 2)

# Erasmus MC post
sum(df_post_emc$post_high)
print(' ^ check sum post high')
cox.mod <- coxph(post.surv.obj.emc ~ post_intermediate +post_high, data=df_post_emc)
print('ErasmusMC - post')
cox.mod
round(exp(confint(cox.mod, level = 0.95)), 2)

# Okayama post
cox.mod <- coxph(post.surv.obj.jp ~ post_intermediate + post_high, data=df_post_jp)
print('Okayama - post')
cox.mod
round(exp(confint(cox.mod, level = 0.95)), 2)

## Baseline hazard function:
df_pre_emc$T_r
# erasmus mc
# Models with the score as offset only: no coefficient is estimated, the fit
# serves to obtain the cumulative baseline hazard.
cox.mod.pre <- cph(pre.surv.obj.emc ~ offset(pre_score), x=TRUE, y=TRUE, data=df_pre_emc)
cox.mod.post <- cph(post.surv.obj.emc ~ offset(post_score), x=TRUE, y=TRUE, data=df_post_emc)

# cumulative baseline hazard H0(t)
write.csv(basehaz(cox.mod.pre, centered=TRUE), "E:\\7. ERASL validation\\data\\intrim\\H0_pre_emc.csv")
write.csv(basehaz(cox.mod.post, centered=TRUE), "E:\\7. ERASL validation\\data\\intrim\\H0_post_emc.csv")

# japan
cox.mod.pre <- cph(pre.surv.obj.jp ~ offset(pre_score), x=TRUE, y=TRUE, data=df_pre_jp)
cox.mod.post <- cph(post.surv.obj.jp ~ offset(post_score), x=TRUE, y=TRUE, data=df_post_jp)
write.csv(basehaz(cox.mod.pre, centered=TRUE), 'E:\\7. ERASL validation\\data\\intrim\\H0_pre_jp.csv')
write.csv(basehaz(cox.mod.post, centered=TRUE), 'E:\\7. ERASL validation\\data\\intrim\\H0_post_jp.csv')

#---------- Calibration -----------------------------------------------------------
# Apologies for the jump to Python. At the time of writing this script I was a beginner in R, and after wrestling
# with the visualisations for a few hours it was more convenient to load the data into Python, where I could make the graphs clearer.
# The Python code is included in the supplementary files as well.
#--------------------------------------------------------------------------------------
# Calibration outline
#   compute   - interpolated baseline survival function (to_s0)
#   transform - empirical baseline hazard function to S0 at the median ERASL score
#   for PRE and POST:
#     - plot the approximated published baseline survival curve
#     - plot the empirical baseline curves of EMC/JP
#   for PRE and POST:
#     compute the ERASL prediction of the RFS probability at time t for a grid of
#     n points between t=0 and t=24
#     average the RFS probabilities at t per risk group
#     plot the average risk over time for each risk group
#     superimpose the Kaplan-Meier plot per risk group
#   for JP also construct an alternative calibration plot at t=12,24:
#     extract the KM survival probabilities for each risk group
#     extract the predicted survival probabilities for each risk group
#     transform these to the log(-log()) scale
#     make the plot
#----------------------------------------------------------------------------------------

## Construct baseline hazard function from derivation set
# Survival probabilities at t0 months, for the pre and the post score.
t0 <- c(0, 1, 3, 6, 12, 18, 24)
s_pre <- c(0.9999, 0.998, 0.975, 0.915, 0.822, 0.753, 0.694)
s_post <- c(0.9999, 0.998, 0.975, 0.917, 0.826, 0.757, 0.698)

# Convert a survival probability `x`, belonging to a linear predictor of value
# `median`, into the cumulative baseline hazard:
#   S0 = x^(1 / exp(median)),  H0 = -log(S0)
# Works element-wise on vectors.
to_H0 <- function(x, median) {
  baseline_surv <- exp((log(x)) / (exp(median)))
  -log(baseline_surv)
}

# convert
H0_pre <- to_H0(s_pre, median = 2.558)
H0_post <- to_H0(s_post, median = 2.332)

# Smooth interpolation of H0 over time (months).
f_H0_pre <- smooth.spline(t0, H0_pre)
f_H0_post <- smooth.spline(t0, H0_post)

# Evaluate a fitted H0 spline at follow-up times given in days.
lit_cum_hazard <- function(spline_fit, days) {
  predict(spline_fit, as.numeric(days) / 365.25 * 12)$y
}

# Literature cumulative baseline hazard at each patient's follow-up time.
df_pre_emc$T_pre_star_lit <- lit_cum_hazard(f_H0_pre, df_pre_emc$T_r)
df_pre_jp$T_pre_star_lit <- lit_cum_hazard(f_H0_pre, df_pre_jp$T_r)
df_post_emc$T_post_star_lit <- lit_cum_hazard(f_H0_post, df_post_emc$T_r)
df_post_jp$T_post_star_lit <- lit_cum_hazard(f_H0_post, df_post_jp$T_r)

# reinforce lower bound for ln(): non-positive values become a tiny positive number
df_pre_emc$T_pre_star_lit <- replace(df_pre_emc$T_pre_star_lit,
                                     df_pre_emc$T_pre_star_lit <= 0, 0.0000001)
df_post_emc$T_post_star_lit <- replace(df_post_emc$T_post_star_lit,
                                       df_post_emc$T_post_star_lit <= 0, 0.0000001)
df_pre_jp$T_pre_star_lit <- replace(df_pre_jp$T_pre_star_lit,
                                    df_pre_jp$T_pre_star_lit <= 0, 0.0000001)
df_post_jp$T_post_star_lit[df_post_jp$T_post_star_lit <=0] <- 0.0000001 ## Weibull calibration function # EMC - pre fit <- survreg(Surv(T_pre_star_lit, E_r) ~ pre_score, data=df_pre_emc, dist="weibull") summary(fit) # jp - pre fit <- survreg(Surv(T_pre_star_lit, E_r) ~ pre_score, data=df_pre_jp, dist="weibull") summary(fit) # EMC - post fit <- survreg(Surv(T_post_star_lit, E_r) ~ post_score, data=df_post_emc, dist="weibull") summary(fit) # jp - post fit <- survreg(Surv(T_post_star_lit, E_r) ~ post_score, data=df_post_jp, dist="weibull") summary(fit) ## Exponential calibration function # EMC - pre fit <- survreg(Surv(T_pre_star_lit, E_r) ~ pre_score, data=df_pre_emc, dist="exponential") summary(fit) # jp - pre fit <- survreg(Surv(T_pre_star_lit, E_r) ~ pre_score, data=df_pre_jp, dist="exponential") summary(fit) # EMC - post fit <- survreg(Surv(T_post_star_lit, E_r) ~ post_score, data=df_post_emc, dist="exponential") summary(fit) # jp - post fit <- survreg(Surv(T_post_star_lit, E_r) ~ post_score, data=df_post_jp, dist="exponential") summary(fit) ### Explain 10. 
### checking calibration results
# The analysis is a copy of the one done before the Weibull recalibration
# The calibration of the survival function is done by: exp(-exp(1/y*(log(-log(f_s0_pre(t)))-a-b*score)))
# With f_s0_pre the baseline survival function for the pre erasl, and a, b, y the calibration parameters obtained from the Weibull calibration
###

# Fit one Cox model described by term labels and print it.
#
# response  : name (string) of the survival response object, e.g.
#             "pre.surv.obj.emc".
# terms     : character vector of right-hand-side terms.
# data_name : name (string) of the data frame holding the covariates.
# env       : environment in which the formula and the call are evaluated;
#             defaults to the caller, so names resolve as they would for a
#             coxph() call written out at the call site.
# Returns the fitted coxph object invisibly.
#
# `data` is passed as a symbol so the printed Call shows the data set name and
# the full formula. print() is explicit because top-level auto-printing is
# skipped when the script is run through source().
fit_cox <- function(response, terms, data_name, env = parent.frame()) {
  force(env)
  model_formula <- as.formula(
    paste(response, "~", paste(terms, collapse = " + ")),
    env = env
  )
  cox_fit <- do.call(
    "coxph",
    list(formula = model_formula, data = as.name(data_name)),
    envir = env
  )
  print(cox_fit)
  invisible(cox_fit)
}

# Fit `selected + candidate` for every candidate in `pool` that is not already
# in `selected`, in pool order, printing each model.
# Returns the last fitted model invisibly (NULL when no candidate is left).
run_selection_round <- function(response, selected, pool, data_name,
                                env = parent.frame()) {
  force(env)
  last_fit <- NULL
  for (candidate in setdiff(pool, selected)) {
    last_fit <- fit_cox(response, c(selected, candidate), data_name, env = env)
  }
  invisible(last_fit)
}

# model extension with HBV, HCV (score entered as offset, i.e. coefficient
# fixed at 1)
hepatitis_terms <- c("HBV", "HCV")
# pre emc
cox.mod.pre <- run_selection_round(
  "pre.surv.obj.emc", "offset(pre_score)", hepatitis_terms, "df_pre_emc"
)
# pre jp
cox.mod.pre <- run_selection_round(
  "pre.surv.obj.jp", "offset(pre_score)", hepatitis_terms, "df_pre_jp"
)
# post emc
cox.mod.post <- run_selection_round(
  "post.surv.obj.emc", "offset(post_score)", hepatitis_terms, "df_post_emc"
)
# post jp
cox.mod.post <- run_selection_round(
  "post.surv.obj.jp", "offset(post_score)", hepatitis_terms, "df_post_jp"
)

#--------------- Forward selection - re-estimation_calibration ----------------------------
# Candidate covariates, in the order in which they are tried in every round.
# The term carried forward after each round was chosen by hand from the
# printed output; it is hard-coded below.
pre_candidates <- c(
  "gender", "albigrade_1", "log_afp", "log_diamet", "aantaltum_23",
  "HBV", "HCV"
)
post_candidates <- c(
  "gender", "albigrade_1", "log_afp", "log_diamet", "aantaltum_23",
  "Microvascular_invasion", "HBV", "HCV"
)

## forward selection Erasmus MC pre
# round 1 (preceded by the score-only reference model)
cox.mod.pre <- fit_cox("pre.surv.obj.emc", "pre_score", "df_pre_emc")
cox.mod.pre <- run_selection_round(
  "pre.surv.obj.emc", "pre_score", pre_candidates, "df_pre_emc"
)
# round 2
cox.mod.pre <- run_selection_round(
  "pre.surv.obj.emc", c("pre_score", "log_afp"), pre_candidates, "df_pre_emc"
)

## forward selection Erasmus MC post
# round 1 (preceded by the score-only reference model)
cox.mod.post <- fit_cox("post.surv.obj.emc", "post_score", "df_post_emc")
cox.mod.post <- run_selection_round(
  "post.surv.obj.emc", "post_score", post_candidates, "df_post_emc"
)
# round 2
cox.mod.post <- run_selection_round(
  "post.surv.obj.emc", c("post_score", "Microvascular_invasion"),
  post_candidates, "df_post_emc"
)
# round 3
cox.mod.post <- run_selection_round(
  "post.surv.obj.emc", c("post_score", "Microvascular_invasion", "log_afp"),
  post_candidates, "df_post_emc"
)

## forward selection Okayama pre
cox.mod.pre <- fit_cox("pre.surv.obj.jp", "pre_score", "df_pre_jp")
# round 1
cox.mod.pre <- run_selection_round(
  "pre.surv.obj.jp", "pre_score", pre_candidates, "df_pre_jp"
)
# round 2
cox.mod.pre <- run_selection_round(
  "pre.surv.obj.jp", c("pre_score", "gender"), pre_candidates, "df_pre_jp"
)

## forward selection Okayama post
# Score-only reference model: fitted in the other three sections but absent
# here before; added so round 1 has the same comparison baseline.
cox.mod.post <- fit_cox("post.surv.obj.jp", "post_score", "df_post_jp")
# round 1
cox.mod.post <- run_selection_round(
  "post.surv.obj.jp", "post_score", post_candidates, "df_post_jp"
)
# round 2
cox.mod.post <- run_selection_round(
  "post.surv.obj.jp", c("post_score", "gender"), post_candidates, "df_post_jp"
)
# cox.mod.pre / cox.mod.post end up holding the last Okayama round-2 models
# (score + gender + HCV).