######################################################################################################################################################################################################################## #Load Packages ######################################################################################################################################################################################################################## library(truncnorm) library(lattice) library(nlme) library(MCMCpack) library(combinat) ######################################################################################################################################################################################################################## #Load Data ######################################################################################################################################################################################################################## data=read.csv("Training_data.csv") data_validation=read.csv("Validation_data.csv") set.seed(1) #set seed so each run matches reported results ######################################################################################################################################################################################################################## #Functions for model fitting ######################################################################################################################################################################################################################## posterior_mu_theta=function(Theta,sigma_theta_2,mu_0,sigma_0_2) { mu_0_star=(sigma_theta_2*mu_0)/(sigma_theta_2 + N*sigma_0_2) + (sigma_0_2*sum(Theta))/(sigma_theta_2 + N*sigma_0_2) sigma_0_star_2=(sigma_theta_2*sigma_0_2)/(sigma_theta_2 + N*sigma_0_2) list(mu_0_star=mu_0_star,sigma_0_star_2=sigma_0_star_2) } posterior_sigma_theta_2=function(Theta,mu_theta,a_theta,b_theta) { a_theta_star=a_theta + N/2 b_theta_star=b_theta + sum((Theta-mu_theta)^2)/2 list(a_theta_star=a_theta_star,b_theta_star=b_theta_star) } posterior_sigma_2=function(Y,t,D,J,Theta,I,gamma,Tau) { a_sigma=sum(J)/2 temp_I=rep(c(rep(0,n_0),I),J) temp_gamma=rep(c(rep(0,n_0),gamma),J) temp_gamma[is.na(temp_gamma)]=0 temp_Tau=rep(c(rep(0,n_0),Tau),J) temp_Tau[is.na(temp_Tau)]=0 Theta_star=rep(Theta,J)+temp_gamma*(t-temp_Tau)*as.numeric(t>temp_Tau) b_sigma=sum((Y-Theta_star)^2)/2 list(a_sigma=a_sigma,b_sigma=b_sigma) } posterior_mu_gamma=function(I,gamma,sigma_gamma_2,mu_1,sigma_1_2) { n_I=sum(I) mu_1_star=(sigma_gamma_2*mu_1)/(sigma_gamma_2 + n_I*sigma_1_2) + (sigma_1_2*sum(log(gamma),na.rm=T))/(sigma_gamma_2 + n_I*sigma_1_2) sigma_1_star_2=(sigma_gamma_2*sigma_1_2)/(sigma_gamma_2 + n_I*sigma_1_2) list(mu_1_star=mu_1_star,sigma_1_star_2=sigma_1_star_2) } posterior_sigma_gamma_2=function(I,gamma,mu_gamma,a_gamma,b_gamma) { n_I=sum(I) a_gamma_star=a_gamma+n_I/2 b_gamma_star=b_gamma+sum((log(gamma)-mu_gamma)^2,na.rm=T)/2 list(a_gamma_star=a_gamma_star,b_gamma_star=b_gamma_star) } posterior_theta=function(Y_i,t_i,D_i,J_i,mu_theta,sigma_theta_2,sigma_2,I_i,gamma_i,tau_i) { Y_star_i=Y_i if(is.na(I_i)==FALSE & I_i==1) { Y_star_i=Y_i-gamma_i*(t_i-tau_i)*as.numeric(t_i>tau_i) } mu_theta_star=(sigma_2*mu_theta)/(sigma_2 + J_i*sigma_theta_2) + (sigma_theta_2*sum(Y_star_i))/(sigma_2 + J_i*sigma_theta_2) sigma_theta_star_2=(sigma_2*sigma_theta_2)/(sigma_2 + J_i*sigma_theta_2) list(mu_theta_star=mu_theta_star,sigma_theta_star_2=sigma_theta_star_2) } log_likelihood_cases=function(Y_i,t_i,J_i,sigma_2,Theta_i,I_i,gamma_i,tau_i) { log_pr=NA if(I_i==0) { log_pr=log((2*pi*sigma_2)^(-J_i/2)) + (-sum((Y_i-Theta_i)^2)/(2*sigma_2)) } if(I_i==1) { log_pr=log((2*pi*sigma_2)^(-J_i/2)) + (-sum((Y_i-Theta_i-gamma_i*(t_i-tau_i)*as.numeric(t_i>tau_i))^2)/(2*sigma_2)) } log_pr } update_I=function(Y_i,t_i,J_i,d_i,mu_gamma,sigma_gamma_2,mu_tau,sigma_tau_2,Tau_star,sigma_2,Theta_i,I_i,gamma_i,tau_i,Pi) { #new_I=NA new_gamma=NA new_tau=NA if(I_i==0) { gamma_star=exp(rnorm(1,mean=mu_gamma,sd=sqrt(sigma_gamma_2))) tau_star=rtruncnorm(1,a=d_i-Tau_star,b=d_i,mean=d_i-mu_tau,sd=sqrt(sigma_tau_2)) temp_log_r=log_likelihood_cases(Y_i,t_i,J_i,sigma_2,Theta_i,I_i=1,gamma_i=gamma_star,tau_i=tau_star)-log_likelihood_cases(Y_i,t_i,J_i,sigma_2,Theta_i,I_i=0,gamma_i=NA,tau_i=NA) log_r=min(temp_log_r+log(Pi)-log(1-Pi),log(1)) u=runif(1,min=0,max=1) new_I=as.numeric(log(u)1.01) summary_stats[summary_stats[,8]>1.01,c(1,8)] #All less than 1.09 #Extract samples from posterior distributions of hyper parameters to use in implementation of screening in the validation cohort sigma_1_posterior=c(sigma_1_2_out_thin,sigma_1_2_out_2_thin) sigma_2_posterior=c(sigma_2_2_out_thin,sigma_2_2_out_2_thin) sigma_3_posterior=c(sigma_3_2_out_thin,sigma_3_2_out_2_thin) mu_theta_1_posterior=c(mu_theta_1_out_thin,mu_theta_1_out_2_thin) mu_theta_2_posterior=c(mu_theta_2_out_thin,mu_theta_2_out_2_thin) mu_theta_3_posterior=c(mu_theta_3_out_thin,mu_theta_3_out_2_thin) sigma_theta_1_2_posterior=c(sigma_theta_1_2_out_thin,sigma_theta_1_2_out_2_thin) sigma_theta_2_2_posterior=c(sigma_theta_2_2_out_thin,sigma_theta_2_2_out_2_thin) sigma_theta_3_2_posterior=c(sigma_theta_3_2_out_thin,sigma_theta_3_2_out_2_thin) Pi_1_posterior=c(Pi_1_out_thin,Pi_1_out_2_thin) Pi_2_posterior=c(Pi_2_out_thin,Pi_2_out_2_thin) Pi_3_posterior=c(Pi_3_out_thin,Pi_3_out_2_thin) mu_gamma_1_posterior=c(mu_gamma_1_out_thin,mu_gamma_1_out_2_thin) mu_gamma_2_posterior=c(mu_gamma_2_out_thin,mu_gamma_2_out_2_thin) mu_gamma_3_posterior=c(mu_gamma_3_out_thin,mu_gamma_3_out_2_thin) sigma_gamma_1_2_posterior=c(sigma_gamma_1_2_out_thin,sigma_gamma_1_2_out_2_thin) sigma_gamma_2_2_posterior=c(sigma_gamma_2_2_out_thin,sigma_gamma_2_2_out_2_thin) sigma_gamma_3_2_posterior=c(sigma_gamma_3_2_out_thin,sigma_gamma_3_2_out_2_thin) mu_tau_1_posterior=c(mu_tau_1_out_thin,mu_tau_1_out_2_thin) mu_tau_2_posterior=c(mu_tau_2_out_thin,mu_tau_2_out_2_thin) mu_tau_3_posterior=c(mu_tau_3_out_thin,mu_tau_3_out_2_thin) sigma_tau_1_2_posterior=c(sigma_tau_1_2_out_thin,sigma_tau_1_2_out_2_thin) sigma_tau_2_2_posterior=c(sigma_tau_2_2_out_thin,sigma_tau_2_2_out_2_thin) sigma_tau_3_2_posterior=c(sigma_tau_3_2_out_thin,sigma_tau_3_2_out_2_thin) S=length(Pi_1_posterior) ######################################################################################################################################################################################################################## #Implement screening in the validation data (pg 11 Supplementary Materials) ######################################################################################################################################################################################################################## #Get sample size N=length(unique(data_validation$ID)) #total number of patients n_0=length(unique(data_validation$ID[(data_validation$D==0)])) #number of control patients #Extract vectors d=data_validation$d[(data_validation$obs_number==1)] D=data_validation$D[(data_validation$obs_number==1)] J=rle(data_validation$ID)$lengths subject_ID=unique(data_validation$ID) #observed HCC diagnosis times in training cohort d_HCC=sort(unique(data$d[data$D==1])) p_Y1_noHCC=rep(NA,length(data_validation$ID)) p_Y2_noHCC=rep(NA,length(data_validation$ID)) p_Y3_noHCC=rep(NA,length(data_validation$ID)) p_Y1_HCC=rep(NA,length(data_validation$ID)) p_Y2_HCC=rep(NA,length(data_validation$ID)) p_Y3_HCC=rep(NA,length(data_validation$ID)) l=1 for(i in 1:N) { subject_data=subset(data_validation,data_validation$ID==subject_ID[i]) for(j in 1:J[i]) { temp_Y1=rep(subject_data$Y1[1:j],rep(S,j)) temp_Y2=rep(subject_data$Y2[1:j],rep(S,j)) temp_Y3=rep(subject_data$Y3[1:j],rep(S,j)) temp_t=rep(subject_data$t[1:j],rep(S,j)) #Calculate Pr(Y1|No HCC) for each patient at each screening time theta_1_posterior=rnorm(n=S,mean=mu_theta_1_posterior,sd=sqrt(sigma_theta_1_2_posterior)) prob_Y1_noHCC=dnorm(temp_Y1,mean=theta_1_posterior,sd=sqrt(sigma_1_posterior)) prob_Y1_noHCC_1=array(prob_Y1_noHCC,c(S,j)) prob_Y1_noHCC_2=apply(prob_Y1_noHCC_1,1,prod,na.rm=TRUE) p_Y1_noHCC[l]=mean(prob_Y1_noHCC_2) #Calculate Pr(Y2|No HCC) for each patient at each screening time theta_2_posterior=rnorm(n=S,mean=mu_theta_2_posterior,sd=sqrt(sigma_theta_2_2_posterior)) prob_Y2_noHCC=dnorm(temp_Y2,mean=theta_2_posterior,sd=sqrt(sigma_2_posterior)) prob_Y2_noHCC_1=array(prob_Y2_noHCC,c(S,j)) prob_Y2_noHCC_2=apply(prob_Y2_noHCC_1,1,prod,na.rm=TRUE) p_Y2_noHCC[l]=mean(prob_Y2_noHCC_2) #Calculate Pr(Y3|No HCC) for each patient at each screening time theta_3_posterior=rnorm(n=S,mean=mu_theta_3_posterior,sd=sqrt(sigma_theta_3_2_posterior)) prob_Y3_noHCC=dnorm(temp_Y3,mean=theta_3_posterior,sd=sqrt(sigma_3_posterior)) prob_Y3_noHCC_1=array(prob_Y3_noHCC,c(S,j)) prob_Y3_noHCC_2=apply(prob_Y3_noHCC_1,1,prod,na.rm=TRUE) p_Y3_noHCC[l]=mean(prob_Y3_noHCC_2) #Get draws of vector c(I_1,I_2) I_1_posterior=rbinom(n=S,size=1,prob=Pi_1_posterior) I_2_posterior=rbinom(n=S,size=1,prob=Pi_2_posterior) I_3_posterior=rbinom(n=S,size=1,prob=Pi_3_posterior) ############ Draw d from empirical distribution ############ u_temp=runif(S) d_draws=quantile(d_HCC,u_temp,type=1) #Calculate Pr(Y1|HCC) for each patient at each screening time theta_1_posterior=rnorm(n=S,mean=mu_theta_1_posterior,sd=sqrt(sigma_theta_1_2_posterior)) gamma_1_posterior=exp(rnorm(n=S,mean=mu_gamma_1_posterior,sd=sqrt(sigma_gamma_1_2_posterior))) Tau_1_posterior=rtruncnorm(n=S,a=(d_draws-Tau_star_1),b=d_draws,mean=d_draws-mu_tau_1_posterior,sd=sqrt(sigma_tau_1_2_posterior)) mean_Y1=rep(theta_1_posterior,j) + rep(I_1_posterior,j)*rep(gamma_1_posterior,j)*(temp_t-Tau_1_posterior)*as.numeric(temp_t>Tau_1_posterior) prob_Y1_HCC=dnorm(temp_Y1,mean=mean_Y1,sd=sqrt(sigma_1_posterior)) prob_Y1_HCC_1=array(prob_Y1_HCC,c(S,j)) prob_Y1_HCC_2=apply(prob_Y1_HCC_1,1,prod,na.rm=TRUE) p_Y1_HCC[l]=mean(prob_Y1_HCC_2) #Calculate Pr(Y2|HCC) for each patient at each screening time theta_2_posterior=rnorm(n=S,mean=mu_theta_2_posterior,sd=sqrt(sigma_theta_2_2_posterior)) gamma_2_posterior=exp(rnorm(n=S,mean=mu_gamma_2_posterior,sd=sqrt(sigma_gamma_2_2_posterior))) Tau_2_posterior=rtruncnorm(n=S,a=(d_draws-Tau_star_2),b=d_draws,mean=d_draws-mu_tau_2_posterior,sd=sqrt(sigma_tau_2_2_posterior)) mean_Y2=rep(theta_2_posterior,j) + rep(I_2_posterior,j)*rep(gamma_2_posterior,j)*(temp_t-Tau_2_posterior)*as.numeric(temp_t>Tau_2_posterior) prob_Y2_HCC=dnorm(temp_Y2,mean=mean_Y2,sd=sqrt(sigma_2_posterior)) prob_Y2_HCC_1=array(prob_Y2_HCC,c(S,j)) prob_Y2_HCC_2=apply(prob_Y2_HCC_1,1,prod,na.rm=TRUE) p_Y2_HCC[l]=mean(prob_Y2_HCC_2) #Calculate Pr(Y3|HCC) for each patient at each screening time theta_3_posterior=rnorm(n=S,mean=mu_theta_3_posterior,sd=sqrt(sigma_theta_3_2_posterior)) gamma_3_posterior=exp(rnorm(n=S,mean=mu_gamma_3_posterior,sd=sqrt(sigma_gamma_3_2_posterior))) Tau_3_posterior=rtruncnorm(n=S,a=(d_draws-Tau_star_3),b=d_draws,mean=d_draws-mu_tau_3_posterior,sd=sqrt(sigma_tau_3_2_posterior)) mean_Y3=rep(theta_3_posterior,j) + rep(I_3_posterior,j)*rep(gamma_3_posterior,j)*(temp_t-Tau_3_posterior)*as.numeric(temp_t>Tau_3_posterior) prob_Y3_HCC=dnorm(temp_Y3,mean=mean_Y3,sd=sqrt(sigma_3_posterior)) prob_Y3_HCC_1=array(prob_Y3_HCC,c(S,j)) prob_Y3_HCC_2=apply(prob_Y3_HCC_1,1,prod,na.rm=TRUE) p_Y3_HCC[l]=mean(prob_Y3_HCC_2) l=l+1 } #print(c(i,j,l)) } #Get results p_HCC=mean(data$D[(data$obs_number==1)]) #prior probability of HCC in training data p_noHCC=1-p_HCC data_validation["posterior_risk_M1"]=(p_Y1_HCC*p_HCC)/(p_Y1_noHCC*p_noHCC) data_validation["posterior_risk_M2"]=(p_Y2_HCC*p_HCC)/(p_Y2_noHCC*p_noHCC) data_validation["posterior_risk_M3"]=(p_Y3_HCC*p_HCC)/(p_Y3_noHCC*p_noHCC) ######################################################################################################################################################################################################################## #Estimate ROC(0.1) ######################################################################################################################################################################################################################## specificity_fixed=0.9 cut_off_M1=quantile(data_validation$posterior_risk_M1[(data_validation$D==0)],probs=specificity_fixed,type=1) cut_off_M2=quantile(data_validation$posterior_risk_M2[(data_validation$D==0)],probs=specificity_fixed,type=1) cut_off_M3=quantile(data_validation$posterior_risk_M3[(data_validation$D==0)],probs=specificity_fixed,type=1) at_least_one_positive_M1=rep(NA,N-n_0) at_least_one_positive_M2=rep(NA,N-n_0) at_least_one_positive_M3=rep(NA,N-n_0) for(i in 1:(N-n_0)) { subject_data=subset(data_validation,data_validation$ID==subject_ID[i+n_0]) at_least_one_positive_M1[i]=as.numeric(sum(subject_data$posterior_risk_M1>cut_off_M1,na.rm=T)>0) at_least_one_positive_M2[i]=as.numeric(sum(subject_data$posterior_risk_M2>cut_off_M2,na.rm=T)>0) at_least_one_positive_M3[i]=as.numeric(sum(subject_data$posterior_risk_M3>cut_off_M3,na.rm=T)>0) } round(mean(at_least_one_positive_M1)*100,2) #71.43 round(mean(at_least_one_positive_M2)*100,2) #67.35 round(mean(at_least_one_positive_M3)*100,2) #63.27