# tcgaLUADSurvival.R # # Stage I Lung Adenocarcinoma Patient Survival Prediction with Quantitative Image Features # rm(list=ls()) # load required R packages library(survival) library(glmnet) library(GGally) library(ggplot2) library(rms) # read in feature data load("../../tcgaLUADData.RData") ## Stage I only stageI<-(X[,9880]>0 & X[,9880]<20) X<-X[stageI,1:9879] cellProfilerIDReps<-cellProfilerIDReps[stageI] cellProfilerFileName<-cellProfilerFileName[stageI] survivedDaysReps<-survivedDaysReps[stageI]+1 eventReps<-eventReps[stageI] Ymatrix<-Ymatrix[stageI] Ymatrix<-Surv(Ymatrix[,1],Ymatrix[,2]) # initialize survivedDays, event, and arrays to store the results survivedDays<-Ymatrix[,1] event<-Ymatrix[,2] alpha<-0.1 predTest<-rep(0,dim(X)[1]) predTestGroup<-rep(0,dim(X)[1]) results<-array(0,dim=c(1,6)) # run leave-one-out cross-validation for (fold in 1:dim(X)[1]){ print(fold) testSet<-fold trainingSet<-which(!(1:dim(X)[1] %in% testSet)) # train a Cox model cv.tr<-cv.glmnet(as.matrix(X[trainingSet,]),Ymatrix[trainingSet],family='cox',alpha=alpha,nfolds=10) if (cv.tr$lambda.min==cv.tr$lambda[1]){ predictSIndex=2 } else { predictSIndex=which(cv.tr$lambda==cv.tr$lambda.min) } # make prediction on the test case predAll<-predict(cv.tr,as.matrix(X),s=cv.tr$lambda[predictSIndex],type='response') predTrain<-predict(cv.tr,as.matrix(X[trainingSet,]),s=cv.tr$lambda[predictSIndex],type='response') predTest[fold]<-predict(cv.tr,as.matrix(t(as.data.frame(X[testSet,]))),s=cv.tr$lambda[predictSIndex],type='response') nPGroups<-2 seqBreaks<-c(0,0.5,1) # use the median of survival indices in the training set as the threshold thres<-median(predTrain) plotTrain<-data.frame(predTrain,survivedDays[trainingSet],event[trainingSet]) colnames(plotTrain)<-c("predTrain","V2","V3") # classify patients in the training set for (i in 1:dim(plotTrain)[1]){ if (predTrain[i]=thres){ predTestGroup[fold]<-"Group 2" } } # build a data frame that stores the actual and predicted survival for the set plotTest<-data.frame(predTestGroup,survivedDays,event) colnames(plotTest)<-c("predTest","V2","V3") # calculate performance statistics sdfTrain<-survdiff(Surv(survivedDays[trainingSet], event[trainingSet]) ~ predTrain, plotTrain) sdfTest<-survdiff(Surv(survivedDays, event) ~ plotTest[,1], plotTest) pTrain<-1-pchisq(sdfTrain$chisq, length(sdfTrain$n)-1) pTest<-1-pchisq(sdfTest$chisq, length(sdfTest$n)-1) trainConcordance<-survConcordance(Surv(survivedDays[trainingSet], event[trainingSet]) ~ predTrain, as.data.frame(X[trainingSet,])) testConcordance<-survConcordance(Surv(survivedDays, event) ~ predTest, as.data.frame(X)) results[1]<-cv.tr$lambda[predictSIndex] results[2]<-cv.tr$glmnet.fit$df[predictSIndex] results[3]<-trainConcordance$concordance results[4]<-testConcordance$concordance results[5]<-pTrain results[6]<-pTest # build survival curves for the training set plotTrainGGsurv<-plotTrain plotTrainGGsurv[,2]<-plotTrainGGsurv[,2]/30 plotTrain.surv <- survfit(Surv(V2, V3) ~ predTrain, data = plotTrainGGsurv) # build survival curves for the test set plotTestGGsurv<-plotTest plotTestGGsurv[,2]<-plotTestGGsurv[,2]/30 plotTest.surv <- survfit(Surv(V2, V3) ~ predTest, data = plotTestGGsurv) # output survival curves for the training set pdf(paste("LOOCV1LambdaAuto/0.5/plots/plotTrainLambda",results[1],"Alpha",alpha,"Groups",nPGroups,".pdf", sep=""), width=11, height=7) print( ggsurv(plotTrain.surv) + guides(linetype = F) + xlab("Months") + ylab("Probability of Survival") + ylim(0,1) + theme(plot.title = element_text(size=24,face="bold"), legend.text=element_text(size=24), legend.title=element_text(size=24), axis.text=element_text(size=18), axis.title=element_text(size=24,face="bold"), legend.justification=c(1,1), legend.position=c(1,1)) + scale_colour_discrete(name = 'Prognostic Groups') ) dev.off() # output survival curves for the test set pdf(paste("plotTestLambda",results[1],"Alpha",alpha,"Groups",nPGroups,".pdf", sep=""), width=11, height=7) print( ggsurv(plotTest.surv) + guides(linetype = F) + xlab("Months") + ylab("Probability of Survival") + ylim(0,1) + theme(plot.title = element_text(size=24,face="bold"), legend.text=element_text(size=24), legend.title=element_text(size=24), axis.text=element_text(size=18), axis.title=element_text(size=24,face="bold"), legend.justification=c(1,1), legend.position=c(1,1)) + scale_colour_discrete(name = 'Prognostic Groups') ) dev.off() # output performance statistics write.table(results, paste("LUADStageIAlpha",alpha,"Results.csv",sep=""), quote=F, row.names=F, col.names=F, sep=",")