# ---- Setup ------------------------------------------------------------------
# Use the author's project folder when it exists; on any other machine the
# script keeps the current working directory instead of aborting in setwd().
project_dir <- "C:\\Users\\srijana\\Desktop\\RcurrentProject"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

Nut <- read.csv("Nutrients_plusOne.csv")

# Later sections of this script refer to columns by bare name, so the data
# frame has to stay on the search path. The original called detach(Nut)
# immediately after attach(Nut), which made every bare column reference fail
# with "object not found". Any copy left over from a previous run is removed
# first so that re-running the script does not stack attached copies.
# NOTE(review): prefer explicit Nut$column references and drop attach() once
# the remaining sections have been converted.
if ("Nut" %in% search()) {
  detach(Nut)
}
attach(Nut)

# ---- Derived variables --------------------------------------------------------
# Waist-to-hip ratio and its two-level category; the cut-off depends on Sex
# (0.95 where Sex == 2, 0.8 where Sex == 1).
Nut$WH.ratio <- Nut$Waist_Cir / Nut$Hip_Cir
Nut$WH.ratioCat <- ifelse(
  (Nut$Sex == 2 & Nut$WH.ratio < 0.95) | (Nut$Sex == 1 & Nut$WH.ratio < 0.8),
  1,
  2
)
Nut$WH.ratioCat <- as.factor(Nut$WH.ratioCat)

# BMI category: 1 below 27.5, otherwise 2.
Nut$BMIcat <- ifelse(Nut$BMI < 27.5, 1, 2)

# Factor copies in the workspace; they mask the attached columns of the same
# name and are used by the tables below and by later sections.
CAD_group <- as.factor(Nut$CAD_group)
Smoking <- as.factor(Nut$Smoking)
Alcohol <- as.factor(Nut$Alcohol)
Dyslipidemia <- as.factor(Nut$Dyslipidemia)
T2DM <- as.factor(Nut$T2DM)
Physical.activity <- as.factor(Nut$Physical.activity)

# Row selectors for the two CAD groups.
grp0 <- Nut$CAD_group == 0
grp1 <- Nut$CAD_group == 1

# McNemar test (no continuity correction) of one variable between the groups.
# NOTE(review): all paired tests in this script match the i-th group-0 row
# with the i-th group-1 row; presumably the file is sorted so that these are
# the matched pairs identified by Pair_CADgroup -- confirm.
paired_mcnemar <- function(x) {
  mcnemar.test(x[grp0], x[grp1], correct = FALSE)
}

# ---- Sociodemographic variables (Table 1) -------------------------------------
summary(Nut$Age)
summary(Nut$Age[grp0])
summary(Nut$Age[grp1])
# The original also called wilcox.test(age1, age2, ...) on two objects that
# are never defined; the call below is the same test on the actual data.
wilcox.test(Nut$Age[grp0], Nut$Age[grp1], paired = TRUE, correct = FALSE)

table(T2DM, CAD_group)
paired_mcnemar(T2DM)

table(Nut$Hypertension, CAD_group)
paired_mcnemar(Nut$Hypertension)

table(Dyslipidemia, CAD_group)
paired_mcnemar(Dyslipidemia)

table(Nut$BMIcat, CAD_group)
paired_mcnemar(Nut$BMIcat)

table(Nut$WH.ratioCat, CAD_group)
paired_mcnemar(Nut$WH.ratioCat)

table(Alcohol, CAD_group)
paired_mcnemar(Alcohol)

table(Smoking, CAD_group)
paired_mcnemar(Smoking)

table(Physical.activity, CAD_group)
paired_mcnemar(Physical.activity)

# ---- Nutrient variables (Table 2, S2 Table): food energy ----------------------
tapply(Nut$Food.energy, CAD_group, sd)
# Fke1 holds group 1 and Fke2 holds group 0, as in the original script.
Fke1 <- Nut$Food.energy[grp1]
summary(Fke1)
Fke2 <- Nut$Food.energy[grp0]
summary(Fke2)
wilcox.test(Fke1, Fke2, paired = TRUE, correct = FALSE)
t.test(Fke1, Fke2, paired = TRUE)
# ---- Nutrient variables (Table 2, S2 Table): Protein to Riboflavin ------------

# Paired comparison of one nutrient column between the two CAD groups.
#
# nutrient: name of a column of `data` (character scalar).
# data:     data frame holding that column and `CAD_group` (coded 0 / 1).
#
# Returns a list with the per-group standard deviation, the summary of each
# group, the paired Wilcoxon signed-rank test (no continuity correction) and
# the paired t-test. Columns are read from `data` explicitly, so the result
# does not depend on the data frame being attach()'ed.
# NOTE(review): pairing is by row order within each group; presumably the file
# is sorted so that the i-th rows of both groups form a matched pair -- confirm.
compare_nutrient <- function(nutrient, data = Nut) {
  values <- data[[nutrient]]
  group0 <- values[data$CAD_group == 0]
  group1 <- values[data$CAD_group == 1]
  list(
    sd = tapply(values, data$CAD_group, sd),
    summary_group0 = summary(group0),
    summary_group1 = summary(group1),
    wilcoxon = wilcox.test(group0, group1, paired = TRUE, correct = FALSE),
    t_test = t.test(group0, group1, paired = TRUE)
  )
}

# The same five statistics are reported for every nutrient. The original
# Carbohydrate block alone omitted correct = FALSE in the Wilcoxon test; the
# helper applies the same setting to all nutrients.
for (nutrient in c(
  "Protein", "Total.fat.oil", "Carbohydrate", "Fiber", "Calcium",
  "Phosphorus", "Iron", "Zinc", "Thiamine", "Riboflavin"
)) {
  cat("\n==== ", nutrient, " ====\n", sep = "")
  print(compare_nutrient(nutrient))
}
# ---- Nutrient variables (Table 2, S2 Table): Niacin to Cholesterol ------------

# Paired comparison of one nutrient column between the two CAD groups.
#
# nutrient: name of a column of `data` (character scalar).
# data:     data frame holding that column and `CAD_group` (coded 0 / 1).
#
# Returns a list with the per-group standard deviation, the summary of each
# group, the paired Wilcoxon signed-rank test (no continuity correction) and
# the paired t-test. Columns are read from `data` explicitly, so the result
# does not depend on the data frame being attach()'ed.
# NOTE(review): pairing is by row order within each group; presumably the file
# is sorted so that the i-th rows of both groups form a matched pair -- confirm.
compare_nutrient <- function(nutrient, data = Nut) {
  values <- data[[nutrient]]
  group0 <- values[data$CAD_group == 0]
  group1 <- values[data$CAD_group == 1]
  list(
    sd = tapply(values, data$CAD_group, sd),
    summary_group0 = summary(group0),
    summary_group1 = summary(group1),
    wilcoxon = wilcox.test(group0, group1, paired = TRUE, correct = FALSE),
    t_test = t.test(group0, group1, paired = TRUE)
  )
}

for (nutrient in c(
  "Niacin", "Vitamin.C", "Beta.carotene", "Vitamin.A.R.E.",
  "PUFA", "SFA", "MUFA", "Cholesterol"
)) {
  cat("\n==== ", nutrient, " ====\n", sep = "")
  print(compare_nutrient(nutrient))
}

# ---- Correlation matrix of nutrients ------------------------------------------
NedCor <- subset(
  Nut,
  select = c(
    Food.energy, Carbohydrate, Protein, Total.fat.oil, PUFA, MUFA, SFA,
    Cholesterol, Fiber, Thiamine, Niacin, Riboflavin, Beta.carotene,
    Vitamin.A.R.E., Vitamin.C, Zinc, Iron, Calcium, Phosphorus
  )
)
round(cor(NedCor), 2)

# Open the RTF document that receives the correlation table; the table is
# added and the document closed by the statements that follow this section.
library(rtf)
ColNut <- RTF("rts.doc")
addParagraph(ColNut, "correlation among nutrients\n")
# Write the rounded correlation matrix into the open RTF document and close it.
addTable(ColNut, as.data.frame(round(cor(NedCor), 2)))
done(ColNut)

# ---- Conditional logistic regression ------------------------------------------
library(survival)
library(car)

# Full model: all candidate nutrients and covariates, stratified on the
# matched-pair identifier.
# NOTE(review): with data = Nut the formula takes its variables from the data
# frame first, so the factor copies created in the workspace earlier (T2DM,
# Smoking, ...) are presumably not the ones used here -- confirm the coding.
fitnut1 <- clogit(
  CAD_group ~ Food.energy + Carbohydrate + Total.fat.oil + Fiber +
    Thiamine + Riboflavin + Beta.carotene + Vitamin.C + Zinc + Vitamin.A.R.E. +
    Iron + PUFA + MUFA + SFA + Cholesterol + T2DM + Dyslipidemia + Alcohol +
    Smoking + Physical.activity + Age + BMI + WH.ratioCat +
    strata(Pair_CADgroup),
  data = Nut
)
summary(fitnut1)

# Flag predictors whose square-root VIF exceeds 2.
sqrt(vif(fitnut1)) > 2

# Backward elimination: each step drops one term from the previous model and
# compares the two fits with anova().
fitnut2 <- update(fitnut1, ~ . - MUFA)
summary(fitnut2)
anova(fitnut1, fitnut2, test = "log-lik")

fitnut3 <- update(fitnut2, ~ . - Fiber)
summary(fitnut3)
anova(fitnut2, fitnut3, test = "log-lik")

fitnut4 <- update(fitnut3, ~ . - Thiamine)
summary(fitnut4)
anova(fitnut3, fitnut4, test = "log-lik")

fitnut5 <- update(fitnut4, ~ . - Alcohol)
summary(fitnut5)
anova(fitnut4, fitnut5, test = "log-lik")

fitnut6 <- update(fitnut5, ~ . - Age)
summary(fitnut6)
anova(fitnut5, fitnut6, test = "log-lik")

fitnut7 <- update(fitnut6, ~ . - PUFA)
summary(fitnut7)
anova(fitnut6, fitnut7, test = "log-lik")

fitnut8 <- update(fitnut7, ~ . - Vitamin.A.R.E.)
# ---- Backward elimination, continued ------------------------------------------
summary(fitnut8)
anova(fitnut7, fitnut8, test = "log-lik")

fitnut9 <- update(fitnut8, ~ . - WH.ratioCat)
summary(fitnut9)
anova(fitnut8, fitnut9, test = "log-lik")

fitnut10 <- update(fitnut9, ~ . - Riboflavin)
summary(fitnut10)
anova(fitnut9, fitnut10, test = "log-lik")

fitnut11 <- update(fitnut10, ~ . - Zinc)
summary(fitnut11)
anova(fitnut10, fitnut11, test = "log-lik")

fitnut12 <- update(fitnut11, ~ . - Physical.activity)
summary(fitnut12)
anova(fitnut11, fitnut12, test = "log-lik")

fitnut13 <- update(fitnut12, ~ . - Cholesterol)
summary(fitnut13)
anova(fitnut12, fitnut13, test = "log-lik")

# ---- Energy adjusted final model ----------------------------------------------
fitAll <- clogit(
  CAD_group ~ Food.energy + Carbohydrate + Total.fat.oil +
    Beta.carotene + Vitamin.C + Iron + SFA + Cholesterol + Smoking + T2DM + BMI +
    Dyslipidemia + strata(Pair_CADgroup),
  data = Nut
)
summary(fitAll)

# ---- Random Forest analysis ---------------------------------------------------
library(randomForest)

Xnut <- subset(
  Nut,
  select = c(
    T2DM, Dyslipidemia, Smoking, Physical.activity, WH.ratioCat, BMI,
    Alcohol, Age, Food.energy, Carbohydrate, Total.fat.oil, Fiber, Thiamine,
    Riboflavin, Vitamin.A.R.E., Beta.carotene, Vitamin.C, Zinc, Iron,
    MUFA, PUFA, SFA, Cholesterol
  )
)
# CAD_group here is the workspace object created earlier with as.factor().
Ynut <- CAD_group
RS <- data.frame(Ynut, Xnut)

# The statements below consume the random number stream in this order; keep
# the order to keep the results reproducible from the seed.
set.seed(1234)
Model.NutAll <- randomForest(Ynut ~ ., data = RS)
plot(Model.NutAll)
rfNews()

mtry <- tuneRF(
  Xnut, Ynut,
  stepFactor = 0.4, ntreeTry = 250, improve = 0.01,
  trace = TRUE, plot = TRUE
)

# The forest is refitted with fixed ntree = 250 and mtry = 4; the value
# returned by tuneRF() above is stored but not passed on.
Model.Nutrients <- randomForest(
  Ynut ~ .,
  data = RS, ntree = 250, mtry = 4,
  importance = TRUE, proximity = TRUE
)
attributes(Model.Nutrients)
Model.Nutrients$importance
hist(treesize(Model.Nutrients), main = "no of trees", col = "blue")
varImpPlot(
  Model.Nutrients,
  main = "", bg = "red", col = "blue", pch = 18, n.var = 12, sort = TRUE
)
print(Model.Nutrients)

# ---- Evaluation of logistic model (Table 5) -----------------------------------
# Install ROCR only when it is missing instead of reinstalling on every run.
if (!requireNamespace("ROCR", quietly = TRUE)) {
  install.packages("ROCR")
}
library(ROCR)

# `pred` stays the numeric vector of model predictions. The original
# overwrote it with the ROCR prediction object, after which the later
# thresholding `pred > 0.29` could not work.
pred <- predict(fitAll, Nut)
rocr_pred <- prediction(pred, Nut$CAD_group)

# Accuracy at every cut-off; names avoid masking base::eval and base::max.
acc_perf <- performance(rocr_pred, "acc")
plot(acc_perf)

best <- which.max(slot(acc_perf, "y.values")[[1]])
acc <- slot(acc_perf, "y.values")[[1]][best]
acc
# `cut` keeps its original name because the following section prints it.
cut <- slot(acc_perf, "x.values")[[1]][best]
print(c(Accuracy = acc, Cutoff = cut))
# Cut-off with the highest accuracy, as computed in the preceding section.
cut

# Classify with a fixed threshold of 0.29 on the final model's predictions.
# The predictions are recomputed here because `pred` was overwritten with a
# ROCR prediction object earlier, and `>` cannot be applied to that object.
tab <- ifelse(predict(fitAll, Nut) > 0.29, 1, 0)
table(tab, Nut$CAD_group)

# Metrics of the logistic model, from hand-copied confusion-matrix counts.
# In the original these labels were bare text (a syntax error); they are kept
# verbatim as comments.
# NOTE(review): the same label is attached to different kinds of ratio in the
# logistic and random-forest sections (compare the denominators) -- verify
# each label against the confusion tables before reporting.
264 / (264 + 18)                        # TPR, recall
288 / (288 + 42)                        # TNR
264 / (264 + 42)                        # precision, sensitivity
288 / (288 + 18)                        # specificity
2 * 264 / ((2 * 264) + 18 + 42)         # F1-score
(2 * 0.8627451 * 0.9361702) / (0.8627451 + 0.9361702)

# ---- Evaluation of the RF model -----------------------------------------------
(257 + 259) / (257 + 259 + 47 + 49)     # accuracy
257 / (257 + 49)                        # TPR, recall
259 / (259 + 47)                        # TNR
257 / (257 + 47)                        # precision, sensitivity
259 / (259 + 49)                        # specificity
(2 * 0.8453947 * 0.8398693) / (0.8453947 + 0.8398693)  # F1-score

# ---- ROC curve (Figure 3) -----------------------------------------------------
# Install pROC only when it is missing instead of reinstalling on every run.
if (!requireNamespace("pROC", quietly = TRUE)) {
  install.packages("pROC")
}
library(pROC)

par(pty = "m")
roc(
  Nut$CAD_group, predict(fitnut12),
  plot = TRUE, legacy.axes = TRUE, percent = TRUE,
  xlab = "1-Specificity", ylab = "Sensitivity",
  col = "blue", lwd = 3, print.auc = TRUE
)
plot.roc(
  Nut$CAD_group, Model.Nutrients$votes[, 1],
  percent = TRUE, col = "red", lwd = 3, add = TRUE,
  print.auc = TRUE, print.auc.y = 40
)
# Legend text: "Rgression" in the original was a typo.
legend(
  "bottomright",
  legend = c("Conditional Logistic Regression", "Random Forest Regression"),
  col = c("blue", "red"), lwd = 3, cex = 0.82
)

# ---- Train / test data --------------------------------------------------------
set.seed(1234)
Nut1 <- Nut[Nut$CAD_group == 1, ]
Nut0 <- Nut[Nut$CAD_group == 0, ]

# Random split of the group-1 rows, drawn with probabilities 0.7 / 0.3.
ind <- sample(2, nrow(Nut1), replace = TRUE, prob = c(0.7, 0.3))
training <- Nut1[ind == 1, ]
testing <- Nut1[ind == 2, ]
testing$Pair_CADgroup

# Rows of Nut0 that go into the test set; all remaining rows go into training.
# The vector is written once and reused for both selections (the original
# repeated it verbatim).
# NOTE(review): these values look like the pair identifiers printed by the
# line above, copied by hand and used as row positions of Nut0; 197 appears
# twice and the list will not follow a different random draw -- confirm.
test_rows <- c(
  5, 14, 16, 26, 28, 29, 41, 37, 39, 50, 56, 58, 60, 61, 66, 72, 74, 81, 86,
  90, 92, 99, 111, 113, 116, 117, 119, 120, 121, 123, 124, 131, 135, 136, 140,
  141, 147, 149, 154, 155, 157, 166, 170, 172, 175, 183, 185, 193, 190, 192,
  194, 195, 196, 197, 197, 202, 204, 208, 211, 213, 216, 218, 224, 227, 238,
  232, 235, 241, 243, 247, 248, 259, 269, 267, 270, 273, 276, 280, 283, 284,
  290, 294, 296, 302, 305
)
testC <- Nut0[test_rows, ]
trainC <- Nut0[-test_rows, ]

train <- rbind(training, trainC)
test <- rbind(testing, testC)
dim(train)
# Note: this does not work because the sample size is too small.