############################################################################### ## Creat the interaction variables setwd("D:\\5poultry\\logic\\0CreatCom4") d <- read.csv("environment.csv") ## popdenglwd zpopden <- (d$popden - mean(d$popden))/sd(d$popden) nzpopden <-(zpopden-min(zpopden))/(max(zpopden)-min(zpopden)) zglwd <- ((d$glwd - mean(d$glwd))/sd(d$glwd)) nzglwd <-(zglwd-min(zglwd))/(max(zglwd)-min(zglwd)) anzglwd <- (1-nzglwd) popdenglwd <- nzpopden*anzglwd*100 write.csv(popdenglwd,file="popdenglwd.csv") ## popdenramsar zpopden <- (d$popden - mean(d$popden))/sd(d$popden) nzpopden <-(zpopden-min(zpopden))/(max(zpopden)-min(zpopden)) zramsar <- ((d$ramsar - mean(d$ramsar))/sd(d$ramsar)) nzramsar <-(zramsar-min(zramsar))/(max(zramsar)-min(zramsar)) anzramsar <- (1-nzramsar) popdenramsar <- nzpopden*anzramsar*100 write.csv(popdenramsar,file="popdenramsar.csv") ## pouldenglwd zpoultry <- (d$poultry - mean(d$poultry))/sd(d$poultry) nzpoultry <-(zpoultry-min(zpoultry))/(max(zpoultry)-min(zpoultry)) zglwd <- ((d$glwd - mean(d$glwd))/sd(d$glwd)) nzglwd <-(zglwd-min(zglwd))/(max(zglwd)-min(zglwd)) anzglwd <- (1-nzglwd) poultryglwd <- nzpoultry*anzglwd*100 write.csv(popdenglwd,file="poultryglwd.csv") ## poultryramsar zpoultry <- (d$poultry - mean(d$poultry))/sd(d$poultry) nzpoultry <-(zpoultry-min(zpoultry))/(max(zpoultry)-min(zpoultry)) zramsar <- ((d$ramsar - mean(d$ramsar))/sd(d$ramsar)) nzramsar <-(zramsar-min(zramsar))/(max(zramsar)-min(zramsar)) anzramsar <- (1-nzramsar) poultryramsar <- nzpoultry*anzramsar*100 write.csv(poultryramsar,file="poultryramsar.csv") ## make the file "envrionment_com.csv" which includes 4 interaction variables ############################################################################### ## Univariate analysis ~ linear effect library(rms) library(PresenceAbsence) ## variables: city metropolis mjroad road ## glwd ramsar popden poultry dem aspect slope mapet ## maridity pre01 pre02 pre03 pre04 pre05 pre06 pre07 ## pre08 pre09 pre10 pre11 pre12 tmin01 tmin02 tmin03 tmin04 tmin05 tmin06 tmin07 tmin08 tmin09 ## tmin10 tmin11 tmin12 tmax01 tmax02 tmax03 tmax04 tmax05 tmax06 tmax07 ## tmax08 tmax09 tmax10 tmax11 tmax12 NDVI01 NDVI02 NDVI03 NDVI04 NDVI05 ## NDVI06 NDVI07 NDVI08 NDVI09 NDVI10 NDVI11 NDVI12 ## popdenglwd popdenramsar poultryglwd poultryramsar ## change 11 times when use another variable setwd("D:\\5Poultry\\logic\\1univariate") bootnum=1000 d <- read.csv("environment_com.csv") output<-data.frame(matrix(nrow=bootnum,ncol=13)) names(output)=c('EXP-B_poultryramsar','CIExpB-Low_poultryramsar','CIExpB-Up_poultryramsar','P_poultryramsar','R2','AIC',"AUC","AUC_LCI", "AUC_UCI",'P-Value','Threshold','Kappa','Kappa_sd') for (i in 1:bootnum){ P<-subset(d,select=c(names(d)),d$pa==1) A<-subset(d,select=c(names(d)),d$pa==0) Arind<-sample(1:nrow(A),245,replace=T) Prind<-sample(1:nrow(P),245,replace=T) Ain<-A[Arind,] Pin<-P[Prind,] dIN<- rbind(Ain,Pin) poultryramsar <- (dIN$poultryramsar - mean(d$poultryramsar))/sd(d$poultryramsar) model <- glm(pa ~ poultryramsar, family=binomial, data=dIN, x=T,y=T) model2<- lrm(pa ~ poultryramsar, data=dIN, x=T,y=T) ### Extracting model information output[i,"AIC"]=AIC(model) output[i,"R2"]<- model2$stats[10] output[i,"P-Value"]<- as.data.frame(anova(model2))$P[2] a<-length(rownames(summary(model)$coefficients)) for (r in 2:a) { vname<-rownames(summary(model)$coefficients)[r] vname2=paste("P_",vname,sep="") output[i,vname2]<-summary(model)$coefficients[r,4] vname2=paste("EXP-B_",vname,sep="") output[i,vname2]<-exp(summary(model)$coefficients[r,1]) vname2=paste("CIExpB-Low_",vname,sep="") output[i,vname2]<-exp(summary(model)$coefficients[r,1]-1.96*summary(model)$coefficients[r,2]) vname2=paste("CIExpB-Up_",vname,sep="") output[i,vname2]<-exp(summary(model)$coefficients[r,1]+1.96*summary(model)$coefficients[r,2]) } #AUC pr<-predict.glm(model,newdata=dIN, type='response') dfauc<-as.data.frame(cbind(plotID=1:length(dIN$pa),Observed=dIN$pa,Predicted=pr)) output[i,"AUC"]<-auc(dfauc)[1] output[i,"AUC_UCI"]<-auc(dfauc)[1]+1.96*auc(dfauc)[2] output[i,"AUC_LCI"]<-auc(dfauc)[1]-1.96*auc(dfauc)[2] ## Max KAPPA output[i,"Threshold"] <- optimal.thresholds(dfauc)$Predicted[4] output[i,"Kappa"]<-Kappa(cmx(dfauc, threshold = output[i,"Threshold"]))[1] output[i,"Kappa_sd"]<-Kappa(cmx(dfauc, threshold = output[i,"Threshold"]))[2] } summary(output) write.csv(output,"poultryramsar.csv") ############################################################################### ## Univariate analysis ~ square effect ## change 19 times when use another variable setwd("D:\\5Poultry\\logic\\1univariate") bootnum=1000 output<-data.frame(matrix(nrow=bootnum,ncol=17)) names(output)=c('EXP-B_NDVI12','CIExpB-Low_NDVI12','CIExpB-Up_NDVI12','P_NDVI12','EXP-B_SNDVI12','CIExpB-Low_SNDVI12','CIExpB-Up_SNDVI12','P_SNDVI12', 'R2','AIC',"AUC","AUC_LCI","AUC_UCI",'P-Value','Threshold','Kappa','Kappa_sd') d <- read.csv("environment_com.csv") for (i in 1:bootnum){ P<-subset(d,select=c(names(d)),d$pa==1) A<-subset(d,select=c(names(d)),d$pa==0) Arind<-sample(1:nrow(A),296,replace=T) Prind<-sample(1:nrow(P),296,replace=T) Ain<-A[Arind,] Pin<-P[Prind,] dIN<- rbind(Ain,Pin) NDVI12 <- (dIN$NDVI12- mean(d$NDVI12))/sd(d$NDVI12) SNDVI12<- NDVI12^2 model <- glm(pa ~ NDVI12 + SNDVI12, family=binomial, data=dIN, x=T,y=T) model2<- lrm(pa ~ NDVI12 + SNDVI12, data=dIN, x=T,y=T) ### Extracting model information output[i,"AIC"]=AIC(model) output[i,"R2"]<- model2$stats[10] output[i,"P-Value"]<- as.data.frame(anova(model2))$P[2] a<-length(rownames(summary(model)$coefficients)) for (r in 2:a) { vname<-rownames(summary(model)$coefficients)[r] vname2=paste("P_",vname,sep="") output[i,vname2]<-summary(model)$coefficients[r,4] vname2=paste("EXP-B_",vname,sep="") output[i,vname2]<-exp(summary(model)$coefficients[r,1]) vname2=paste("CIExpB-Low_",vname,sep="") output[i,vname2]<-exp(summary(model)$coefficients[r,1]-1.96*summary(model)$coefficients[r,2]) vname2=paste("CIExpB-Up_",vname,sep="") output[i,vname2]<-exp(summary(model)$coefficients[r,1]+1.96*summary(model)$coefficients[r,2]) } #AUC pr<-predict.glm(model,newdata=dIN, type='response') dfauc<-as.data.frame(cbind(plotID=1:length(dIN$pa),Observed=dIN$pa,Predicted=pr)) output[i,"AUC"]<-auc(dfauc)[1] output[i,"AUC_UCI"]<-auc(dfauc)[1]+1.96*auc(dfauc)[2] output[i,"AUC_LCI"]<-auc(dfauc)[1]-1.96*auc(dfauc)[2] ## Max KAPPA output[i,"Threshold"] <- optimal.thresholds(dfauc)$Predicted[4] output[i,"Kappa"]<-Kappa(cmx(dfauc, threshold = output[i,"Threshold"]))[1] output[i,"Kappa_sd"]<-Kappa(cmx(dfauc, threshold = output[i,"Threshold"]))[2] } summary(output) write.csv(output,"NDVI122.csv") ############################################################################### ## Moran's I ~ only significant varaibles ## [12] "dem" [13] "slope" [15] "popden" [17]"mapet" ## [19]"tmin01" [20]"tmin02" [21]"tmin03" [22]"tmin04" [23]"tmin05" [24]"tmin06" ## [25]"tmin07" [26]"tmin08" [27]"tmin09" [28]"tmin10" [29]"tmin11" [30]"tmin12" ## [31]"tmax01" [32]"tmax02" [33]"tmax03" [34]"tmax04" [35]"tmax05" [36]"tmax06" ## [37]"tmax07" [38]"tmax08" [39]"tmax09" [40]"tmax10" [41]"tmax11" [42]"tmax12" ## [43]"pre01" [44]"pre02" [45]"pre03" [46]"pre04" [47]"pre05" [48]"pre06" ## [49]"pre07" [50]"pre08" [51]"pre09" [52]"pre10" [53]"pre11" [54]"pre12" ## [55]"NDVI01" [58]"NDVI04" [59]"NDVI05" [60]"NDVI06" [61]"NDVI07" ## [62]"NDVI08" [63]"NDVI09" [64]"NDVI10" [65]"NDVI11" ## [67]"popdenglwd" [68]"popdenramsar" [69] "poultryglwd" setwd("D:\\5Poultry\\logic\\2Moran") ##IMPORT THE DATASET library(foreign) library(ncf) library(spdep) d <- read.csv("environment_com.csv") # change variable here var=55 names(d[var]) bootnum=1000 range=10 ##SUBSETTING into 2 datasets (presence (P) and absence (A)) A<-subset(d,select=c(names(d)),d$pa==0) P<-subset(d,select=c(names(d)),d$pa==1) outputmoran <-data.frame(matrix(nrow=bootnum,ncol=4)) names(outputmoran)=c("MoranI","Expectation","Variance","P-Value") ##BOOTSTRAP for(n in 1:bootnum) { Arind<-sample(1:nrow(A),130,replace=T) Prind<-sample(1:nrow(P),130,replace=T) Ain<-A[Arind,] Pin<-P[Prind,] dIN<- rbind(Ain,Pin) nbS<-dnearneigh(as.matrix(dIN[2:3]),0,range) nbS.listw<-nb2listw(nbS) mor<-moran.test(dIN[,var],listw=nbS.listw) outputmoran[n,1]<-mor[3]$estimate[1] outputmoran[n,2]<-mor[3]$estimate[2] outputmoran[n,3]<-mor[3]$estimate[3] outputmoran[n,4]<-mor[2]$p.value #calculate moran's I } summary (outputmoran) write.csv(outputmoran,file="NDVI01.csv",row.names=F) ############################################################################### ## VIF library(rms) library(PresenceAbsence) setwd("D:\\5Poultry\\logic\\3VIF") d <- read.csv("environment_com.csv") bootnum=1000 output<-data.frame(matrix(nrow=bootnum,ncol=9) names(output)=c("dem","tmin04" ,"tmax11", "pre04", "pre09","pre11","NDVI01","NDVI04","NDVI09","popdenglwd") for (i in 1:bootnum){ P<-subset(d,select=c(names(d)),d$pa==1) A<-subset(d,select=c(names(d)),d$pa==0) Arind<-sample(1:nrow(A),130,replace=T) Prind<-sample(1:nrow(P),130,replace=T) Ain<-A[Arind,] Pin<-P[Prind,] dIN<- rbind(Ain,Pin) model <- glm(pa ~ dem+tmin04+tmax11 +pre04+pre09+pre11+NDVI01+NDVI04+NDVI09+popdenglwd, data=dIN, x=T,y=T) a<-vif(model) output[i,'dem']<- a[1] output[i,'tmin04']<- a[2] output[i,'tmax11']<- a[3] output[i,'pre04']<- a[4] output[i,'pre09']<- a[5] output[i,'pre11']<- a[6] output[i,'NDVI01']<- a[7] output[i,'NDVI04']<- a[8] output[i,'NDVI09']<- a[9] output[i,'popdenglwd']<- a[10] } summary(output) write.csv(output,"vif10.csv") ################################################################################ ##stepwise selection ##13 variables:'dem','pre04','pre09','pre11','Spre11', 'tmin04','tmax11', 'Stmax11' ##'NDIVI01','SNDVI01','NDVI04', 'NDVI09', 'popdenglwd' library(MASS) library(rms) library(PresenceAbsence) ############################## setwd("D:\\5poultry\\logic\\4multivariate") bootnum=1000 matrixAUC.x<-matrix(nrow=101,ncol=bootnum) matrixAUC.y<-matrix(nrow=101,ncol=bootnum) output<-data.frame(matrix(nrow=bootnum,ncol=77)) names(output)=c('AIC','NO_Selected', 'C_Intercept','C_dem', 'C_pre04','C_pre09','C_pre11','C_Spre11','C_tmin04','C_tmax11', 'C_Stmax11','C_NDVI01','C_SNDVI01','C_NDVI04','C_NDVI09','C_popdenglwd', 'P_Intercept','P_dem','P_pre04','P_pre09','P_pre11','P_Spre11', 'P_tmin04','P_tmax11', 'P_Stmax11','P_NDVI01','P_SNDVI01','P_NDVI04','P_NDVI09','P_popdenglwd', 'EXP-B_Intercept','EXP-B_dem','EXP-B_pre04','EXP-B_pre09','EXP-B_pre11','EXP-B_Spre11', 'EXP-B_tmin04','EXP-B_tmax11','EXP-B_Stmax11','EXP-B_NDVI01','EXP-B_SNDVI01', 'EXP-B_NDVI04','EXP-B_NDVI09','EXP-B_popdenglwd', 'CIExpB-Low_Intercept','CIExpB-Low_dem','CIExpB-Low_pre04','CIExpB-Low_pre09', 'CIExpB-Low_pre11','CIExpB-Low_Spre11', 'CIExpB-Low_tmin04','CIExpB-Low_tmax11','CIExpB-Low_Stmax11', 'CIExpB-Low_NDVI01','CIExpB-Low_SNDVI01','CIExpB-Low_NDVI04','CIExpB-Low_NDVI09', 'CIExpB-Low_popdenglwd','CIExpB-Up_Intercept','CIExpB-Up_dem','CIExpB-Up_pre04', 'CIExpB-Up_pre09', 'CIExpB-Up_pre11', 'CIExpB-Up_Spre11', 'CIExpB-Up_tmin04','CIExpB-Up_tmax11', 'CIExpB-Up_Stmax11','CIExpB-Up_NDVI01','CIExpB-Up_SNDVI01', 'CIExpB-Up_NDVI04','CIExpB-Up_NDVI09','CIExpB-Up_popdenglwd', "AUC","AUC_sd", "Threshold", "Kappa", "Kappa_sd") d <- read.csv("environment_com.csv") # Z score transformation dem <- (dIN$dem- mean(d$dem))/sd(d$dem) pre04 <- (dIN$pre04- mean(d$pre04))/sd(d$pre04) pre09 <- (dIN$pre09- mean(d$pre09))/sd(d$pre09) pre11 <- (dIN$pre11- mean(d$pre11))/sd(d$pre11) Spre11 <- pre11^2 tmin04 <- (dIN$tmin04- mean(d$tmin04))/sd(d$tmin04) tmax11 <- (dIN$tmax11- mean(d$tmax11))/sd(d$tmax11) Stmax11 <- tmax11^2 NDVI01 <- (dIN$NDVI01- mean(d$NDVI01))/sd(d$NDVI01) SNDVI01 <- NDVI01^2 NDVI04 <- (dIN$NDVI04- mean(d$NDVI04))/sd(d$NDVI04) NDVI09 <- (dIN$NDVI09- mean(d$NDVI09))/sd(d$NDVI09) popdenglwd <- (dIN$popdenglwd-mean(d$popdenglwd))/sd(d$popdenglwd) for (i in 1:bootnum){ P<-subset(d,select=c(names(d)),d$pa==1) A<-subset(d,select=c(names(d)),d$pa==0) Arind<-sample(1:nrow(A),130,replace=T) Prind<-sample(1:nrow(P),130,replace=T) Ain<-A[Arind,] Pin<-P[Prind,] dIN<- rbind(Ain,Pin) model <- glm(pa ~ dem+pre04+pre09+pre11+Spre11+tmin04+tmax11+Stmax11+NDVI01+SNDVI01 +NDVI04+NDVI09+popdenglwd, family= binomial, data=dIN, x=T,y=T) modstep<-stepAIC(model) ### Extracting model information output[i,'AIC']<-AIC(modstep) a<-length(rownames(summary(modstep)$coefficients)) output[i,'NO_Selected']<-a-1 output[i,'C_Intercept']<-summary(modstep)$coefficients[1,1] output[i,'P_Intercept']<-summary(modstep)$coefficients[1,4] output[i,'EXP-B_Intercept']<-exp(summary(modstep)$coefficients[1,1]) output[i,'CIExpB-Low_Intercept']<-exp(summary(modstep)$coefficients[1,1]-1.96*summary(modstep)$coefficients[1,2]) output[i,'CIExpB-Up_Intercept']<-exp(summary(modstep)$coefficients[1,1]+1.96*summary(modstep)$coefficients[1,2]) for (r in 2:a) { vname<-rownames(summary(modstep)$coefficients)[r] vname2=paste("C_",vname,sep="") output[i,vname2]<-summary(modstep)$coefficients[r,1] vname2=paste("P_",vname,sep="") output[i,vname2]<-summary(modstep)$coefficients[r,4] vname2=paste("EXP-B_",vname,sep="") output[i,vname2]<-exp(summary(modstep)$coefficients[r,1]) vname2=paste("CIExpB-Low_",vname,sep="") output[i,vname2]<-exp(summary(modstep)$coefficients[r,1]-1.96*summary(modstep)$coefficients[r,2]) vname2=paste("CIExpB-Up_",vname,sep="") output[i,vname2]<-exp(summary(modstep)$coefficients[r,1]+1.96*summary(modstep)$coefficients[r,2]) } pr<-predict.glm(modstep,newdata=dIN, type='response') dfauc<-as.data.frame(cbind(plotID=1:length(dIN$pa),Observed=dIN$pa,Predicted1=pr)) output[i,"AUC"]<-auc(dfauc)[1] output[i,"AUC_SD"]<-auc(dfauc)[2] #write.csv(dfauc,paste("dfauc",as.character(i),".csv",sep="")) b<-roc.plot.calculate(dfauc) matrixAUC.x[,i]<-1-b[,4] matrixAUC.y[,i]<-b[,3] } write.csv(output,"Stepwise_selection.csv") ################################################################################ ## multiple logistic regression ~ final model ##4 variables: 'pre11', 'tmax11','Stmax11', 'popdenglwd' library(MASS) library(rms) library(PresenceAbsence) setwd("D:\\5poultry\\logic\\4multivariate") bootnum=1000 matrixAUC.x<-matrix(nrow=101,ncol=bootnum) matrixAUC.y<-matrix(nrow=101,ncol=bootnum) ##'pre11', 'tmax11','Stmax11', 'popdenglwd' output<-data.frame(matrix(nrow=bootnum,ncol=32)) names(output)=c('AIC','NO_Selected', 'C_Intercept','C_pre11','C_tmax11','C_Stmax11','C_popdenglwd', 'P_Intercept','P_pre11', 'P_tmax11','P_Stmax11','P_popdenglwd', 'EXP-B_Intercept','EXP-B_pre11','EXP-B_tmax11','EXP-B_Stmax11','EXP-B_popdenglwd', 'CIExpB-Low_Intercept','CIExpB-Low_pre11','CIExpB-Low_tmax11','CIExpB-Low_Stmax11', 'CIExpB-Low_popdenglwd', 'CIExpB-Up_Intercept','CIExpB-Up_pre11','CIExpB-Up_tmax11', 'CIExpB-Up_Stmax11','CIExpB-Up_popdenglwd', "AUC","AUC_sd", "Threshold", "Kappa", "Kappa_sd") d <- read.csv("environment_com.csv") # Z score transformation pre11 <- (dIN$pre11- mean(d$pre11))/sd(d$pre11) tmax11 <- (dIN$tmax11- mean(d$tmax11))/sd(d$tmax11) Stmax11 <- tmax11^2 popdenglwd <- (dIN$popdenglwd-mean(d$popdenglwd))/sd(d$popdenglwd) for (i in 1:bootnum){ P<-subset(d,select=c(names(d)),d$pa==1) A<-subset(d,select=c(names(d)),d$pa==0) Arind<-sample(1:nrow(A),130,replace=T) Prind<-sample(1:nrow(P),130,replace=T) Ain<-A[Arind,] Pin<-P[Prind,] dIN<- rbind(Ain,Pin) model <- glm(pa ~pre11+tmax11+Stmax11+popdenglwd, family= binomial, data=dIN, x=T,y=T) ### Extracting model information output[i,'AIC']<-AIC(model) a<-length(rownames(summary(model)$coefficients)) output[i,'NO_Selected']<-a-1 output[i,'C_Intercept']<-summary(model)$coefficients[1,1] output[i,'P_Intercept']<-summary(model)$coefficients[1,4] output[i,'EXP-B_Intercept']<-exp(summary(model)$coefficients[1,1]) output[i,'CIExpB-Low_Intercept']<-exp(summary(model)$coefficients[1,1]-1.96*summary(model)$coefficients[1,2]) output[i,'CIExpB-Up_Intercept']<-exp(summary(model)$coefficients[1,1]+1.96*summary(model)$coefficients[1,2]) for (r in 2:a) { vname<-rownames(summary(model)$coefficients)[r] vname2=paste("C_",vname,sep="") output[i,vname2]<-summary(model)$coefficients[r,1] vname2=paste("P_",vname,sep="") output[i,vname2]<-summary(model)$coefficients[r,4] vname2=paste("EXP-B_",vname,sep="") output[i,vname2]<-exp(summary(model)$coefficients[r,1]) vname2=paste("CIExpB-Low_",vname,sep="") output[i,vname2]<-exp(summary(model)$coefficients[r,1]-1.96*summary(model)$coefficients[r,2]) vname2=paste("CIExpB-Up_",vname,sep="") output[i,vname2]<-exp(summary(model)$coefficients[r,1]+1.96*summary(model)$coefficients[r,2]) } pr<-predict.glm(model,newdata=dIN, type='response') dfauc<-as.data.frame(cbind(plotID=1:length(dIN$pa),Observed=dIN$pa,Predicted1=pr)) output[i,"AUC"]<-auc(dfauc)[1] output[i,"AUC_sd"]<-auc(dfauc)[2] ## Max KAPPA output[i,"Threshold"] <- optimal.thresholds(dfauc)$Predicted[4] output[i,"Kappa"]<-Kappa(cmx(dfauc, threshold = output[i,"Threshold"]))[1] output[i,"Kappa_sd"]<-Kappa(cmx(dfauc, threshold = output[i,"Threshold"]))[2] #write.csv(dfauc,paste("dfauc",as.character(i),".csv",sep="")) b<-roc.plot.calculate(dfauc) matrixAUC.x[,i]<-1-b[,4] matrixAUC.y[,i]<-b[,3] } summary(output) write.csv(output,"finalmodel_04.csv") write.csv(matrixAUC.x,"MatAUCX_04.csv") write.csv(matrixAUC.y,"MatAUCY_04.csv")