library(dismo)
library(gbm)

# set the root directory for his work. It should have the folders containing your input and output files.
path.root <- 'D:/BRT/data/' 

tick<-read.csv(paste(path.root, "tick.csv", sep=''), header=TRUE)
# make a formula. This is only useful for the gbm function in the gbm package.
my.formula <- as.formula(paste('case', paste(colnames(tick)[c(3:43)], collapse='+'), sep='~'))

# In the first stage, the split-and-fitting step was repeated for 10 times to screen important predictors. 
# Validation of the trained model using the test set is not performed in this stage. 
# Predictors that had relative contribution <2% for all bootstrap training sets are excluded from the next stage. 
# In the second stage, the split-and-fitting step was repeated for 100 times using the remaining predictors.
n.iteration <- 10
i.var <- c(3:43)
n.var <- length(i.var)
rel.con<-preds.gbm.train<-preds.gbm.test<-preds.gbm.all<-NULL
resp.curve <- NULL
model.lst <- list()
for(i in 1:n.iteration)
{
   file.name <- paste(path.root, "train75/train", i, ".csv", sep='')
   train.data <- read.csv(file.name,header=TRUE)
   file.name <- paste(path.root, "test25/test", i, ".csv", sep='')
   test.data <- read.csv(file.name,header=TRUE)
   
   set.seed(10000+i) # set random seed so that results can be replicated; otherwise, you will get slightly different results each time you run the program
   #start.time<-Sys.time()
   gbm.train<-gbm.step(data=train.data,gbm.x=i.var, gbm.y=2, family="bernoulli", tree.complexity=5, 
                       learning.rate=0.005, bag.fraction=0.75, site.weights=train.data$weights, n.folds=10, max.trees = 3000, silent=TRUE, plot.main=FALSE)
   model.lst[[i]] <- gbm.train
 
   tmp <- summary(gbm.train)
   colnames(tmp) <- c('var', paste('rel.inf.', i, sep=''))   
   if(is.null(rel.con))  rel.con <- tmp  else  rel.con <- merge(rel.con, tmp, by='var', sort=FALSE)

   tmp <- cbind(rep(i,nrow(train.data)), train.data[,c('IDDD','case')], 
                predict.gbm(gbm.train, train.data, n.trees= gbm.train$n.trees, type="response"))
   colnames(tmp) <- c('round', 'IDDD', 'case', 'pred')             
   if(is.null(preds.gbm.train))  preds.gbm.train <- tmp  else  preds.gbm.train <- rbind(preds.gbm.train, tmp)
   
   tmp <- cbind(rep(i,nrow(test.data)), test.data[,c('IDDD','case')], 
                predict.gbm(gbm.train, test.data, n.trees= gbm.train$n.trees, type="response"))
   colnames(tmp) <- c('round', 'IDDD', 'case', 'pred')             
   if(is.null(preds.gbm.test))  preds.gbm.test <- tmp  else  preds.gbm.test <- rbind(preds.gbm.test, tmp)

   tmp <- cbind(rep(i,nrow(sfts)), sfts[,c('IDDD','case')], 
                predict.gbm(gbm.train, sfts, n.trees= gbm.train$n.trees, type="response"))
   colnames(tmp) <- c('round', 'IDDD', 'case', 'pred')             
   if(is.null(preds.gbm.all))  preds.gbm.all <- tmp  else  preds.gbm.all <- rbind(preds.gbm.all, tmp)
   

   resp.curve.tmp <- NULL
   for(j in 1:n.var)
   {
      tmp <- plot(gbm.train, i.var = j, continuous.resolution = 150, return.grid = TRUE, type = "link")
      tmp[,2] <- tmp[,2] - mean(tmp[,2])
      colnames(tmp)[2] <- paste('y', j, sep='')
      if (j==1) resp.curve.tmp <- cbind(rep(i, nrow(tmp)), 1:nrow(tmp), tmp)  else resp.curve.tmp <- cbind(resp.curve.tmp, tmp)
   }
   colnames(resp.curve.tmp)[1:2] <- c('round', 'point')
   resp.curve <- rbind(resp.curve, resp.curve.tmp)
   
}
   
# raw output, which contains the results of all iterations    
write.csv(rel.con, file =paste(path.root, "preds/rel_con.csv", sep='')) # relative contributions
write.csv(preds.gbm.train, file =paste(path.root, "preds/train.csv", sep='')) # predictions for training data
write.csv(preds.gbm.test, file =paste(path.root, "preds/test.csv", sep='')) # predictions for test data
write.csv(preds.gbm.all, file =paste(path.root, "preds/all.csv", sep='')) # predictions for all data
write.csv(resp.curve, file =paste(path.root, "preds/resp_curve.csv", sep='')) # response curves

# output average relative contributions， predicted probabilities, and response curves
average.rel.con <- data.frame(var_name=rel.con$var, avg_rel_con=apply(rel.con[,-1], 1, mean))  
average.preds.train <- aggregate(cbind(case,pred)~IDDD, data=preds.gbm.train, mean)
average.preds.test <- aggregate(cbind(case,pred)~IDDD, data=preds.gbm.test, mean)
average.preds.all <- aggregate(cbind(case,pred)~IDDD, data=preds.gbm.all, mean)

write.csv(average.rel.con, file =paste(path.root, "preds/average_rel_con.csv", sep=''))
write.csv(average.preds.train, file =paste(path.root, "preds/average_preds_train.csv", sep=''))
write.csv(average.preds.test, file =paste(path.root, "preds/average_preds_test.csv", sep=''))
write.csv(average.preds.all, file =paste(path.root, "preds/average_preds_all.csv", sep=''))

save(model.lst, rel.con, preds.gbm.train, preds.gbm.test,preds.gbm.all, resp.curve, average.rel.con, 
     average.preds.train, average.preds.test, average.preds.all,
     file=paste(path.root, "preds/BRT_Results.Rdata", sep=''))