library(aldvmm) 
  library(readxl)
  library(dplyr)
  
  
  #####################
  # 50-50 
  #####################
  
  # Repeated train/test evaluation of four models (GLM, OLS, CLAD, ALDVMM).
  #
  # For every run, the pre-generated train/test CSV pair is read, each model is
  # fitted on the training split and scored on the test split, and one row per
  # model is appended to `results`. A cumulative checkpoint CSV is written after
  # every run, and the complete table is written once the loop has finished.
  #
  # Relies on objects defined outside this section: DATA_PATH, RESULTS_PATH,
  # factor_features, model_formula, model_formula_aldvmm, my_init, my_opt,
  # calc_mae(), calc_mse(), calc_rsq(), and crq()/Curv()/dither()
  # (NOTE(review): presumably from quantreg, which is not loaded in the
  # library() calls visible at the top of this file -- confirm it is attached).

  NUM_RUNS <- 100
  RUNS <- seq_len(NUM_RUNS)

  # Loop-invariant switch for the ALDVMM fit; set once instead of per run.
  fit_aldvmm <- TRUE

  # Directories holding the input splits and receiving the result files.
  splits_dir <- file.path(DATA_PATH, paste0("splits_", NUM_RUNS))
  results_dir <- file.path(RESULTS_PATH, paste0("splits_", NUM_RUNS))

  # Build one row of the results table for a fitted model.
  #
  # The row is a one-row data.frame rather than a c() vector: mixing the model
  # name with the metrics in c() would coerce every metric to character.
  #
  # model_name: label written to the `model_name` column.
  # run:        index of the current run.
  # train_mae, train_mse: training-set errors computed by the caller.
  # y_pred, y:  test-set predictions and observed outcomes; the test metrics
  #             are derived from them with calc_mae/calc_mse/calc_rsq.
  make_results_row <- function(model_name, run, train_mae, train_mse,
                               y_pred, y) {
    data.frame(
      model_name = model_name,
      run = run,
      train_mae = train_mae,
      train_mse = train_mse,
      test_mae = calc_mae(y_pred, y),
      test_mse = calc_mse(y_pred, y),
      test_rsq = calc_rsq(y_pred, y),
      stringsAsFactors = FALSE
    )
  }

  # Rows are collected in a list and bound together once per run, instead of
  # growing a data.frame with rbind() after every model.
  result_rows <- list()
  results <- data.frame()

  for (RUN in RUNS) {
    train_data_run <- read.csv(
      file.path(splits_dir, paste0("train_", RUN, ".csv"))
    )
    test_data_run <- read.csv(
      file.path(splits_dir, paste0("test_", RUN, ".csv"))
    )

    # The categorical predictors must be factors for the model fits below.
    train_data_run <- train_data_run %>% mutate_at(factor_features, factor)
    test_data_run <- test_data_run %>% mutate_at(factor_features, factor)

    # --- GLM: Gamma family with log link ---------------------------------
    fit <- glm(
      model_formula,
      family = Gamma(link = "log"),
      data = train_data_run
    )
    # type = "response" returns predictions on the outcome scale.
    y_pred <- predict(fit, newdata = test_data_run, type = "response")
    y <- test_data_run$Disutility
    result_rows[[length(result_rows) + 1L]] <- make_results_row(
      "GLM", RUN,
      train_mae = calc_mae(fit$fitted.values, fit$y),
      train_mse = calc_mse(fit$fitted.values, fit$y),
      y_pred = y_pred, y = y
    )

    # --- OLS ---------------------------------------------------------------
    fit <- lm(model_formula, data = train_data_run)
    y_pred <- predict(fit, newdata = test_data_run)
    y <- test_data_run$Disutility
    result_rows[[length(result_rows) + 1L]] <- make_results_row(
      "OLS", RUN,
      train_mae = mean(abs(fit$residuals)),
      train_mse = mean(fit$residuals^2),
      y_pred = y_pred, y = y
    )

    # --- CLAD: censored median regression ----------------------------------
    # Every observation gets a right-censoring point of 1.0; the outcome is
    # dithered before fitting and the median (tau = 0.5) is estimated with
    # the "Powell" method.
    train_data_run$yc <- rep(1.0, nrow(train_data_run))
    clad <- crq(
      Curv(dither(EQ5DINDEX, type = "right", value = 0.00005), yc, "right") ~
        ISI_Score +
        deage +
        female +
        married +
        degree_4years +
        employed +
        retired +
        current_smoker +
        former_smoker +
        heavy_drinker +
        low_moderate_drinker +
        BMI_R +
        CCI +
        Combined_DP_PTS_AX +
        pain +
        treated +
        diagnosed +
        treated_ISI_Score +
        Not_Country_UK,
      data = train_data_run,
      tau = 0.5,
      method = "Powell"
    )
    y_pred <- predict(clad, newdata = test_data_run)
    # CLAD is fitted on EQ5DINDEX, so it is scored against that column.
    y <- test_data_run$EQ5DINDEX
    result_rows[[length(result_rows) + 1L]] <- make_results_row(
      "CLAD", RUN,
      train_mae = mean(abs(clad$residuals)),
      train_mse = mean(clad$residuals^2),
      y_pred = y_pred, y = y
    )

    # --- ALDVMM with three components ----------------------------------------
    if (fit_aldvmm) {
      fit <- aldvmm(
        formula = model_formula_aldvmm,
        data = train_data_run,
        psi = c(-0.594, 1),
        ncmp = 3,
        init.method = my_init,
        optim.method = my_opt
      )

      # predict() does not return a bare vector here, so flatten the result
      # to a plain numeric vector before scoring.
      y_pred <- as.numeric(unlist(predict(fit, newdata = test_data_run)))
      y <- test_data_run$EQ5DINDEX
      result_rows[[length(result_rows) + 1L]] <- make_results_row(
        "ALDVMM_3", RUN,
        # Training errors are taken from the goodness-of-fit slot of the fit.
        train_mae = fit$gof$mae,
        train_mse = fit$gof$mse,
        y_pred = y_pred, y = y
      )
    }

    # Checkpoint: cumulative results of all runs completed so far.
    results <- bind_rows(result_rows)
    write.csv(
      results,
      file.path(results_dir, paste0("test_runs_", RUN, "_.csv")),
      row.names = FALSE,
      na = ""
    )
  }

  write.csv(
    results,
    file.path(results_dir, "test_runs_all.csv"),
    row.names = FALSE,
    na = ""
  )