library(aldvmm)
library(readxl)
library(dplyr)

#####################
# 50-50
#####################
# Repeated train/test evaluation of four models (GLM, OLS, CLAD, ALDVMM_3).
# For every run the pre-generated train/test CSV pair is read, each model is
# fitted on the training split, scored on the test split, and one row of
# metrics per model is collected. A cumulative checkpoint CSV is written after
# every run and a final CSV after the last run.
#
# Names used here but defined outside this part of the script:
#   DATA_PATH, RESULTS_PATH, factor_features, model_formula,
#   model_formula_aldvmm, my_init, my_opt, calc_mae, calc_mse, calc_rsq.
# NOTE(review): crq(), Curv() and dither() are presumably from the quantreg
# package, which is not attached above -- confirm it is loaded elsewhere.

NUM_RUNS <- 100
RUNS <- seq_len(NUM_RUNS)

# Toggle for the (presumably slow) ALDVMM fit; loop-invariant, so set once.
fit_aldvmm <- TRUE

# Build one typed results row for a fitted model.
#
# The test metrics are computed from `y_pred` and `y` with the project's
# calc_* helpers. Returning a one-row data frame (rather than a c() vector
# mixing the model name with numbers) keeps `run` and all metrics numeric
# instead of silently coercing them to character.
make_result_row <- function(model_name, run, train_mae, train_mse, y_pred, y) {
  data.frame(
    model_name = model_name,
    run = run,
    train_mae = as.numeric(train_mae),
    train_mse = as.numeric(train_mse),
    test_mae = as.numeric(calc_mae(y_pred, y)),
    test_mse = as.numeric(calc_mse(y_pred, y)),
    test_rsq = as.numeric(calc_rsq(y_pred, y)),
    stringsAsFactors = FALSE
  )
}

results <- data.frame()
result_rows <- list()

for (RUN in RUNS) {
  split_dir <- paste0(DATA_PATH, "/splits_", NUM_RUNS)
  train_data_run <- read.csv(paste0(split_dir, "/train_", RUN, ".csv"))
  test_data_run <- read.csv(paste0(split_dir, "/test_", RUN, ".csv"))

  # need to make factor variables
  # NOTE(review): train and test are converted independently, so their factor
  # level sets can differ if a level is missing from one split -- verify.
  train_data_run <- train_data_run %>%
    mutate_at(factor_features, ~(factor(.)))
  test_data_run <- test_data_run %>%
    mutate_at(factor_features, ~(factor(.)))

  # ---- GLM (Gamma, log link) on Disutility ----
  fit <- glm(model_formula, family = Gamma(link = "log"),
             data = train_data_run)
  y_pred <- predict(fit, newdata = test_data_run, type = "response")
  y <- test_data_run$Disutility
  result_rows[[length(result_rows) + 1L]] <- make_result_row(
    "GLM", RUN,
    train_mae = calc_mae(fit$fitted.values, fit$y),
    train_mse = calc_mse(fit$fitted.values, fit$y),
    y_pred = y_pred, y = y
  )

  # ---- OLS on the same formula ----
  fit <- lm(model_formula, data = train_data_run)
  y_pred <- predict(fit, newdata = test_data_run)
  y <- test_data_run$Disutility
  result_rows[[length(result_rows) + 1L]] <- make_result_row(
    "OLS", RUN,
    train_mae = mean(abs(fit$residuals)),
    train_mse = mean(fit$residuals ^ 2),
    y_pred = y_pred, y = y
  )

  # ---- CLAD: censored median (tau = 0.5) regression on EQ5DINDEX ----
  # yc is the per-row censoring point passed to Curv(); it is 1.0 for every
  # training row.
  train_data_run$yc <- rep(1.0, length(train_data_run$EQ5DINDEX))
  clad <- crq(
    Curv(dither(EQ5DINDEX, type = "right", value = 0.00005), yc, "right") ~
      ISI_Score + deage + female + married + degree_4years + employed +
      retired + current_smoker + former_smoker + heavy_drinker +
      low_moderate_drinker + BMI_R + CCI + Combined_DP_PTS_AX + pain +
      treated + diagnosed + treated_ISI_Score + Not_Country_UK,
    data = train_data_run, tau = 0.5, method = "Powell"
  )
  y_pred <- predict(clad, newdata = test_data_run)
  y <- test_data_run$EQ5DINDEX
  result_rows[[length(result_rows) + 1L]] <- make_result_row(
    "CLAD", RUN,
    train_mae = mean(abs(clad$residuals)),
    train_mse = mean(clad$residuals ^ 2),
    y_pred = y_pred, y = y
  )

  # ---- ALDVMM with 3 components on EQ5DINDEX ----
  if (fit_aldvmm) {
    fit <- aldvmm(
      formula = model_formula_aldvmm,
      data = train_data_run,
      psi = c(-0.594, 1),
      ncmp = 3,
      init.method = my_init,
      optim.method = my_opt
    )
    y_pred <- predict(fit, newdata = test_data_run)
    y <- test_data_run$EQ5DINDEX
    # Flatten whatever predict() returned into a plain numeric vector.
    y_pred <- as.numeric(unlist(y_pred))
    result_rows[[length(result_rows) + 1L]] <- make_result_row(
      "ALDVMM_3", RUN,
      train_mae = fit$gof$mae,
      train_mse = fit$gof$mse,
      y_pred = y_pred, y = y
    )
  }

  # Checkpoint: cumulative results for all runs completed so far.
  results <- bind_rows(result_rows)
  write.csv(
    results,
    paste0(RESULTS_PATH, "/splits_", NUM_RUNS, "/test_runs_", RUN, "_.csv"),
    row.names = FALSE, na = ""
  )
}

write.csv(
  results,
  paste0(RESULTS_PATH, "/splits_", NUM_RUNS, "/test_runs_all.csv"),
  row.names = FALSE, na = ""
)