## Example workflow: simulate hierarchically sparse data, fit an adaptive
## vennLasso model with cross validation, and compare the predictions and
## estimated coefficients against the simulated truth.

## first install all necessary packages; only the ones that are not already
## available are installed, so re-running the script does not trigger a
## reinstall (and a network round trip) every time
required.pkgs <- c("Rcpp", "RcppEigen", "RcppNumerical",
                   "VennDiagram", "visNetwork")
is.installed <- vapply(required.pkgs, requireNamespace, logical(1),
                       quietly = TRUE)
if (!all(is.installed)) {
  install.packages(required.pkgs[!is.installed])
}

## genHierSparseData(), cv.vennLasso() and plotSelections() are used below
## without a namespace prefix, so the package providing them must be attached
library(vennLasso)

## simulate data where covariate effects are different based on
## two stratifying factors
set.seed(123)
dat.sim <- genHierSparseData(ncats = 2, nvars = 100,
                             nobs = 200, nobs.test = 1000)

## covariates
x      <- dat.sim$x
x.test <- dat.sim$x.test

## responses
y      <- dat.sim$y
y.test <- dat.sim$y.test

## stratifying factors
grp      <- dat.sim$group.ind
grp.test <- dat.sim$group.ind.test

## fit adaptive vennLasso model
## and select tuning parameter
## using 5-fold cross validation
fit <- cv.vennLasso(
  x = x,
  y = y,
  groups = grp,
  adaptive.lasso = TRUE,
  family = "gaussian",
  gamma = 1,             ## adaptive lasso tuning parameter
  one.intercept = FALSE, ## allow separate intercept for each subpopulation
  nfolds = 5
)

## position (along the lambda path) of the tuning parameter which
## minimizes the CV MSE; computed once and reused below
idx.min <- which.min(fit$cvm)

## plot CV MSEs and the tuning parameter
## which minimizes CV MSE
plot(x = log(fit$lambda), y = fit$cvm,
     ylab = "CV MSE", xlab = expression(log(lambda)))
abline(v = log(fit$lambda[idx.min]))

## plot the number of variables selected for
## each subpopulation using the tuning parameter
## selected by cross validation
plotSelections(fit)

## create predictions for a test set
test.preds <- predict(fit, x.test, grp.test, s = "lambda.min")

## evaluate performance of predictions:
## test-set MSE of the model, test-set MSE of predicting the mean
## response, and the smallest cross validated MSE
mean((y.test - test.preds)^2)
mean((y.test - mean(y.test))^2)
min(fit$cvm)

## inspect the estimated coefficients.
## each column corresponds to a variable, each row is a subpopulation
round(fit$vennLasso.fit$beta[, , idx.min], 3)

## look at the true coefficients from the data-generating model:
round(dat.sim$beta.mat, 3)

## compare the first few variables
## NOTE(review): columns 2:9 of the estimate are compared with columns 1:8
## of the truth -- presumably the first column of the estimated beta array
## holds the intercept; confirm against the vennLasso documentation
round(fit$vennLasso.fit$beta[, 2:9, idx.min], 3)
round(dat.sim$beta.mat[, 1:8], 3)