#---------------------------------------------------------------------------------------------------------------- # # A Comparison of Covariate Adjustment Approaches under Model Misspecification # in Individually Randomized Trials: Additional Files # # # This R file contains functions for the Data Generating Mechanisms for the simulations # #---------------------------------------------------------------------------------------------------------------- # Load required packages------------------------------------------------------- library(splines) library(MASS) library(CausalGAM) library(tmle) library(twang) library(CBPS) library(PSW) library(stdReg) library(logbin) library(PSweight) library(logistf) # Main Simulation ------------------------------------------------------------ gen_cts_normal<-function(n, sd=42, true_diff=40, true_mean=FALSE, po=FALSE){ # Randomised treatment treat <- rep_len(0:1,n) # Baseline risk factor x <- rnorm(n, 0, 1) ### Outcome variables # Generic error term e1 <- rnorm(n, 0, sd) # Outcome 1: Linear y1_0_mean <- 395 + 110*x y1_1_mean <- 395 + 110*x + true_diff y1_0 <- y1_0_mean + e1 y1_1 <- y1_1_mean + e1 y1 <- y1_0*(1-treat) + y1_1*treat # Outcome 2: Quadratic y2_0_mean <- 600 + 50*x - 5*x*x y2_1_mean <- 600 + 50*x - 5*x*x + true_diff y2_0 <- y2_0_mean + e1 y2_1 <- y2_1_mean + e1 y2 <- y2_0*(1-treat) + y2_1*treat # Outcome 3: Flattening-off y3_1_mean <- 700 - exp(-(x-4)) + true_diff y3_0_mean <- 700 - exp(-(x-4)) y3_0_mean <- 700 - exp(-(x-4)) y3_1_mean <- 700 - exp(-(x-4)) + true_diff y3_0 <- y3_0_mean + e1 y3_1 <- y3_1_mean + e1 y3 <- y3_0*(1-treat) + y3_1*treat # Outcome 4: J-shaped y4_1_mean <- 200 + 100*x + 60*x*x + true_diff y4_0_mean <- 200 + 100*x + 60*x*x y4_0 <- y4_0_mean + e1 y4_1 <- y4_1_mean + e1 y4 <- y4_0*(1-treat) + y4_1*treat # Outcome 5: Full (i.e. symmetric) quadratic y5_1_mean <- 100 + 104*x*x + true_diff y5_0_mean <- 100 + 104*x*x y5_0 <- y5_0_mean + e1 y5_1 <- y5_1_mean + e1 y5 <- y5_0*(1-treat) + y5_1*treat # Outcome 6: Harmonic y6_1_mean <- 400 + 300*cos(2*pi*3*x*0.15 + 4) + true_diff y6_0_mean <- 400 + 300*cos(2*pi*3*x*0.15 + 4) y6_0 <- y6_0_mean + e1 y6_1 <- y6_1_mean + e1 y6 <- y6_0*(1-treat) + y6_1*treat # Outcome 7: Two-tier y7_1_mean <- 180 + 470*(x>0) + true_diff y7_0_mean <- 180 + 470*(x>0) y7_0 <- y7_0_mean + e1 y7_1 <- y7_1_mean + e1 y7 <- y7_0*(1-treat) + y7_1*treat ### Keep required variables d <- as.data.frame(cbind(treat, x, y1, y2, y3, y4, y5, y6, y7)) if (po==TRUE) { d<- as.data.frame(cbind(d, y1_0, y2_0, y3_0, y4_0, y5_0, y6_0, y7_0, y1_1, y2_1, y3_1, y4_1, y5_1, y6_1, y7_1)) } if (true_mean==TRUE) { d<- as.data.frame(cbind(d, y1_0_mean, y2_0_mean, y3_0_mean, y4_0_mean, y5_0_mean, y6_0_mean, y7_0_mean, y1_1_mean, y2_1_mean, y3_1_mean, y4_1_mean, y5_1_mean, y6_1_mean, y7_1_mean)) } # Return the dataset needed for analysis return(d) } # Extension 1: Multiple Covariates gen_multiple <- function(n, treat_effect=TRUE, po=FALSE){ # Patient ID and randomised treatment (exactly 1:1) data<-data.frame("id"=1:n, "treat"=rep_len(0:1,n)) # Baseline covariates - initially draw from MVN distribution (to be subsequently transformed to covariates) corr_mat<-matrix(c(1, -0.15815327, 0.01976785, -0.33605095, -0.02770091, -0.02947348, 0.12521156, 0.07258328, -0.00770749, -0.0520675, -0.15815327, 1, 0.21510675, 0.02304987, -0.23891207, -0.24050584, 0.28897531, -0.42597888, -0.18675189, 0.25504132, 0.01976785, 0.21510675, 1, 0.01665172, -0.03617062, -0.09391424, 0.07609943, -0.09578027, -0.01071428, 0.09443469, -0.33605095, 0.02304987, 0.01665172, 1, -0.1680565, -0.11050784, 0.12336967, -0.31154084, -0.09885801, 0.09544161, -0.02770091, -0.23891207, -0.03617062, -0.1680565, 1, 0.3954655, -0.47698826, 0.46050938, 0.3058763, -0.08384004, -0.02947348, -0.24050584, -0.09391424, -0.11050784, 0.3954655, 1, -0.49330009, 0.29356871, 0.13219781, -0.06329405, 0.12521156, 0.28897531, 0.07609943, 0.12336967, -0.47698826, -0.49330009, 1, -0.60894048, -0.33824607, 0.14271621, 0.07258328, -0.42597888, -0.09578027, -0.31154084, 0.46050938, 0.29356871, -0.60894048, 1, 0.4723672, -0.33232821, -0.00770749, -0.18675189, -0.01071428, -0.09885801, 0.3058763, 0.13219781, -0.33824607, 0.4723672, 1, -0.34010754, -0.0520675, 0.25504132, 0.09443469, 0.09544161, -0.08384004, -0.06329405, 0.14271621, -0.33232821, -0.34010754, 1), ncol=10, byrow=T) vec_means<-rep(0,10) d<-as.data.frame(mvrnorm(n, vec_means, corr_mat)) names(d)<-c(paste("x",1:10,sep="")) #d contains 10 N(0,1) variables correlated to each other # Create covariates d$c1<-d$x1*14.3+48.9 # Modelled on age d$c2<- 1/(d$x2*0.007 + 0.0377) # BMI d$c3<-(d$x3*1.756 + 7.167)**2 - 5 # Baseline vitamin D d$c4<-d$x4*109.75 + 379.8 # Baseline PEFR d$c5<-qexp(pnorm(d$x5),rate=1/0.5) # Baseline Asthma score d$c6<-qexp(pnorm(d$x6),rate=1) # Baseline Vent score d$c7<-(qnorm(pnorm(d$x7)*pnorm((25-20)/4.5)))*4.5 + 20 # Baseline ACT score d$c8<-(d$x8*1.59 + 4.75)**2 # Baseline SGRQ score d$c9<-(d$x9*0.512 + 1.036)**2 # Baseline RQLQ score d$c10<-(qnorm(pnorm(d$x10)*pnorm((100-80)/20)))*20 + 80 # Baseline EuroQoL score # Sex (c11) d$lp11<- (8.36 - 0.196*d$c1 -0.650*d$c2 + 0.0095*d$c1*d$c2- 0.009*d$c3 + 0.020*d$c4 + 0.568*d$c5- 0.023*d$c6+ 0.015*d$c7 + 0.0007*d$c8- 0.364*d$c9 - 0.022*d$c10) u<-runif(n,0,1) d$c11<-ifelse(u<(exp(d$lp11)/(1 + exp(d$lp11))),1,0) # Ethnicity (c12 and c13) # - White vs not d$lp12a<- (-17.45 + 28.29*d$c11 + 0.203*d$c1 + 0.279*d$c2 - 0.0063*d$c1*d$c2+ 0.026*d$c3 + 0.009*d$c4 - 0.353*d$c5 + 0.210*d$c6 + 0.234*d$c7 + 0.021*d$c8 - 0.168*d$c9 + 0.020*d$c10 - 0.4538531*d$c1*d$c11 - 0.894*d$c2*d$c11 + 0.0174*d$c1*d$c2*d$c11 + 0.020*d$c3*d$c11 - 0.006*d$c4*d$c11 + 2.875*d$c5*d$c11 + 0.046*d$c6*d$c11- 0.159*d$c7*d$c11- 0.072*d$c8*d$c11+ 0.415*d$c9*d$c11 - 0.022*d$c10*d$c11) ua<-runif(n,0,1) d$c12a<-ifelse(ua<(exp(d$lp12a)/(1 + exp(d$lp12a))),1,0) # - Black vs other in non-white d$lp12b <- (-3.61 - 1.829*d$c11 + 0.016*d$c1 + 0.245*d$c2 + 0.04*d$c3 + 0.001*d$c4 - 0.426*d$c5- 0.276*d$c6 - 0.044*d$c7 - 0.079*d$c8+ 0.849*d$c9 - 0.040*d$c10) ub<-runif(n,0,1) d$c12b<-ifelse(ub<(exp(d$lp12b)/(1 + exp(d$lp12b))),1,0) d$c12_temp[d$c12a==1]<-1 d$c12_temp[d$c12b==1 & d$c12a==0]<-2 d$c12_temp[d$c12b==0 & d$c12a==0]<-3 # Dummy variables for ethnicity d$c12<-ifelse(d$c12_temp==2,1,0) d$c13<-ifelse(d$c12_temp==3,1,0) # Smoking (c14) d$lp14 <- (-6.23 +1.13*d$c12 - 0.381*d$c13 - 0.032*d$c11 + 0.342*d$c1 - 0.0045*d$c1^2 - 0.218*d$c2 + 0.012*d$c3 - 0.002*d$c4- 0.734*d$c5+ 0.77*d$c6 + 0.044*d$c7 + 0.051*d$c8 - 0.659*d$c9 + 0.021*d$c10) uc<-runif(n,0,1) d$c14<-ifelse(uc<(exp(d$lp14)/(1 + exp(d$lp14))),1,0) # Create 3 variables highly colinear covariates with pefr_0 u1<- d$x4*sqrt(0.95) + rnorm(n,0,1)*sqrt(0.05) u2<- d$x4*sqrt(0.95) + rnorm(n,0,1)*sqrt(0.05) u3<- d$x4*sqrt(0.95) + rnorm(n,0,1)*sqrt(0.05) # Create 3 colinear noise variables (c15-17) d$c15<-qexp(pnorm(u1),1/0.5) d$c16<-(qnorm(pnorm(u2)*pnorm((50-40)/10)))*10 + 40 d$c17<-qchisq(pnorm(u3),4) # Create other noise variables (i.e. not risk factors) here d$c18 <- rnorm(n, 0, 1) d$c19 <- qexp(pnorm(rnorm(n, 0, 1)), rate=2) d$c20 <- (qnorm(pnorm(rnorm(n, 0, 1))*pnorm((50-40)/10)))*10 + 40 d$c21 <- qchisq(pnorm(rnorm(n, 0, 1)), 4) #### Centre and standardise the covariates d$c1 <- (d$c1 - mean(d$c1))/sd(d$c1) d$c2 <- (d$c2 - mean(d$c2))/sd(d$c2) d$c3 <- (d$c3 - mean(d$c3))/sd(d$c3) d$c4 <- (d$c4 - mean(d$c4))/sd(d$c4) d$c5 <- (d$c5 - mean(d$c5))/sd(d$c5) d$c6 <- (d$c6 - mean(d$c6))/sd(d$c6) d$c7 <- (d$c7 - mean(d$c7))/sd(d$c7) d$c8 <- (d$c8 - mean(d$c8))/sd(d$c8) d$c9 <- (d$c9 - mean(d$c9))/sd(d$c9) d$c10 <- (d$c10 - mean(d$c10))/sd(d$c10) d$c11 <- (d$c11 - mean(d$c11))/sd(d$c11) d$c12 <- (d$c12 - mean(d$c12))/sd(d$c12) d$c13 <- (d$c13 - mean(d$c13))/sd(d$c13) d$c14 <- (d$c14 - mean(d$c14))/sd(d$c14) d$c15 <- (d$c15 - mean(d$c15))/sd(d$c15) d$c16 <- (d$c16 - mean(d$c16))/sd(d$c16) d$c17 <- (d$c17 - mean(d$c17))/sd(d$c17) d$c18 <- (d$c18 - mean(d$c18))/sd(d$c18) d$c19 <- (d$c19 - mean(d$c19))/sd(d$c19) d$c20 <- (d$c20 - mean(d$c20))/sd(d$c20) d$c21 <- (d$c21 - mean(d$c21))/sd(d$c21) # Put covariates, ID and treatment together d<-cbind(data,d) ### Generate outcomes if (treat_effect==TRUE) { # Continuous outcome: Based on real data. Lots of small effects and interactions with sex. No treatment-covariate interactions. y1_base_mean0 <- (381.1555 -9.226248*d$c14 + 12.07*d$c12 + 23.29*d$c13 + 1.525504*d$c11 - 10.4878*d$c1 - .595686*d$c2 - .1443187*d$c1*d$c2 + .0335872*d$c3 + 103.1218*d$c4 -4.413275*d$c5 -1.633389*d$c6 + 1.02916*d$c7 + 4.685524*d$c8 + 1.990803*d$c9 + 8.379155*d$c10 + 16.73711*d$c1*d$c11 -4.196649*d$c2*d$c11 + 26.74518*d$c1*d$c2*d$c11 -4.499786*d$c3*d$c11 + 9.622228*d$c4*d$c11 + 2.60351*d$c5*d$c11 + 7.98627*d$c6*d$c11 + 9.375551*d$c7*d$c11 -14.80215*d$c8*d$c11 + 5.965411*d$c9*d$c11 - 20.99641*d$c10*d$c11) d$cts_y1_0 <- y1_base_mean0 + rnorm(n, 0, 39.4) d$cts_y1_1 <- y1_base_mean0 + rnorm(n, 0, 39.4) + 42 d$cts_y1 <- d$cts_y1_0*(d$treat==0) + d$cts_y1_1*(d$treat==1) # Binary outcome: Based on real data. d$lpbin_y1 <- (-.1866098 + .3067374*d$c14 -.781858*d$c12 -.9585292*d$c13 -.1157582*d$c11 -.1660964*d$c1 + .025177*d$c2 - .0659497*d$c1*d$c2 -.0227533*d$c3 -.1541787*d$c4 + .234973*d$c5 -.2809131*d$c6 -.2913446*d$c7 + .0465881*d$c8 + .1649682*d$c9 + .098289*d$c10 -.145291*d$c1*d$c11 + .0775514*d$c2*d$c11 -1.091366*d$c1*d$c2*d$c11 + .0302699*d$c3*d$c11 + .1504022*d$c4*d$c11 -.391075*d$c5*d$c11 -.1739193*d$c6*d$c11 -1.024611*d$c7*d$c11 + .4959778*d$c8*d$c11 -.4946643*d$c9*d$c11 + .1711421*d$c10*d$c11 -0.7765288*d$treat) uc<-runif(n,0,1) d$bin_y1<-ifelse(uc<(exp(d$lpbin_y1)/(1 + exp(d$lpbin_y1))),1,0) ### Generate outcomes with single treat-covariate interactions (with PEFR) # Continuous outcome: Based on real data. Lots of small effects and interactions with sex. One treatment-covariate interaction (with PEFR). y2_base_mean0 <- (381.1555 -9.226248*d$c14 + 12.07*d$c12 + 23.29*d$c13 + 1.525504*d$c11 - 10.4878*d$c1 - .595686*d$c2 - .1443187*d$c1*d$c2 + .0335872*d$c3 + 80*d$c4 -4.413275*d$c5 -1.633389*d$c6 + 1.02916*d$c7 + 4.685524*d$c8 + 1.990803*d$c9 + 8.379155*d$c10 + 16.73711*d$c1*d$c11 -4.196649*d$c2*d$c11 + 26.74518*d$c1*d$c2*d$c11 -4.499786*d$c3*d$c11 + 9.622228*d$c4*d$c11 + 2.60351*d$c5*d$c11 + 7.98627*d$c6*d$c11 + 9.375551*d$c7*d$c11 -14.80215*d$c8*d$c11 + 5.965411*d$c9*d$c11 - 20.99641*d$c10*d$c11) d$cts_y2_0 <- y2_base_mean0 + rnorm(n, 0, 39.4) d$cts_y2_1 <- y2_base_mean0 + rnorm(n, 0, 39.4) + (42 + 20*d$c4) d$cts_y2 <- d$cts_y2_0*(d$treat==0) + d$cts_y2_1*(d$treat==1) # Binary outcome: Based on real data. d$lpbin_y2 <- (-.1866098 + .3067374*d$c14 -.781858*d$c12 -.9585292*d$c13 -.1157582*d$c11 -.1660964*d$c1 + .025177*d$c2 - .0659497*d$c1*d$c2 -.0227533*d$c3 -0.05*d$c4 + .234973*d$c5 -.2809131*d$c6 -.2913446*d$c7 + .0465881*d$c8 + .1649682*d$c9 + .098289*d$c10 -.145291*d$c1*d$c11 + .0775514*d$c2*d$c11 -1.091366*d$c1*d$c2*d$c11 + .0302699*d$c3*d$c11 + .1504022*d$c4*d$c11 -.391075*d$c5*d$c11 -.1739193*d$c6*d$c11 -1.024611*d$c7*d$c11 + .4959778*d$c8*d$c11 -.4946643*d$c9*d$c11 + .1711421*d$c10*d$c11 - 0.7765288*d$treat - 0.1*d$c4*d$treat) uc<-runif(n,0,1) d$bin_y2<-ifelse(uc<(exp(d$lpbin_y2)/(1 + exp(d$lpbin_y2))),1,0) ### Generate outcomes with multiple low-level treat-covariate interactions # Continuous outcome: Based on real data. Lots of small effects and interactions with sex. y3_base_mean0 <-(381.1555 -9.226248*d$c14 + 12.07*d$c12 + 23.29*d$c13 + 1.525504*d$c11 - 5*d$c1 - 0.3*d$c2 - .1443187*d$c1*d$c2 + 0.01*d$c3 + 80*d$c4 -2*d$c5 -1.633389*d$c6 + 1.02916*d$c7 + 4.685524*d$c8 + 1.990803*d$c9 + 8.379155*d$c10 + 16.73711*d$c1*d$c11 -4.196649*d$c2*d$c11 + 26.74518*d$c1*d$c2*d$c11 -4.499786*d$c3*d$c11 + 9.622228*d$c4*d$c11 + 2.60351*d$c5*d$c11 + 7.98627*d$c6*d$c11 + 9.375551*d$c7*d$c11 -14.80215*d$c8*d$c11 + 5.965411*d$c9*d$c11 - 20.99641*d$c10*d$c11) d$cts_y3_0 <- y3_base_mean0 + rnorm(n, 0, 39.4) d$cts_y3_1 <- y3_base_mean0 + rnorm(n, 0, 39.4) + (42 - 10*d$c1 - 0.3*d$c2 + 0.02*d$c3 + 20*d$c4 -2*d$c5) d$cts_y3 <- d$cts_y3_0*(d$treat==0) + d$cts_y3_1*(d$treat==1) # Binary outcome: Based on real data. d$lpbin_y3 <- (-.1866098 + 0.1*d$c14 -0.5*d$c12 -0.2*d$c13 -.1157582*d$c11 -0.06*d$c1 + .025177*d$c2 - .0659497*d$c1*d$c2 -.0227533*d$c3 -0.05*d$c4 + .234973*d$c5 -.2809131*d$c6 -.2913446*d$c7 + .0465881*d$c8 + .1649682*d$c9 + .098289*d$c10 -.145291*d$c1*d$c11 + .0775514*d$c2*d$c11 -1.091366*d$c1*d$c2*d$c11 + .0302699*d$c3*d$c11 + .1504022*d$c4*d$c11 -.391075*d$c5*d$c11 -.1739193*d$c6*d$c11 -1.024611*d$c7*d$c11 + .4959778*d$c8*d$c11 -.4946643*d$c9*d$c11 + .1711421*d$c10*d$c11 - 0.7765288*d$treat - 0.1*d$c4*d$treat - 0.1*d$c1*d$treat + 0.2*d$c14*d$treat - 0.3*d$c12*d$treat - 0.8*d$c13*d$treat) uc<-runif(n,0,1) d$bin_y3<-ifelse(uc<(exp(d$lpbin_y3)/(1 + exp(d$lpbin_y3))),1,0) } else { # Continuous outcome: Based on real data. Lots of small effects and interactions with sex. No treatment-covariate interactions. y1_base_mean0 <- (381.1555 -9.226248*d$c14 + 12.07*d$c12 + 23.29*d$c13 + 1.525504*d$c11 - 10.4878*d$c1 - .595686*d$c2 - .1443187*d$c1*d$c2 + .0335872*d$c3 + 103.1218*d$c4 -4.413275*d$c5 -1.633389*d$c6 + 1.02916*d$c7 + 4.685524*d$c8 + 1.990803*d$c9 + 8.379155*d$c10 + 16.73711*d$c1*d$c11 -4.196649*d$c2*d$c11 + 26.74518*d$c1*d$c2*d$c11 -4.499786*d$c3*d$c11 + 9.622228*d$c4*d$c11 + 2.60351*d$c5*d$c11 + 7.98627*d$c6*d$c11 + 9.375551*d$c7*d$c11 -14.80215*d$c8*d$c11 + 5.965411*d$c9*d$c11 - 20.99641*d$c10*d$c11) d$cts_y1_0 <- y1_base_mean0 + rnorm(n, 0, 39.4) d$cts_y1_1 <- y1_base_mean0 + rnorm(n, 0, 39.4) d$cts_y1 <- d$cts_y1_0*(d$treat==0) + d$cts_y1_1*(d$treat==1) # Binary outcome: Based on real data. d$lpbin_y1 <- (-.1866098 + .3067374*d$c14 -.781858*d$c12 -.9585292*d$c13 -.1157582*d$c11 -.1660964*d$c1 + .025177*d$c2 - .0659497*d$c1*d$c2 -.0227533*d$c3 -.1541787*d$c4 + .234973*d$c5 -.2809131*d$c6 -.2913446*d$c7 + .0465881*d$c8 + .1649682*d$c9 + .098289*d$c10 -.145291*d$c1*d$c11 + .0775514*d$c2*d$c11 -1.091366*d$c1*d$c2*d$c11 + .0302699*d$c3*d$c11 + .1504022*d$c4*d$c11 -.391075*d$c5*d$c11 -.1739193*d$c6*d$c11 -1.024611*d$c7*d$c11 + .4959778*d$c8*d$c11 -.4946643*d$c9*d$c11 + .1711421*d$c10*d$c11) uc<-runif(n,0,1) d$bin_y1<-ifelse(uc<(exp(d$lpbin_y1)/(1 + exp(d$lpbin_y1))),1,0) ### Generate outcomes with single treat-covariate interactions (with PEFR) # Continuous outcome: Based on real data. Lots of small effects and interactions with sex. One treatment-covariate interaction (with PEFR). y2_base_mean0 <- (381.1555 -9.226248*d$c14 + 12.07*d$c12 + 23.29*d$c13 + 1.525504*d$c11 - 10.4878*d$c1 - .595686*d$c2 - .1443187*d$c1*d$c2 + .0335872*d$c3 + 80*d$c4 -4.413275*d$c5 -1.633389*d$c6 + 1.02916*d$c7 + 4.685524*d$c8 + 1.990803*d$c9 + 8.379155*d$c10 + 16.73711*d$c1*d$c11 -4.196649*d$c2*d$c11 + 26.74518*d$c1*d$c2*d$c11 -4.499786*d$c3*d$c11 + 9.622228*d$c4*d$c11 + 2.60351*d$c5*d$c11 + 7.98627*d$c6*d$c11 + 9.375551*d$c7*d$c11 -14.80215*d$c8*d$c11 + 5.965411*d$c9*d$c11 - 20.99641*d$c10*d$c11) d$cts_y2_0 <- y2_base_mean0 + rnorm(n, 0, 39.4) d$cts_y2_1 <- y2_base_mean0 + rnorm(n, 0, 39.4) d$cts_y2 <- d$cts_y2_0*(d$treat==0) + d$cts_y2_1*(d$treat==1) # Binary outcome: Based on real data. d$lpbin_y2 <- (-.1866098 + .3067374*d$c14 -.781858*d$c12 -.9585292*d$c13 -.1157582*d$c11 -.1660964*d$c1 + .025177*d$c2 - .0659497*d$c1*d$c2 -.0227533*d$c3 -0.05*d$c4 + .234973*d$c5 -.2809131*d$c6 -.2913446*d$c7 + .0465881*d$c8 + .1649682*d$c9 + .098289*d$c10 -.145291*d$c1*d$c11 + .0775514*d$c2*d$c11 -1.091366*d$c1*d$c2*d$c11 + .0302699*d$c3*d$c11 + .1504022*d$c4*d$c11 -.391075*d$c5*d$c11 -.1739193*d$c6*d$c11 -1.024611*d$c7*d$c11 + .4959778*d$c8*d$c11 -.4946643*d$c9*d$c11 + .1711421*d$c10*d$c11) uc<-runif(n,0,1) d$bin_y2<-ifelse(uc<(exp(d$lpbin_y2)/(1 + exp(d$lpbin_y2))),1,0) ### Generate outcomes with multiple low-level treat-covariate interactions # Continuous outcome: Based on real data. Lots of small effects and interactions with sex. y3_base_mean0 <-(381.1555 -9.226248*d$c14 + 12.07*d$c12 + 23.29*d$c13 + 1.525504*d$c11 - 5*d$c1 - 0.3*d$c2 - .1443187*d$c1*d$c2 + 0.01*d$c3 + 80*d$c4 -2*d$c5 -1.633389*d$c6 + 1.02916*d$c7 + 4.685524*d$c8 + 1.990803*d$c9 + 8.379155*d$c10 + 16.73711*d$c1*d$c11 -4.196649*d$c2*d$c11 + 26.74518*d$c1*d$c2*d$c11 -4.499786*d$c3*d$c11 + 9.622228*d$c4*d$c11 + 2.60351*d$c5*d$c11 + 7.98627*d$c6*d$c11 + 9.375551*d$c7*d$c11 -14.80215*d$c8*d$c11 + 5.965411*d$c9*d$c11 - 20.99641*d$c10*d$c11) d$cts_y3_0 <- y3_base_mean0 + rnorm(n, 0, 39.4) d$cts_y3_1 <- y3_base_mean0 + rnorm(n, 0, 39.4) d$cts_y3 <- d$cts_y3_0*(d$treat==0) + d$cts_y3_1*(d$treat==1) # Binary outcome: Based on real data. d$lpbin_y3 <- (-.1866098 + 0.1*d$c14 -0.5*d$c12 -0.2*d$c13 -.1157582*d$c11 -0.06*d$c1 + .025177*d$c2 - .0659497*d$c1*d$c2 -.0227533*d$c3 -0.05*d$c4 + .234973*d$c5 -.2809131*d$c6 -.2913446*d$c7 + .0465881*d$c8 + .1649682*d$c9 + .098289*d$c10 -.145291*d$c1*d$c11 + .0775514*d$c2*d$c11 -1.091366*d$c1*d$c2*d$c11 + .0302699*d$c3*d$c11 + .1504022*d$c4*d$c11 -.391075*d$c5*d$c11 -.1739193*d$c6*d$c11 -1.024611*d$c7*d$c11 + .4959778*d$c8*d$c11 -.4946643*d$c9*d$c11 + .1711421*d$c10*d$c11) uc<-runif(n,0,1) d$bin_y3<-ifelse(uc<(exp(d$lpbin_y3)/(1 + exp(d$lpbin_y3))),1,0) } # Select only required elements of d to keep if (po==TRUE) { d2 <- d[,c("id","treat","cts_y1", "cts_y1_1", "cts_y1_0", "bin_y1", "cts_y2", "cts_y2_1", "cts_y2_0", "bin_y2", "cts_y3", "cts_y3_1", "cts_y3_0", "bin_y3", "c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16", "c17", "c18", "c19", "c20", "c21")] } else { d2 <- d[,c("id","treat","cts_y1", "bin_y1", "cts_y2", "bin_y2", "cts_y3", "bin_y3", "c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16", "c17", "c18", "c19", "c20", "c21")] } return(d2) } # Extension 2: Interactions---------------------------------------------------- gen_cts_inter <- function(n, true_mean=FALSE, po=FALSE){ # Randomised treatment treat <- rep_len(0:1,n) # Baseline risk factor x <- rnorm(n, 0, 1) c <- x**2 ### Outcome variables # Generic error term e1 <- rnorm(n, 0, 60) # Outcome 1: Linear in both arms, quantitative interaction y1_0_mean <- 395 + 80*x y1_1_mean <- 395 + 80*x + 20*x + 40 y1_0 <- y1_0_mean + e1 y1_1 <- y1_1_mean + e1 y1 <- y1_0*(1-treat) + y1_1*treat # Outcome 2: Linear in both arms, qualitative interaction (i.e. switch direction) y2_0_mean <- 500 + 20*x y2_1_mean <- 500 + 20*x + 90*x - 65 y2_0 <- y2_0_mean + e1 y2_1 <- y2_1_mean + e1 y2 <- y2_0*(1-treat) + y2_1*treat # Outcome 3: Linear in one arm, exponential in other y3_0_mean <- 450 + 100*x y3_1_mean <- 450 + 100 - exp(-(x-3.5)) y3_0 <- y3_0_mean + e1 y3_1 <- y3_1_mean + e1 y3 <- y3_0*(1-treat) + y3_1*treat # Outcome 4: Exponential in one arm, absent in other y4_0_mean <- 0 y4_1_mean <- 13*(c**2) y4_0 <- y4_0_mean + e1 y4_1 <- y4_1_mean + e1 y4 <- y4_0*(1-treat) + y4_1*treat ### Keep required variables d <- as.data.frame(cbind(treat, c, x, y1, y2, y3, y4)) if (po==TRUE) { d<- as.data.frame(cbind(d, y1_0, y2_0, y3_0, y4_0, y1_1, y2_1, y3_1, y4_1)) } if (true_mean==TRUE) { d<- as.data.frame(cbind(d, y1_0_mean, y2_0_mean, y3_0_mean, y4_0_mean, y1_1_mean, y2_1_mean, y3_1_mean, y4_1_mean)) } # Return the dataset needed for analysis return(d) } # Extension 3: Binary outcome-------------------------------------------------- generate_bin_strong <-function(n, true_or=0.46, po=FALSE){ # Randomised treatment treat <- rep_len(0:1,n) # Baseline risk factor x <- (qnorm(runif(n)*pnorm((25-20)/4.5)) + dnorm((25-20)/4.5)/pnorm((25-20)/4.5))/sqrt(1 - ((25-20)/4.5)*dnorm((25-20)/4.5)/pnorm((25-20)/4.5) + (dnorm((25-20)/4.5)/pnorm((25-20)/4.5))^2) ### Outcome variables # Outcome 1: Linear lnpy1_0 <- -2 -4*x lnpy1_1 <- -2 -4*x + log(true_or) y1_0 <- runif(n)< exp(lnpy1_0)/(1 + exp(lnpy1_0)) y1_1 <- runif(n)< exp(lnpy1_1)/(1 + exp(lnpy1_1)) y1 <- y1_0*(treat==0) + y1_1*(treat==1) # Outcome 2: Quadratic lnpy2_0 <- -1*x*x - 4.5*x - 1 lnpy2_1 <- -1*x*x - 4.5*x - 1 + log(true_or) y2_0 <- runif(n)< exp(lnpy2_0)/(1 + exp(lnpy2_0)) y2_1 <- runif(n)< exp(lnpy2_1)/(1 + exp(lnpy2_1)) y2 <- y2_0*(treat==0) + y2_1*(treat==1) # Outcome 3: Flattening-off lnpy3_0 <- 10 - 15*exp(-x) lnpy3_1 <- 10 - 15*exp(-x) + log(true_or) y3_0 <- runif(n)< exp(lnpy3_0)/(1 + exp(lnpy3_0)) y3_1 <- runif(n)< exp(lnpy3_1)/(1 + exp(lnpy3_1)) y3 <- y3_0*(treat==0) + y3_1*(treat==1) # Outcome 4: J-shaped lnpy4_0 <- -3*x^2 - 6*x - 0.5 lnpy4_1 <- -3*x^2 - 6*x - 0.5 + log(true_or) y4_0 <- runif(n)< exp(lnpy4_0)/(1 + exp(lnpy4_0)) y4_1 <- runif(n)< exp(lnpy4_1)/(1 + exp(lnpy4_1)) y4 <- y4_0*(treat==0) + y4_1*(treat==1) # Outcome 5: Full (i.e. symmetric) quadratic lnpy5_0 <- -4*x^2 - 3.5*x + 0.5 lnpy5_1 <- -4*x^2 - 3.5*x + 0.5 + log(true_or) y5_0 <- runif(n)< exp(lnpy5_0)/(1 + exp(lnpy5_0)) y5_1 <- runif(n)< exp(lnpy5_1)/(1 + exp(lnpy5_1)) y5 <- y5_0*(treat==0) + y5_1*(treat==1) # Outcome 6: Harmonic lnpy6_0 <- 10*cos(2*pi*3*x*0.15 + 5.25) - 5 lnpy6_1 <- 10*cos(2*pi*3*x*0.15 + 5.25) - 5 + log(true_or) y6_0 <- runif(n)< exp(lnpy6_0)/(1 + exp(lnpy6_0)) y6_1 <- runif(n)< exp(lnpy6_1)/(1 + exp(lnpy6_1)) y6 <- y6_0*(treat==0) + y6_1*(treat==1) # Outcome 7: Two-tier lnpy7_0 <- -2 + 6*(x>0) lnpy7_1 <- -2 + 6*(x>0) + log(true_or) y7_0 <- runif(n)< exp(lnpy7_0)/(1 + exp(lnpy7_0)) y7_1 <- runif(n)< exp(lnpy7_1)/(1 + exp(lnpy7_1)) y7 <- y7_0*(treat==0) + y7_1*(treat==1) # Select only required elements of d to keep if (po==TRUE) { d <- as.data.frame(cbind(treat, x, y1, y2, y3, y4, y5, y6, y7, y1_0, y1_1, y2_0, y2_1, y3_0, y3_1, y4_0, y4_1, y5_0, y5_1, y6_0, y6_1, y7_0, y7_1)) } else { d <- as.data.frame(cbind(treat, x, y1, y2, y3, y4, y5, y6, y7)) } # Return the dataset needed for analysis return(d) }