#### BOUNDING THE LOCAL AVERAGE TREATMENT EFFECT ####
#### IN AN INSTRUMENTAL VARIABLE ANALYSIS OF ####
#### ENGAGEMENET WITH A MOBILE INTERVENTION ####

#### APPLICATION TO REACH STUDY ####

#### Last updated: 04/01/2021 ####

## LOAD RELEVANT LIBRARIES
library("mice")

## IMPORT DATA
dat <- read.csv("reach.csv", header = TRUE, stringsAsFactors = FALSE)
N <- dim(dat)[1]

## DESCRIPTION OF VARIABLES IN DATA
# ID                    Unique subject ID
# REACH                 Indicator of treatment arm (0 = Control; 1 = REACH)
# Age.0                 Age at time of randomization (years)
# Male.0                Gender (0 = female; 1 = male)
# RaceEthnicity.0       Race/Ethnicity (0 = white, 1 = black, 2 = Hispanic, 3 = Asian, 4 = other)
# EducYears.0           Self-reported years of education
# DM.Duration.0         Duration of diabetes at time of randomization (years)
# PDSMS4.0              Perceived Diabetes Self-Management Scale score (baseline)
# ARMSD.0               Adherence to Refills and Medications Scale (baseline)
# SDSCA.0               Summary of Diabetes Self-Care Activities medications subscale (baseline)
# PDQ_info.0            Personal Diabetes Questionnaire score (baseline)
# Insulin.0             Indicator of insulin use (baseline)
# ResponseRate          Response rate (proportion)
# A1c.0                 HbA1c (baseline)
# A1c.6                 HbA1c (six months post-randomization)
##

## MULTIPLE IMPUTATION PROCEDURE VIA CHAINED EQUATIONS

## The subject ID is a passive variable that need not be included
dat.noID <- dat[,-1]

## Set the number of imputations
n.imp <- 500

## Set seed for reproducibility
set.seed(1)

## Perform the imputation
dat.impute <- mice(dat.noID, m = n.imp)

## EXTRACT, ORGANIZE, AND MERGE THE IMPUTED DATA SETS

## Keep a record of the original data set (IMP = 0 denotes original data)
## Note that we only extract the variables that will be used in the model
imputed.data <- cbind(dat[,c(1:2,13:15)], IMP = rep(0, length(dat$ID)))

## Stack (and index) the imputed data sets
for (j in 1:n.imp) {
  imputed.data <- rbind(imputed.data,
                        cbind(ID = dat$ID, ### This marks the subject ID
                              complete(dat.impute, j)[,c(1,12:14)], ### Extract only relevant variables
                              IMP = rep(j, length(dat$ID)) ### This marks the imputation number
                              )
                        )
}

## Write the imputed data sets to a single file to avoid
## needing to run the imputation repeatedly
write.csv(imputed.data, "reach-imputed-data.csv", row.names = FALSE)

## ANALYSES FOR PAPER

## Characterize distribution of response rate in REACH group
hist(dat$ResponseRate[dat$REACH == 1], xlab = "Engagement", main = "")

print(rbind(MEAN = mean(dat$ResponseRate[dat$REACH == 1]),
        SD = sd(dat$ResponseRate[dat$REACH == 1]),
        Q75 = as.numeric(quantile(dat$ResponseRate[dat$REACH == 1], 0.75)),
        Q50 = as.numeric(quantile(dat$ResponseRate[dat$REACH == 1], 0.50)),
        Q25 = as.numeric(quantile(dat$ResponseRate[dat$REACH == 1], 0.25)),
        P50 = mean(as.numeric(dat$ResponseRate[dat$REACH == 1] <= 0.5)),
        P0 = mean(as.numeric(dat$ResponseRate[dat$REACH == 1] == 0))
        )
      )

# MEAN 0.813761468
# SD   0.231091368
# Q75  0.969000000
# Q50  0.915000000
# Q25  0.740000000
# P50  0.110091743
# P0   0.009174312

## ESTIMATE THE INTENTION-TO-TREAT EFFECT (ITT)

## Set seed for reproducibility
set.seed(1)

## Set knots for restricted cubic spline on baseline HbA1c
knot1 <- 8.9
knot2 <- 9.7
knot3 <- 11.1

## Set number of bootstrap replicates
n.boot <- 500

## Store ITT estimates across imputation/bootstrap iterations
ITT.RES <- matrix(0, nrow = n.imp * n.boot, ncol = 1)

## Store estimates of mean engagement across imputation/bootstrap iterations
ENGAGE.RES <- matrix(0, nrow = n.imp * n.boot, ncol = 1)
ENGAGE.RES.D <- matrix(0, nrow = n.imp * n.boot, ncol = 1)

for (j in 1:n.imp)
{
  ## Extract the 'j'th imputed data set
  j.dat <- imputed.data[(N*j + 1):(N*(j + 1)),]
  
  ## Iterate bootstrap procedure
  for (k in 1:n.boot)
  {
    ## Take random sample with replacement
    samp <- sample(1:N, size = N, replace = TRUE)
    bdat <- j.dat[samp,]
    
    ## Create basis functions for restricted cubic spline
    bA1c <- bdat$A1c.0
    h2 <- bA1c
    tmp1 <- (pmax(0, (bA1c - knot1)^3) - pmax(0, (bA1c - knot3)^3))/(knot3 - knot1)
    tmp2 <- (pmax(0, (bA1c - knot2)^3) - pmax(0, (bA1c - knot3)^3))/(knot3 - knot2)
    h3 <- tmp1 - tmp2
    
    ## Define basis matrix for ITT
    X <- cbind(1, bdat$REACH, h2, h3)
    Y <- cbind(bdat$A1c.6)
    
    ## Point estimate for ITT
    jk.ITT <- (solve(t(X) %*% X) %*% (t(X) %*% Y))[2]
    ITT.RES[n.boot*(j - 1) + k] <- jk.ITT
    
    ## Point estimate for mean engagement
    jk.mean.engage <- mean(bdat$ResponseRate[bdat$REACH == 1])
    jk.mean.engage.d <- mean(as.numeric(bdat$ResponseRate[bdat$REACH == 1] > 0.8))
    ENGAGE.RES[n.boot*(j - 1) + k] <- jk.mean.engage
    ENGAGE.RES.D[n.boot*(j - 1) + k] <- jk.mean.engage.d
  }
  
  ## Display progress
  if(round(j/10) == (j/10)) {print(paste(j, "iterations complete!"))}
}

## Display ITT estimates (verify approximate normality)
hist(ITT.RES, xlab = expression(Delta[ITT]), main = "", breaks = 20)

## Extract point estimate, standard error, confidence interval, and p-value for ITT
print(rbind(MEAN = mean(ITT.RES),
        SE = sd(ITT.RES),
        CI.LO = as.numeric(quantile(ITT.RES, 0.025)),
        CI.HI = as.numeric(quantile(ITT.RES, 0.975)),
        p = 2 * pnorm(-abs(mean(ITT.RES)/sd(ITT.RES)))
        )
      )

# MEAN  -0.760743513
# SE     0.270556003
# CI.LO -1.298593431
# CI.HI -0.234922760
# p      0.004926844

## Display mean response rate estimates (verify approximate normality)
hist(ENGAGE.RES, xlab = "Response Rate", main = "", breaks = 20)

## Storage of the ITT effects/mean engagement level is important
## for the sensitivity analysis procedure

## Set range of "a" under consideration (engagement)
a.range <- c(0, 0.5, 0.8138, 1)

## Set range of gamma under consideration (sensitivity parameter)
gamma.range <- c(0.00, 0.25, 0.50, 0.75, 1.00)

## Function to perform sensitivity analysis based on extracted ITT effects
## and extracted mean engagement levels
Delta.gamma <- function(a, ITT, Engage, gamma)
{
  Delta.1 <- ITT/(gamma * (1 - Engage) + Engage)
  Delta.a <- Delta.1 * ((1 - a) * gamma + a)
  Point.Est <- mean(Delta.a)
  CI.LO = as.numeric(quantile(Delta.a, 0.025))
  CI.HI = as.numeric(quantile(Delta.a, 0.975))
  return(c(Point.Est, CI.LO, CI.HI))
}

## Perform sensitivity analyses for various gamma levels and label for clarity
Delta.0.00 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES, gamma = gamma.range[1])))
Delta.0.25 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES, gamma = gamma.range[2])))
Delta.0.50 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES, gamma = gamma.range[3])))
Delta.0.75 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES, gamma = gamma.range[4])))
Delta.1.00 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES, gamma = gamma.range[5])))

names(Delta.0.00) <- names(Delta.0.25) <- names(Delta.0.50) <- names(Delta.0.75) <- names(Delta.1.00) <- c("Point.Est", "CI.LO", "CI.HI")
row.names(Delta.0.00) <- row.names(Delta.0.25) <- row.names(Delta.0.50) <- row.names(Delta.0.75) <- row.names(Delta.1.00) <- a.range

## Display results
print(Delta.0.00)
print(Delta.0.25)
print(Delta.0.50)
print(Delta.0.75)
print(Delta.1.00)

## These results should mirror those that appear in the manuscript

# > print(Delta.0.00)
# Point.Est      CI.LO      CI.HI
# 0       0.0000000  0.0000000  0.0000000
# 0.5    -0.4675473 -0.7987176 -0.1444205
# 0.8138 -0.7609800 -1.2999928 -0.2350587
# 1      -0.9350946 -1.5974353 -0.2888409
# > print(Delta.0.25)
# Point.Est      CI.LO       CI.HI
# 0      -0.2210713 -0.3775068 -0.06827876
# 0.5    -0.5526783 -0.9437669 -0.17069689
# 0.8138 -0.7607949 -1.2991518 -0.23497451
# 1      -0.8842853 -1.5100271 -0.27311503
# > print(Delta.0.50)
# Point.Est      CI.LO      CI.HI
# 0      -0.4194014 -0.7154009 -0.1296621
# 0.5    -0.6291021 -1.0731014 -0.1944931
# 0.8138 -0.7607102 -1.2975942 -0.2351810
# 1      -0.8388028 -1.4308019 -0.2593241
# > print(Delta.0.75)
# Point.Est     CI.LO      CI.HI
# 0      -0.5983792 -1.021185 -0.1849461
# 0.5    -0.6981091 -1.191383 -0.2157705
# 0.8138 -0.7606995 -1.298199 -0.2351159
# 1      -0.7978390 -1.361580 -0.2465948
# > print(Delta.1.00)
# Point.Est     CI.LO      CI.HI
# 0      -0.7607435 -1.298593 -0.2349228
# 0.5    -0.7607435 -1.298593 -0.2349228
# 0.8138 -0.7607435 -1.298593 -0.2349228
# 1      -0.7607435 -1.298593 -0.2349228

## These results should mirror the figure that appears in the manuscript

## Panel for gamma = 0.25
plot(a.range, Delta.0.25[,1], frame.plot = FALSE, xlab = "Engagement", ylab = "Treatment Effect", type = "l",
     xlim = c(0,1), ylim = c(-2,0), main = expression(paste(gamma, " = 0.25")), col = "gray10", lwd = 1.2,
     xaxt = 'n')
axis(1, c(0, 0.25, 0.50, 0.75, 1))
for (k in 1:length(a.range))
{
  segments(a.range[k], Delta.0.25[k,2], a.range[k], Delta.0.25[k,3],
           col = "gray40", lwd = 1.3)
  segments(a.range[k] - 0.02, Delta.0.25[k,2], a.range[k] + 0.02, Delta.0.25[k,2],
           col = "gray40", lwd = 1.3)
  segments(a.range[k] - 0.02, Delta.0.25[k,3], a.range[k] + 0.02, Delta.0.25[k,3],
           col = "gray40", lwd = 1.3)
}
abline(0,0, lty = 2, col = "gray40")
points(mean(ENGAGE.RES), mean(ITT.RES), col = "gray60", pch = 20, cex = 1.2)
points(1, Delta.0.25[length(a.range),1], col = "gray30", pch = 20, cex = 1.2)
points(0, Delta.0.25[1,1], col = "gray30", pch = 20, cex = 1.2)

## Panel for gamma = 0.50
plot(a.range, Delta.0.50[,1], frame.plot = FALSE, xlab = "Engagement", ylab = "Treatment Effect", type = "l",
     xlim = c(0,1), ylim = c(-2,0), main = expression(paste(gamma, " = 0.50")), col = "gray10", lwd = 1.2,
     xaxt = 'n')
axis(1, c(0, 0.25, 0.50, 0.75, 1))
for (k in 1:length(a.range))
{
  segments(a.range[k], Delta.0.50[k,2], a.range[k], Delta.0.50[k,3],
           col = "gray40", lwd = 1.3)
  segments(a.range[k] - 0.02, Delta.0.50[k,2], a.range[k] + 0.02, Delta.0.50[k,2],
           col = "gray40", lwd = 1.3)
  segments(a.range[k] - 0.02, Delta.0.50[k,3], a.range[k] + 0.02, Delta.0.50[k,3],
           col = "gray40", lwd = 1.3)
}
abline(0,0, lty = 2, col = "gray40")
points(mean(ENGAGE.RES), mean(ITT.RES), col = "gray60", pch = 20, cex = 1.2)
points(1, Delta.0.50[length(a.range),1], col = "gray30", pch = 20, cex = 1.2)
points(0, Delta.0.50[1,1], col = "gray30", pch = 20, cex = 1.2)

## Panel for gamma = 0.75
plot(a.range, Delta.0.75[,1], frame.plot = FALSE, xlab = "Engagement", ylab = "Treatment Effect", type = "l",
     xlim = c(0,1), ylim = c(-2,0), main = expression(paste(gamma, " = 0.75")), col = "gray10", lwd = 1.2,
     xaxt = 'n')
axis(1, c(0, 0.25, 0.50, 0.75, 1))
for (k in 1:length(a.range))
{
  segments(a.range[k], Delta.0.75[k,2], a.range[k], Delta.0.75[k,3],
           col = "gray40", lwd = 1.3)
  segments(a.range[k] - 0.02, Delta.0.75[k,2], a.range[k] + 0.02, Delta.0.75[k,2],
           col = "gray40", lwd = 1.3)
  segments(a.range[k] - 0.02, Delta.0.75[k,3], a.range[k] + 0.02, Delta.0.75[k,3],
           col = "gray40", lwd = 1.3)
}
abline(0,0, lty = 2, col = "gray40")
points(mean(ENGAGE.RES), mean(ITT.RES), col = "gray60", pch = 20, cex = 1.2)
points(1, Delta.0.75[length(a.range),1], col = "gray30", pch = 20, cex = 1.2)
points(0, Delta.0.75[1,1], col = "gray30", pch = 20, cex = 1.2)

## Further exploration of sensitivity parameters

## Idea 1
data.frame(t(apply(cbind(0.1921754), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES, gamma = 0.50)))
## Implication: For gamma = 0.50, a > 19.2% is associated with clinically meaningful point estimate of 0.5%

## Idea 2
data.frame(t(apply(cbind(0.10815985), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES, gamma = 0.25)))
## Implication: For gamma = 0.25, a < 10.8% rules out clinically meaningful effect of 0.5%

## Idea 3
gamma.test <- 0.6094988
data.frame(t(apply(cbind(0), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES, gamma = gamma.test)))
## Implication: For gamma = 0.69, the difference between the NECE and ECCE is 0.25%

## Function to perform sensitivity analysis based on extracted ITT effects
## and extracted mean engagement levels - with dichotomization
Delta.gamma <- function(a, ITT, Engage, gamma, zeta)
{
  Delta.1 <- ITT/(gamma * (1 - Engage) + Engage)
  Delta.a <- Delta.1 * ((1 - (a >= zeta)) * gamma + (a >= zeta))
  Point.Est <- mean(Delta.a)
  CI.LO = as.numeric(quantile(Delta.a, 0.025))
  CI.HI = as.numeric(quantile(Delta.a, 0.975))
  return(c(Point.Est, CI.LO, CI.HI))
}

## Perform sensitivity analyses for various gamma levels and label for clarity
Delta.0.00 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES.D, gamma = gamma.range[1], zeta = 0.8)))
Delta.0.25 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES.D, gamma = gamma.range[2], zeta = 0.8)))
Delta.0.50 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES.D, gamma = gamma.range[3], zeta = 0.8)))
Delta.0.75 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES.D, gamma = gamma.range[4], zeta = 0.8)))
Delta.1.00 <- data.frame(t(apply(cbind(a.range), 1, Delta.gamma, ITT = ITT.RES, Engage = ENGAGE.RES.D, gamma = gamma.range[5], zeta = 0.8)))

names(Delta.0.00) <- names(Delta.0.25) <- names(Delta.0.50) <- names(Delta.0.75) <- names(Delta.1.00) <- c("Point.Est", "CI.LO", "CI.HI")
row.names(Delta.0.00) <- row.names(Delta.0.25) <- row.names(Delta.0.50) <- row.names(Delta.0.75) <- row.names(Delta.1.00) <- a.range

## Display results
print(Delta.0.00)
print(Delta.0.25)
print(Delta.0.50)
print(Delta.0.75)
print(Delta.1.00)

# > print(Delta.0.00)
# Point.Est     CI.LO      CI.HI
# 0       0.000000  0.000000  0.0000000
# 0.5     0.000000  0.000000  0.0000000
# 0.8138 -1.094279 -1.886604 -0.3376174
# 1      -1.094279 -1.886604 -0.3376174
# > print(Delta.0.25)
# Point.Est      CI.LO       CI.HI
# 0      -0.2463257 -0.4216836 -0.07608206
# 0.5    -0.2463257 -0.4216836 -0.07608206
# 0.8138 -0.9853029 -1.6867345 -0.30432823
# 1      -0.9853029 -1.6867345 -0.30432823
# > print(Delta.0.50)
# Point.Est      CI.LO      CI.HI
# 0      -0.448322 -0.7654562 -0.1385717
# 0.5    -0.448322 -0.7654562 -0.1385717
# 0.8138 -0.896644 -1.5309123 -0.2771433
# 1      -0.896644 -1.5309123 -0.2771433
# > print(Delta.0.75)
# Point.Est     CI.LO      CI.HI
# 0      -0.6172380 -1.053212 -0.1907379
# 0.5    -0.6172380 -1.053212 -0.1907379
# 0.8138 -0.8229839 -1.404283 -0.2543171
# 1      -0.8229839 -1.404283 -0.2543171
# > print(Delta.1.00)
# Point.Est     CI.LO      CI.HI
# 0      -0.7607435 -1.298593 -0.2349228
# 0.5    -0.7607435 -1.298593 -0.2349228
# 0.8138 -0.7607435 -1.298593 -0.2349228
# 1      -0.7607435 -1.298593 -0.2349228

# Additional figure to display results for dichotomized approach
g.range <- c(0.25, 0.50, 0.75)
plot(g.range - 0.03, c(Delta.0.25[1,1], Delta.0.50[1,1], Delta.0.75[1,1]), frame.plot = FALSE, xlab = expression(gamma), ylab = "Treatment Effect", type = "p",
     xlim = c(0,1), ylim = c(-2,0), main = "", col = "gray55",
     xaxt = 'n', cex = 0.8, pch = 20)
points(g.range + 0.03, c(Delta.0.25[3,1], Delta.0.50[3,1], Delta.0.75[3,1]), xlab = expression(gamma), ylab = "Treatment Effect", type = "p",
     col = "gray30", cex = 0.8, pch = 20)
axis(1, c(0.25, 0.50, 0.75))
segments(0, 0, 1, 0, lty = 2, lwd = 1.2, col = "gray60")
segments(g.range[1] - 0.03, Delta.0.25[1,2], g.range[1] - 0.03, Delta.0.25[1,3], col = "gray55", lwd = 1.3)
segments(g.range[1] - 0.04, Delta.0.25[1,2], g.range[1] - 0.02, Delta.0.25[1,2], col = "gray55", lwd = 1.3)
segments(g.range[1] - 0.04, Delta.0.25[1,3], g.range[1] - 0.02, Delta.0.25[1,3], col = "gray55", lwd = 1.3)

segments(g.range[1] + 0.03, Delta.0.25[3,2], g.range[1] + 0.03, Delta.0.25[3,3], col = "gray30", lwd = 1.3)
segments(g.range[1] + 0.04, Delta.0.25[3,2], g.range[1] + 0.02, Delta.0.25[3,2], col = "gray30", lwd = 1.3)
segments(g.range[1] + 0.04, Delta.0.25[3,3], g.range[1] + 0.02, Delta.0.25[3,3], col = "gray30", lwd = 1.3)

segments(g.range[2] - 0.03, Delta.0.50[1,2], g.range[2] - 0.03, Delta.0.50[1,3], col = "gray55", lwd = 1.3)
segments(g.range[2] - 0.04, Delta.0.50[1,2], g.range[2] - 0.02, Delta.0.50[1,2], col = "gray55", lwd = 1.3)
segments(g.range[2] - 0.04, Delta.0.50[1,3], g.range[2] - 0.02, Delta.0.50[1,3], col = "gray55", lwd = 1.3)

segments(g.range[2] + 0.03, Delta.0.50[3,2], g.range[2] + 0.03, Delta.0.50[3,3], col = "gray30", lwd = 1.3)
segments(g.range[2] + 0.04, Delta.0.50[3,2], g.range[2] + 0.02, Delta.0.50[3,2], col = "gray30", lwd = 1.3)
segments(g.range[2] + 0.04, Delta.0.50[3,3], g.range[2] + 0.02, Delta.0.50[3,3], col = "gray30", lwd = 1.3)

segments(g.range[3] - 0.03, Delta.0.75[1,2], g.range[3] - 0.03, Delta.0.75[1,3], col = "gray55", lwd = 1.3)
segments(g.range[3] - 0.04, Delta.0.75[1,2], g.range[3] - 0.02, Delta.0.75[1,2], col = "gray55", lwd = 1.3)
segments(g.range[3] - 0.04, Delta.0.75[1,3], g.range[3] - 0.02, Delta.0.75[1,3], col = "gray55", lwd = 1.3)

segments(g.range[3] + 0.03, Delta.0.75[3,2], g.range[3] + 0.03, Delta.0.75[3,3], col = "gray30", lwd = 1.3)
segments(g.range[3] + 0.04, Delta.0.75[3,2], g.range[3] + 0.02, Delta.0.75[3,2], col = "gray30", lwd = 1.3)
segments(g.range[3] + 0.04, Delta.0.75[3,3], g.range[3] + 0.02, Delta.0.75[3,3], col = "gray30", lwd = 1.3)

legend(0, -1.6, col = c("gray55", "gray30"), lwd = rep(1.3, 2), lty = rep(1,2), pch = rep(20, 2), c("NECE", "ECCE"), cex = 0.6)