###############################################################################
# Equivalence of entropy balancing and the method of moments for 
# matching-adjusted indirect comparison
#    David M. Phillippo, Sofia Dias, A.E. Ades, Nicky J. Welton
#
# -----------------------------------------------------------------------------
#
# This code implements standard MAIC (method of moments), entropy balancing 
# MAIC with uniform base weights, and entropy balancing MAIC with non-uniform 
# base weights. These are then applied to a simple example (taken from NICE 
# TSD 18) to demonstrate.
# 
###############################################################################


# MAIC code ---------------------------------------------------------------

if (!require(sandwich)) {install.packages("sandwich"); library(sandwich)}
if (!require(matrixStats)) {install.packages("matrixStats"); library(matrixStats)}

# The following functions implement the objective and gradient functions for
# method of moments and entropy balancing forms of MAIC. They can be passed
# to a suitable numerical optimisation routine to minimise over the first 
# parameter a1. The matrix X should be a matrix of IPD covariates *centered* at
# the covariate moments in the aggregate population.

## Standard (method of moments) MAIC --------------------------------------

# Objective function
objfn_mm <- function(a1, X){
  return(sum(exp(X %*% a1)))
}

# Gradient function
gradfn_mm <- function(a1, X){
  return(colSums(sweep(X, 1, exp(X %*% a1), "*")))
}


## Entropy balancing MAIC (uniform base weights) --------------------------

objfn_eb <- function(a1, X){
  # Use numerically stable log sum exp, equivalent to
  # log(mean(exp(X %*% a1)))
  return(logSumExp(X %*% a1) - log(nrow(X)))
}

gradfn_eb <- function(a1, X){
  eXa1 <- exp(X %*% a1)
  return(crossprod(X, eXa1 / sum(eXa1)))
}


## Entropy balancing MAIC (non-uniform base weights) ----------------------

# The argument w0 is a vector of prior weights (of length equal to nrow(X)).

objfn_eb0 <- function(a1, X, w0){
  # Use numerically stable log sum exp, equivalent to
  # log(sum(w0 * exp(X %*% a1)))
  return(logSumExp(X %*% a1 + log(w0)))
}

gradfn_eb0 <- function(a1, X, w0){
  eXa1 <- w0 * exp(X %*% a1)
  return(crossprod(X, eXa1 / sum(eXa1)))
}


# Example -----------------------------------------------------------------

# We apply the methods to a simulated example from NICE TSD 18.

## Simulated data sets ----------------------------------------------------

if (!require(dplyr)) {install.packages("dplyr"); library(dplyr)}
if (!require(tidyr)) {install.packages("tidyr"); library(tidyr)}
if (!require(wakefield)) {install.packages("wakefield"); library(wakefield)}
if (!require(ggplot2)) {install.packages("ggplot2"); library(ggplot2)}

set.seed(61374988)

# Study characteristics
N_AB <- 500
N_AC <- 300
agerange_AB <- 45:75
agerange_AC <- 45:55
femalepc_AB <- 0.64
femalepc_AC <- 0.8

# Outcome model
b_0 <- 0.85
b_gender <- 0.12
b_age <- 0.05
b_age_trt <- -0.08
b_trt_B <- -2.1
b_trt_C <- -2.5


### Generate AB trial
AB_IPD <- 
  rbind(
    
  # Generate A arm
  r_data_frame(n = N_AB/2,     # Number of individuals in arm A
               id,             # Unique ID
               age = age(x = agerange_AB),   # Generate ages
               gender = gender(prob = c(1 - femalepc_AB, femalepc_AB)), # Generate genders
               trt = "A"       # Assign treatment A
               ),
  
  # Generate B arm
  r_data_frame(n = N_AB/2,     # Number of individuals in arm B
               id,             # Unique ID
               age = age(x = agerange_AB),   # Generate ages
               gender = gender(prob = c(1 - femalepc_AB, femalepc_AB)), # Generate genders
               trt = "B"       # Assign treatment B
               )
  ) %>%
  
  # Generate outcomes using logistic model
  mutate(
    yprob = 1 / (1 + exp(-(
      b_0 + b_gender * (gender == "Male") + b_age * (age - 40) + 
        if_else(trt == "B", b_trt_B + b_age_trt * (age - 40), 0)
    ))),
    y = rbinom(N_AB, 1, yprob)
  ) %>%
  select(-yprob)   # Drop the yprob column

# Tabulate
AB_IPD %>% group_by(trt) %>%
  summarise(n(), mean(age), sd(age), `n(male)` = sum(gender == "Male"), 
            `%(male)` = mean(gender == "Male"), sum(y), mean(y))


### Generate AC trial
AC_IPD <- 
  rbind(
    
  # Generate A arm
  r_data_frame(n = N_AC/2,     # Number of individuals in arm A
               id,             # Unique ID
               age = age(x = agerange_AC),   # Generate ages
               gender = gender(prob = c(1 - femalepc_AC, femalepc_AC)), # Generate genders
               trt = "A"       # Assign treatment A
               ),
  
  # Generate C arm
  r_data_frame(n = N_AC/2,     # Number of individuals in arm C
               id,             # Unique ID
               age = age(x = agerange_AC),   # Generate ages
               gender = gender(prob = c(1 - femalepc_AC, femalepc_AC)), # Generate genders
               trt = "C"       # Assign treatment C
               )
  ) %>%
  
  # Generate outcomes using logistic model
  mutate(
    yprob = 1 / (1 + exp(-(
      b_0 + b_gender * (gender == "Male") + b_age * (age - 40) + 
        if_else(trt == "C", b_trt_C + b_age_trt * (age - 40), 0)
    ))),
    y = rbinom(N_AC, 1, yprob)
  ) %>%
  select(-yprob)   # Drop the yprob column

# Tabulate
AC_IPD %>% group_by(trt) %>%
  summarise(n(), mean(age), sd(age), `n(male)` = sum(gender == "Male"), 
            `%(male)` = mean(gender == "Male"), sum(y), mean(y))

# Create aggregate data
AC_AgD <- 
  cbind(
    # Trial level stats: mean and sd of age, number and proportion of males
    summarise(AC_IPD, age_mean = mean(age), age_sd = sd(age), 
              N_male = sum(gender == "Male"), prop_male = mean(gender == "Male")),
    
    # Summary outcomes for A arm
    filter(AC_IPD, trt == "A") %>% 
      summarise(y_A_sum = sum(y), y_A_bar = mean(y), N_A = n()),
    
    # Summary outcomes for C arm
    filter(AC_IPD, trt == "C") %>% 
      summarise(y_C_sum = sum(y), y_C_bar = mean(y), N_C = n())
  )

AC_AgD


## Calculate weights ------------------------------------------------------

# Centred EMs
X_EM_0 <- sweep(with(AB_IPD, cbind(age, age^2)), 2, 
                with(AC_AgD, c(age_mean, age_mean^2 + age_sd^2)), '-')

# Estimate weights

# Method of moments
(opt_mm <- optim(par = c(0,0), fn = objfn_mm, gr = gradfn_mm, X = X_EM_0, method = "L-BFGS-B"))
a1_mm <- opt_mm$par
wt_mm <- exp(X_EM_0 %*% a1_mm)
wt_mm_rs <- (wt_mm / sum(wt_mm)) * N_AB   # rescaled weights
sum(wt_mm)^2 / sum(wt_mm^2)   # effective sample size

# Entropy balancing (uniform base weights)
(opt_eb <- optim(par = c(0,0), fn = objfn_eb, gr = gradfn_eb, X = X_EM_0, method = "L-BFGS-B"))
a1_eb <- opt_eb$par
wt_eb <- exp(X_EM_0 %*% a1_eb)
wt_eb_rs <- (wt_eb / sum(wt_eb)) * N_AB   # rescaled weights
sum(wt_eb)^2 / sum(wt_eb^2)   # effective sample size

# Compare MM and EB weights
ggplot(aes(x = wt_mm, y = wt_eb),
       data = tibble(wt_mm, wt_eb)) + 
  geom_abline(slope = 1, colour = "grey60") +
  geom_point()

all.equal(wt_mm, wt_eb)
# Equal to within tolerance of numerical optimisation routine

# Summary of weights, histogram
summary(wt_mm_rs)
qplot(wt_mm_rs, geom = "histogram", 
      xlab = "Rescaled weight (multiple of original unit weight)", 
      binwidth = function(x) diff(range(x)) / nclass.Sturges(x),
      boundary = 0)

summary(wt_eb_rs)
qplot(wt_eb_rs, geom = "histogram", 
      xlab = "Rescaled weight (multiple of original unit weight)", 
      binwidth = function(x) diff(range(x)) / nclass.Sturges(x),
      boundary = 0)

# Entropy balancing (non-uniform base weights)

# We randomly assign a vector of base weights w0 - in reality, these would be
# provided by a prior weighting method
w0 <- rbeta(N_AB, 2, 5)
w0_rs <- w0 / sum(w0) * N_AB
  
(opt_eb0 <- optim(par = c(0,0), fn = objfn_eb0, gr = gradfn_eb0, X = X_EM_0, w0 = w0, method = "L-BFGS-B"))
a1_eb0 <- opt_eb0$par
wt_eb0 <- w0 * exp(X_EM_0 %*% a1_eb0)
wt_eb0_rs <- (wt_eb0 / sum(wt_eb0)) * N_AB   # rescaled weights
sum(wt_eb0)^2 / sum(wt_eb0^2)   # effective sample size

# Compare base weights with final weights
ggplot(aes(x = w0, y = wt_eb0),
       data = tibble(w0, wt_eb0)) +
  geom_abline(slope = 1, colour = "grey60") +
  geom_point()

ggplot(aes(x = wt, fill = type),
       data = bind_rows(tibble(wt = w0_rs, type = "Initial"),
                        tibble(wt = wt_eb0_rs, type = "Final"))) +
  geom_histogram(alpha = 0.5, position = "identity", boundary = 0,
                 binwidth = function(x) diff(range(x)) / nclass.Sturges(x))

## Calculate indirect comparison ------------------------------------------

# Create the weighting estimator using a simple linear model, use sandwich
# estimator for standard error. Here we just use the EB weights for
# demonstration.

# Binomial GLM
fit1 <- glm(cbind(y, 1 - y) ~ trt + gender, data = AB_IPD, family = binomial, weights = wt_eb)

# Sandwich estimator of variance matrix
V_sw <- vcovHC(fit1)

# The log OR of B vs. A is just the trtB parameter estimate
print(d_AB_MAIC <- unname(coef(fit1)["trtB"]))
print(var_d_AB_MAIC <- V_sw["trtB", "trtB"])

# Estimated log OR of C vs. A from the AC trial
d_AC <- with(AC_AgD, log(y_C_sum * (N_A - y_A_sum) / (y_A_sum * (N_C - y_C_sum))))
var_d_AC <- with(AC_AgD, 1/y_A_sum + 1/(N_A - y_A_sum) + 1/y_C_sum + 1/(N_C - y_C_sum))

# Indirect comparison of C vs. B in AC trial
print(d_BC_MAIC <- d_AC - d_AB_MAIC)
print(var_d_BC_MAIC <- var_d_AC + var_d_AB_MAIC)