### This function extends glmmPQL function in the package MASS
### to fit multivariate generalised linear mixed-effects regression by PQL 
### Require data in long format
### Felix Achana (Clinical Trials Unit, University of Warwick)
### Nov 2019 ############################################################
##########################################################################


### list of functions
#1:mergeFormula() - Internal function 1: merges fixed-effects terms into one-multivariate fixed-effects formula
#2:getfamily() - Internal function 2: Extracts glm family for each response
#3:fitglm()  - Internal function 3: fits univariate GLMs to each outcome and returns
#4:extractlinkinv() - Internal function 4: Calculates mu = g-1(linear pred) and derivative g'(mu)
#5:mglmmPQL() - Main mglmmPQL function
#6:getVarCov_revised_lme() - Proposed patch for getVarCov.lme, see https://www.jepusto.com/bug-in-nlme-getvarcov/
#8:ebfunc1() - Internal function to calculate EB etsimates and thier vairances
#9:ebFunc() - function to call ebfunc1() and perform eb estimation of cluster-specific effects



## load required packages for the wrapper function
#install.packages("pacman")
#pacman::p_load(nlme,formula.tools,boot,data.table)

#######################################################################################
### Internal function 1: merges fixed-effects terms into one-multivariate fixed-effects formula 
mergeFormula <- 
  function(feFormulae){ 
    lhs <- rhs <- list()
    nout <- length(feFormulae)
    for(j in 1:nout){
      lhs[[j]] <- feFormulae[[j]][[2L]]
      rhs[[j]] <- strsplit(deparse(feFormulae[[j]][[3L]],width.cutoff = 500L), " \\+ ")[[1]]
      rhs[[j]][[length(rhs[[j]])]] <- strsplit(rhs[[j]][[length(rhs[[j]])]], "- 1")
    }
    unique(unlist(lhs, recursive = TRUE, use.names = TRUE))
    if(length(unique(lhs))!=1) stop('both formulas must have the same response')
    rhs <- unlist(rhs, recursive = TRUE, use.names = TRUE)
    
    lhs <- unlist(lhs[[1]],recursive = TRUE,use.names = TRUE)  # create the merged rhs and lhs in character string form
    rhs[[length(rhs)]] <- paste(rhs[[length(rhs)]],"-1",sep="")
    out <- reformulate(rhs, lhs)
    environment(out) <- parent.frame()
    return(out)
  }


### Internal function 2: Extracts glm family for each response
getfamily <- 
  function(j,family){
    fm <- family[[j]]
    if(is.character(fm)) fam <- get(fm)
    if(is.list(fm))      fam <- fm
    if(is.function(fm))  fam <- fm()
    if(is.null(fm)){
      print(fm)
      stop("'family' not recognized")
    }
    return(fam)
  }


### Internal function 3: fits univariate GLMs to each outcome and returns 
### (1) linear predictor (eta), (2) residuals and (3) prior weights
fitglm =
  function(fixed,family,data,weights){
    fit0  = glm(fixed,family,data,na.action = "na.omit")
    return(data.table(fit0$data,y0 = fit0$y,eta=fit0$linear.predictors,res = fit0$residuals,w = fit0$prior.weights,wz = fit0$weights))
  }


## Internal function 4: Calculates mu = g-1(linear pred) and derivative g'(mu)
## This function performs the necessary calculations for the linearization step
extractlinkinv <-   
  function(j,data,family){
    eta1  <- data[outvar==levels(as.factor(data[,outvar]))[j], eta]
    w1    <- data[outvar==levels(as.factor(data[,outvar]))[j], w]
    fm    <- family[[j]]
    if(is.list(fm)) {     
       mu1   <- fm$linkinv(eta1)
       mueta <- fm$mu.eta(eta1)
       wz1   <- w1*mueta^2/fm$variance(mu1)
    }
    if(is.function(fm)){
       mu1   <- fm()$linkinv(eta1)
       mueta <- fm()$mu.eta(eta1)
       wz1   <- w1*mueta^2/fm()$variance(mu1)
    }
    list(mu=mu1,mu.eta.val=mueta,wz=wz1)
  }


##############################################################################
#### Main mglmmPQL function
#### Created from modifying glmmPQL from package MASS for multiple outcomes
#### Calls the internal functions above to perform various things
#### 1. Takes stacked vector of linearised reponses from the fitglm function
#### 2. Calls mle from package nmle and fits linear mixed-effects 
#### 3. Check fitted values from step 2 for convergence, if ok stop else 
#### 4. Generate updated linearised response based on fitted values from step 3 (makes use of internal func 4)
#### Iterate between 3 and 4 until convergence
##################################################################################### 

mglmmPQL = 
  function (mvfixed,random,family,corr,lmeweights, data,outcomevar,
            method  =  "REML",
            niter   = 200,
            verbose = TRUE,
            na.action = "na.omit",
            control = lmeControl(maxIter= 100,opt = c("nlminb"))) {
  #data$invwt  <- 1 
  names(data)[names(data)==outcomevar] <- "outvar"
  nout = length(mvfixed)
  data = rbindlist(
         lapply(1:nout,function(j) 
         fitglm(mvfixed[[j]],family[[j]],data=subset(data,outvar==levels(as.factor(data$outvar))[j]))))

      m       <- mcall <- Call <- match.call()
      nm      <- names(m)[-1L]
      fixed   <- mergeFormula(mvfixed)  # fixed-effects for multivariate response
      keep    <- is.element(nm, c( "data", "subset", "na.action", "fixed", "subset"))
      for (i in nm[!keep]) m[[i]] <- NULL
      allvars <- if (is.list(random)) 
                  allvars <- c(all.vars(fixed), names(random), unlist(lapply(random,function(x) all.vars(formula(x))))) else c(all.vars(fixed), all.vars(random))
                  Terms     <- if (missing(data)) terms(fixed) else terms(fixed,data = data)
      
      off <- attr(Terms, "offset")
      if (length(off <- attr(Terms, "offset"))) 
          allvars    <- c(allvars, as.character(attr(Terms, "variables"))[off + 1])
      if (!missing(corr) && !is.null(attr(corr,    "formula"))) 
          allvars <- c(allvars, all.vars(attr(corr,  "formula")))
          allvars <- c(allvars, all.vars(attr(lmeweights, "formula")))
      
      Call$fixed  <- eval(fixed)
      Call$random <- eval(random)
      m$formula   <- as.formula(paste("~", paste(allvars, collapse = "+")))
      m$data      <- data
      environment(m$formula) <- environment(fixed)
      m$drop.unused.levels   <- TRUE
      m[[1L]] <- quote(stats::model.frame)
      
      ### Ignore these bits for now
      tmpdat  <- eval.parent(m)
      off <- model.offset(tmpdat)
      if (is.null(off)) 
        off <- 0
      wts <- model.weights(tmpdat)
      if (is.null(wts)) 
        wts <- rep(1, nrow(tmpdat))
      tmpdat$wts <- wts
      ### End code to ignore
  
  ### Make data and functions avaialable to fit multi-response glm.
  off  <-0
  data <- data[, zz := eta + res - off]
  eta  <- data$eta
  nm   <- names(mcall)[-1L]
  keep <- is.element(nm, c("fixed", "random", "data", "na.action", "control"))
  for (i in nm[!keep]) mcall[[i]] <- NULL
  fixed[[2L]]      <- quote(zz)
  mcall[["fixed"]] <- fixed
  mcall[[1L]]      <- quote(nlme::lme)
  mcall$random     <- random
  mcall$method     <- method
  if (!missing(corr)) 
    mcall$correlation <- corr
  mcall$weights   <- lmeweights
  data$invwt      <- 1/data$wz
  mcall$data      <- data
  mcall$control   <- control
  mcall$na.action <- na.action

  # Iterate until convergence
  for (i in seq_len(niter)) {
    if (verbose) 
      message(gettextf("iteration %d", i), domain = NA)
    iter    <- i
    fit     <- eval(mcall)
    etaold  <- eta
    eta     <- fitted(fit) + off
    data$eta <- eta
    if (sum((eta - etaold)^2) < 1e-06 * sum(eta^2)) 
      break
    
    # call internal function to perform linearization step
    # first-order Taylor series approximation to the link response
    mle.fit <- list()
    for(j in 1: nout){
      mle.fit[[j]] <- extractlinkinv(j,data,family)
    }
    mle.fit <- rbindlist(mle.fit)
    data[,  zz    := eta + (y0 - mle.fit$mu)/mle.fit$mu.eta.val - off]
    data[,  ri    := (y0 - mle.fit$mu)/mle.fit$mu.eta.val]
    data[,  invwt := 1/mle.fit$wz]
    mcall$data <- data
  }
  
  attributes(fit$logLik) <- NULL
  fit$call   <- Call
  fit$family <- family
  fit$logLik <- as.numeric(NA)
  fit$method <- method
  fit$iter <- iter
  oldClass(fit) <- c("mglmmPQL", oldClass(fit))
  fit
  }


####################################################################
## Proposed patch for getVarCov.lme
## by James E. Pustejovsky 
## Taken from https://www.jepusto.com/bug-in-nlme-getvarcov/

getVarCov_revised_lme <- function (obj, individuals, type = c("random.effects", "conditional", "marginal"), ...) {
  type <- match.arg(type)
  if (any("nlme" == class(obj))) 
    stop("not implemented for \"nlme\" objects")
  if (length(obj$group) > 1) 
    stop("not implemented for multiple levels of nesting")
  sigma <- obj$sigma
  D <- as.matrix(obj$modelStruct$reStruct[[1]]) * sigma^2
  if (type == "random.effects") {
    result <- D
  }
  else {
    result <- list()
    groups <- sort(obj$groups[[1]])
    ugroups <- unique(groups)
    if (missing(individuals)) 
      individuals <- as.matrix(ugroups)[1, ]
    if (is.numeric(individuals)) 
      individuals <- ugroups[individuals]
    for (individ in individuals) {
      indx <- which(individ == ugroups)
      if (!length(indx)) 
        stop(gettextf("individual %s was not used in the fit", 
                      sQuote(individ)), domain = NA)
      if (is.na(indx)) 
        stop(gettextf("individual %s was not used in the fit", 
                      sQuote(individ)), domain = NA)
      ind <- groups == individ
      if (!is.null(obj$modelStruct$corStruct)) {
        V <- corMatrix(obj$modelStruct$corStruct)[[as.character(individ)]]
      }
      else V <- diag(sum(ind))
      if (!is.null(obj$modelStruct$varStruct)) 
        sds <- 1/varWeights(obj$modelStruct$varStruct)[ind]
      else sds <- rep(1, sum(ind))
      sds <- obj$sigma * sds
      cond.var <- t(V * sds) * sds
      dimnames(cond.var) <- list(1:nrow(cond.var), 1:ncol(cond.var))
      if (type == "conditional") 
        result[[as.character(individ)]] <- cond.var
      else {
        Z <- model.matrix(obj$modelStruct$reStruc, getData(obj))[ind, 
                                                                 , drop = FALSE]
        result[[as.character(individ)]] <- cond.var + 
          Z %*% D %*% t(Z)
      }
    }
  }
  class(result) <- c(type, "VarCov")
  attr(result, "group.levels") <- names(obj$groups)
  result
}


############################################################################################
### This function performs empirical Bayes/shrinkage estimation of cluster-specific effects 
### in generalised linear mixed-models fitted using lme
### Still in early stages of development and testing
### Implementated equations in Laveille (2017)
### Felix Achana - 21 Nov 2019

# Internal function to calculate EB etsimates and thier vairances
ebfunc1 <- 
  function(obj,groups,i) {
     # predict random-effect term bi and conditional variance-covariance matrix, gammai (equation 11)
    ind    = groups == i
    xi     = model.matrix(obj, data = obj$data[ind])
    zi     = model.matrix(formula(obj$modelStruct$reStr)[[1]],data=obj$data[ind])
    matG   = getVarCov_revised_lme(obj, type="random.effects")
    matR   = as.matrix(as.data.table(getVarCov_revised_lme(obj,individuals=i,type="conditional")[1]))
    febeta = fixef(obj) 
    gammai = solve(t(zi)%*%solve(matR)%*%zi + solve(matG))
    bi     = as.numeric(t(gammai%*%t(zi)%*%solve(matR) %*% (obj$data[ind]$y - xi%*%febeta))) 
    names(bi) = colnames(gammai)
    #bi     = ranef(obj)[i,] 
    
    #Empirical Bayes estimates 
    eb    = febeta
    m     = match(names(bi), names(febeta))
    eb[m] = eb[m] + bi
    eb    = unlist(eb[names(bi)])
    se_eb = sqrt(diag(as.matrix(gammai)))
    names(se_eb) = paste("se_",names(se_eb),sep = "")
    return(list(ebe=eb, se_eb=se_eb, bi=bi,vcoveb=gammai))
  }


### function to call internal functions above and perform eb estimation of cluster-specific effects
ebFunc = 
  function(obj,individuals=NULL){
    groups  = obj$groups[[1]]
    groups  = obj$groups[[1]]
    ugroups = unique(groups)
    ngroups = length(ugroups)
    
    if(is.null(individuals)){
      output  = lapply(1:ngroups,function(i) ebfunc1(obj,groups,i))    
    } else {
      output  = lapply(individuals,function(i) ebfunc1(obj,groups,i)) 
    }
    
    eb = cbind(
      rbindlist(lapply(1:ngroups,function(i) data.table(individuals = i, t(output[[i]]$eb)))),
      rbindlist(lapply(1:ngroups,function(i) data.table(individuals = i, t(output[[i]]$se_eb)))))
    
    b = rbindlist(lapply(1:ngroups,     function(i) data.table(individuals = i, t(output[[i]]$bi))))
    vcoveb = rbindlist(lapply(1:ngroups,function(i) data.table(individuals = i, t(output[[i]]$vcoveb))))
    
    return(list(eb=eb,b=b,vcoveb=vcoveb))
    
  }