### This function extends glmmPQL function in the package MASS ### to fit multivariate generalised linear mixed-effects regression by PQL ### Require data in long format ### Felix Achana (Clinical Trials Unit, University of Warwick) ### Nov 2019 ############################################################ ########################################################################## ### list of functions #1:mergeFormula() - Internal function 1: merges fixed-effects terms into one-multivariate fixed-effects formula #2:getfamily() - Internal function 2: Extracts glm family for each response #3:fitglm() - Internal function 3: fits univariate GLMs to each outcome and returns #4:extractlinkinv() - Internal function 4: Calculates mu = g-1(linear pred) and derivative g'(mu) #5:mglmmPQL() - Main mglmmPQL function #6:getVarCov_revised_lme() - Proposed patch for getVarCov.lme, see https://www.jepusto.com/bug-in-nlme-getvarcov/ #8:ebfunc1() - Internal function to calculate EB etsimates and thier vairances #9:ebFunc() - function to call ebfunc1() and perform eb estimation of cluster-specific effects ## load required packages for the wrapper function #install.packages("pacman") #pacman::p_load(nlme,formula.tools,boot,data.table) ####################################################################################### ### Internal function 1: merges fixed-effects terms into one-multivariate fixed-effects formula mergeFormula <- function(feFormulae){ lhs <- rhs <- list() nout <- length(feFormulae) for(j in 1:nout){ lhs[[j]] <- feFormulae[[j]][[2L]] rhs[[j]] <- strsplit(deparse(feFormulae[[j]][[3L]],width.cutoff = 500L), " \\+ ")[[1]] rhs[[j]][[length(rhs[[j]])]] <- strsplit(rhs[[j]][[length(rhs[[j]])]], "- 1") } unique(unlist(lhs, recursive = TRUE, use.names = TRUE)) if(length(unique(lhs))!=1) stop('both formulas must have the same response') rhs <- unlist(rhs, recursive = TRUE, use.names = TRUE) lhs <- unlist(lhs[[1]],recursive = TRUE,use.names = TRUE) # create the merged rhs and lhs in character string form rhs[[length(rhs)]] <- paste(rhs[[length(rhs)]],"-1",sep="") out <- reformulate(rhs, lhs) environment(out) <- parent.frame() return(out) } ### Internal function 2: Extracts glm family for each response getfamily <- function(j,family){ fm <- family[[j]] if(is.character(fm)) fam <- get(fm) if(is.list(fm)) fam <- fm if(is.function(fm)) fam <- fm() if(is.null(fm)){ print(fm) stop("'family' not recognized") } return(fam) } ### Internal function 3: fits univariate GLMs to each outcome and returns ### (1) linear predictor (eta), (2) residuals and (3) prior weights fitglm = function(fixed,family,data,weights){ fit0 = glm(fixed,family,data,na.action = "na.omit") return(data.table(fit0$data,y0 = fit0$y,eta=fit0$linear.predictors,res = fit0$residuals,w = fit0$prior.weights,wz = fit0$weights)) } ## Internal function 4: Calculates mu = g-1(linear pred) and derivative g'(mu) ## This function performs the necessary calculations for the linearization step extractlinkinv <- function(j,data,family){ eta1 <- data[outvar==levels(as.factor(data[,outvar]))[j], eta] w1 <- data[outvar==levels(as.factor(data[,outvar]))[j], w] fm <- family[[j]] if(is.list(fm)) { mu1 <- fm$linkinv(eta1) mueta <- fm$mu.eta(eta1) wz1 <- w1*mueta^2/fm$variance(mu1) } if(is.function(fm)){ mu1 <- fm()$linkinv(eta1) mueta <- fm()$mu.eta(eta1) wz1 <- w1*mueta^2/fm()$variance(mu1) } list(mu=mu1,mu.eta.val=mueta,wz=wz1) } ############################################################################## #### Main mglmmPQL function #### Created from modifying glmmPQL from package MASS for multiple outcomes #### Calls the internal functions above to perform various things #### 1. Takes stacked vector of linearised reponses from the fitglm function #### 2. Calls mle from package nmle and fits linear mixed-effects #### 3. Check fitted values from step 2 for convergence, if ok stop else #### 4. Generate updated linearised response based on fitted values from step 3 (makes use of internal func 4) #### Iterate between 3 and 4 until convergence ##################################################################################### mglmmPQL = function (mvfixed,random,family,corr,lmeweights, data,outcomevar, method = "REML", niter = 200, verbose = TRUE, na.action = "na.omit", control = lmeControl(maxIter= 100,opt = c("nlminb"))) { #data$invwt <- 1 names(data)[names(data)==outcomevar] <- "outvar" nout = length(mvfixed) data = rbindlist( lapply(1:nout,function(j) fitglm(mvfixed[[j]],family[[j]],data=subset(data,outvar==levels(as.factor(data$outvar))[j])))) m <- mcall <- Call <- match.call() nm <- names(m)[-1L] fixed <- mergeFormula(mvfixed) # fixed-effects for multivariate response keep <- is.element(nm, c( "data", "subset", "na.action", "fixed", "subset")) for (i in nm[!keep]) m[[i]] <- NULL allvars <- if (is.list(random)) allvars <- c(all.vars(fixed), names(random), unlist(lapply(random,function(x) all.vars(formula(x))))) else c(all.vars(fixed), all.vars(random)) Terms <- if (missing(data)) terms(fixed) else terms(fixed,data = data) off <- attr(Terms, "offset") if (length(off <- attr(Terms, "offset"))) allvars <- c(allvars, as.character(attr(Terms, "variables"))[off + 1]) if (!missing(corr) && !is.null(attr(corr, "formula"))) allvars <- c(allvars, all.vars(attr(corr, "formula"))) allvars <- c(allvars, all.vars(attr(lmeweights, "formula"))) Call$fixed <- eval(fixed) Call$random <- eval(random) m$formula <- as.formula(paste("~", paste(allvars, collapse = "+"))) m$data <- data environment(m$formula) <- environment(fixed) m$drop.unused.levels <- TRUE m[[1L]] <- quote(stats::model.frame) ### Ignore these bits for now tmpdat <- eval.parent(m) off <- model.offset(tmpdat) if (is.null(off)) off <- 0 wts <- model.weights(tmpdat) if (is.null(wts)) wts <- rep(1, nrow(tmpdat)) tmpdat$wts <- wts ### End code to ignore ### Make data and functions avaialable to fit multi-response glm. off <-0 data <- data[, zz := eta + res - off] eta <- data$eta nm <- names(mcall)[-1L] keep <- is.element(nm, c("fixed", "random", "data", "na.action", "control")) for (i in nm[!keep]) mcall[[i]] <- NULL fixed[[2L]] <- quote(zz) mcall[["fixed"]] <- fixed mcall[[1L]] <- quote(nlme::lme) mcall$random <- random mcall$method <- method if (!missing(corr)) mcall$correlation <- corr mcall$weights <- lmeweights data$invwt <- 1/data$wz mcall$data <- data mcall$control <- control mcall$na.action <- na.action # Iterate until convergence for (i in seq_len(niter)) { if (verbose) message(gettextf("iteration %d", i), domain = NA) iter <- i fit <- eval(mcall) etaold <- eta eta <- fitted(fit) + off data$eta <- eta if (sum((eta - etaold)^2) < 1e-06 * sum(eta^2)) break # call internal function to perform linearization step # first-order Taylor series approximation to the link response mle.fit <- list() for(j in 1: nout){ mle.fit[[j]] <- extractlinkinv(j,data,family) } mle.fit <- rbindlist(mle.fit) data[, zz := eta + (y0 - mle.fit$mu)/mle.fit$mu.eta.val - off] data[, ri := (y0 - mle.fit$mu)/mle.fit$mu.eta.val] data[, invwt := 1/mle.fit$wz] mcall$data <- data } attributes(fit$logLik) <- NULL fit$call <- Call fit$family <- family fit$logLik <- as.numeric(NA) fit$method <- method fit$iter <- iter oldClass(fit) <- c("mglmmPQL", oldClass(fit)) fit } #################################################################### ## Proposed patch for getVarCov.lme ## by James E. Pustejovsky ## Taken from https://www.jepusto.com/bug-in-nlme-getvarcov/ getVarCov_revised_lme <- function (obj, individuals, type = c("random.effects", "conditional", "marginal"), ...) { type <- match.arg(type) if (any("nlme" == class(obj))) stop("not implemented for \"nlme\" objects") if (length(obj$group) > 1) stop("not implemented for multiple levels of nesting") sigma <- obj$sigma D <- as.matrix(obj$modelStruct$reStruct[[1]]) * sigma^2 if (type == "random.effects") { result <- D } else { result <- list() groups <- sort(obj$groups[[1]]) ugroups <- unique(groups) if (missing(individuals)) individuals <- as.matrix(ugroups)[1, ] if (is.numeric(individuals)) individuals <- ugroups[individuals] for (individ in individuals) { indx <- which(individ == ugroups) if (!length(indx)) stop(gettextf("individual %s was not used in the fit", sQuote(individ)), domain = NA) if (is.na(indx)) stop(gettextf("individual %s was not used in the fit", sQuote(individ)), domain = NA) ind <- groups == individ if (!is.null(obj$modelStruct$corStruct)) { V <- corMatrix(obj$modelStruct$corStruct)[[as.character(individ)]] } else V <- diag(sum(ind)) if (!is.null(obj$modelStruct$varStruct)) sds <- 1/varWeights(obj$modelStruct$varStruct)[ind] else sds <- rep(1, sum(ind)) sds <- obj$sigma * sds cond.var <- t(V * sds) * sds dimnames(cond.var) <- list(1:nrow(cond.var), 1:ncol(cond.var)) if (type == "conditional") result[[as.character(individ)]] <- cond.var else { Z <- model.matrix(obj$modelStruct$reStruc, getData(obj))[ind, , drop = FALSE] result[[as.character(individ)]] <- cond.var + Z %*% D %*% t(Z) } } } class(result) <- c(type, "VarCov") attr(result, "group.levels") <- names(obj$groups) result } ############################################################################################ ### This function performs empirical Bayes/shrinkage estimation of cluster-specific effects ### in generalised linear mixed-models fitted using lme ### Still in early stages of development and testing ### Implementated equations in Laveille (2017) ### Felix Achana - 21 Nov 2019 # Internal function to calculate EB etsimates and thier vairances ebfunc1 <- function(obj,groups,i) { # predict random-effect term bi and conditional variance-covariance matrix, gammai (equation 11) ind = groups == i xi = model.matrix(obj, data = obj$data[ind]) zi = model.matrix(formula(obj$modelStruct$reStr)[[1]],data=obj$data[ind]) matG = getVarCov_revised_lme(obj, type="random.effects") matR = as.matrix(as.data.table(getVarCov_revised_lme(obj,individuals=i,type="conditional")[1])) febeta = fixef(obj) gammai = solve(t(zi)%*%solve(matR)%*%zi + solve(matG)) bi = as.numeric(t(gammai%*%t(zi)%*%solve(matR) %*% (obj$data[ind]$y - xi%*%febeta))) names(bi) = colnames(gammai) #bi = ranef(obj)[i,] #Empirical Bayes estimates eb = febeta m = match(names(bi), names(febeta)) eb[m] = eb[m] + bi eb = unlist(eb[names(bi)]) se_eb = sqrt(diag(as.matrix(gammai))) names(se_eb) = paste("se_",names(se_eb),sep = "") return(list(ebe=eb, se_eb=se_eb, bi=bi,vcoveb=gammai)) } ### function to call internal functions above and perform eb estimation of cluster-specific effects ebFunc = function(obj,individuals=NULL){ groups = obj$groups[[1]] groups = obj$groups[[1]] ugroups = unique(groups) ngroups = length(ugroups) if(is.null(individuals)){ output = lapply(1:ngroups,function(i) ebfunc1(obj,groups,i)) } else { output = lapply(individuals,function(i) ebfunc1(obj,groups,i)) } eb = cbind( rbindlist(lapply(1:ngroups,function(i) data.table(individuals = i, t(output[[i]]$eb)))), rbindlist(lapply(1:ngroups,function(i) data.table(individuals = i, t(output[[i]]$se_eb))))) b = rbindlist(lapply(1:ngroups, function(i) data.table(individuals = i, t(output[[i]]$bi)))) vcoveb = rbindlist(lapply(1:ngroups,function(i) data.table(individuals = i, t(output[[i]]$vcoveb)))) return(list(eb=eb,b=b,vcoveb=vcoveb)) }