## R code to generate figures for Cook/Zea SIM paper
## "Missing Data and Sensitivity Analysis for Binary Data with Implications 
##  for Sample Size and Power of Randomized Clinical Trials."

## Define some functions:

## Find the MLE of the common event probability p under the null hypothesis.
## The estimating equation is a cubic in p,  a*p^3 + b*p^2 + c*p + d = 0;
## it is solved with the closed-form trigonometric formula (rather than an
## iterative root finder) so that the computation is vectorized over tables
## and/or values of R.
## Arguments:
##    R  vector of length 2 or matrix with 2 rows (one row per row of X).
##       It is reshaped with matrix(R, nrow=2) and its columns are recycled
##       against the k tables in X.  Default c(1,1).
##    X  2x3 matrix or 2x3xk array of counts.  Column 1 summed over the two
##       rows gives y, column 3 gives the counts m that are weighted by R
##       (the missing counts in the tables built by ez), and the sum of all
##       cells of a table gives n.
## Value:
##    numeric vector of estimates, one per table (or per column of R).
## Errors:
##    stops if X is not a 2x3 matrix or a 2x3xk array.
pHat <- function(R=c(1,1), X){
  ## X should have dim 2x3 or 2x3xk.  Everything else is rejected here,
  ## including plain vectors and arrays of other rank, so that the caller
  ## gets a clear message instead of a subscript error further down.
  dimX <- dim(X)
  if(length(dimX)==2){
    if(!all(dimX==c(2,3))) stop("Dimension of X is not 2x3")
    dim(X) <- c(2,3,1)
  } else if(length(dimX)!=3 || !all(dimX[1:2]==c(2,3)))
    stop("Dimension of X is not 2x3xk")
  R <- matrix(R,nrow=2)
  m <- X[,3,]
  ## with a single table the subscript above drops to a vector; restore 2x1
  if(!is.matrix(m)) dim(m) <- c(2,1)
  Xsums <- colSums(X)   ## 3xk: column totals of each table
  n <- colSums(Xsums)   ## total count per table
  y <- Xsums[1,]        ## total of column 1 per table
  ## coefficients of cubic equation:  ax^3 + bx^2 + cx + d:
  coef <- cbind(-n*(R[1,]-1)*(R[2,]-1),
    (R[1,]-1)*(R[2,]-1)*y - n*(R[2,]+ R[1,]-2) +
      m[2,]*R[2,]*(R[1,]-1) + m[1,]*R[1,]*(R[2,]-1),
    (colSums(R)-2)*y + m[2,]*R[2,] + m[1,]*R[1,] - n,
    y)
  ## transform to "depressed form": t^3+p*t+q=0
  ## (these are NaN/Inf when a = 0; such entries are never selected below)
  p <- (3*coef[,1]*coef[,3]-coef[,2]^2)/(3*coef[,1]^2)
  q <- (2*coef[,2]^3-9*coef[,1]*coef[,2]*coef[,3]+27*coef[,1]^2*coef[,4])/(27*coef[,1]^3)
  ## t is trigonometric solution: one row per problem, one column per root
  t <- 2*sqrt(-p/3)*cos(outer(1/3*acos(3*q/2/p*sqrt(-3/p)),2*pi*0:2/3,"-"))-coef[,2]/coef[,1]/3
  ##tq is solution to quadratic if a = 0 (and b!=0)
  tq <- -(coef[,3]+sqrt(ifelse(coef[,1]==0, coef[,3]^2-4*coef[,2]*coef[,4],0)))/2/coef[,2]
  ## vector "one" guarantees that the "test" in ifelse has the right length
  one <- rep(1,length(y)) 
  ## The linear branch is also used when column 3 is zero in both rows,
  ## where the expression reduces to y/n whatever the value of R.
  ifelse(coef[,1]==0&coef[,2]==0|m[1,]+m[2,]==0, ## case a=b=0, linear
    y/(n-m[1,]*R[1,]-m[2,]*R[2,]),
    ifelse(coef[,1]==0,tq,  ## case a=0, b!=0, quadratic
      ## now pick the correct cubic root: first root when both R > 1,
      ## third root when both R < 1, second root otherwise.
      ifelse(R[1,]>one&R[2,]>one, t[,1],
        ifelse(R[1,]<one&R[2,]<one, t[,3], t[,2]))))
}

## Calculate Z given vector or matrix of values of R and data matrix/array X.
## R can be:
##    vector of length 1 if Rt=Rc,
##    vector of length 2  (Rc,Rt),
##    matrix with rows Rc and Rt (2xk); a matrix whose first dimension is
##    not 2 is transposed, and a single-column matrix is treated as a
##    vector of length 2.
## X can be 2x3 matrix of form in Table 2 or
##          2x3xk array of k tables.
##  The 2x3xk array can be used for efficient simulation.
## Value:
##    numeric vector of Z statistics, one per table (or per column of R),
##    computed as u/sqrt(v) from row 1 of the tables, where v combines the
##    two row-wise variance terms as 1/(1/v1 + 1/v2).
## Errors:
##    stops if X is not 2x3 or 2x3xk, if R is not of one of the forms above,
##    or if R has several columns that do not match several tables in X.
zTest <- function(R,X){
  ## X should have dim 2x3 or 2x3xk, convert to 2x3x1 if 2x3.  Inputs
  ## without a dim attribute or of any other rank are rejected here instead
  ## of failing later with a subscript error.
  dimX <- dim(X)
  if(length(dimX)==2){
    if(!all(dimX==c(2,3))) stop("Dimension of X is not 2x3")
    dim(X) <- c(2,3,1)
  } else if(length(dimX)!=3 || !all(dimX[1:2]==c(2,3)))
    stop("Dimension of X is not 2x3xk")
  k <- dim(X)[3]
  ## if R is matrix, either k=1 (one table), or number of cols=k
  if(length(R)==1) {
    R <- rep(R,2)
  } else if(is.matrix(R)) {
    if(all(dim(R)!=2)) stop("If R is a matrix, it must have one dimension = 2")
    if(dim(R)[1]!=2) R <- t(R)
    if(dim(R)[2]==1) {
      R <- c(R)
    } else if(dim(R)[2]!=k && k>1) {
      stop("No. of columns of R does't match number of tables in X")
    }
  } else if(length(R)!=2) stop("R has the wrong length/structure")
  ## common-p estimate, duplicated so it lines up with the two rows of X
  phat <- pHat(R,X)
  phat <- rbind(phat,phat)
  m <- X[,3,]
  y <- X[,1,]
  n <- apply(X,c(1,3),sum)[,]   ## row totals; [,] drops to a vector if k=1
  ## u: row-wise score contributions, v: row-wise variance terms
  u <- y -n*phat + m*R*phat/(1-phat+phat*R)
  v <- (n-m*R/(1-phat+R*phat)^2)*phat*(1-phat)
  v <- 1/(colSums(1/v))
  ## return 0 (not NaN/Inf) when the variance is non-positive and the score
  ## is numerically zero
  ifelse(v<=0&abs(u[1,])<.Machine$double.eps^.7, 0, u[1,]/sqrt(pmax(v,0)))
}
    
## ez: expected Z.  Builds the table(s) of expected cell counts implied by
## the true parameters and passes them to zTest together with the R that is
## assumed in the analysis.
##    R   assumed R for the analysis (any form accepted by zTest)
##    n   sample size per group, length 1 or 2
##    q   marginal missingness probability per group, length 1 or 2
##    p   true p for the underlying data, length 1 or 2
##    R0  true R for the underlying data; length 1, length 2, or a matrix
##        with 2 rows (one column per scenario).  Defaults to R.
## Value: the result of zTest, one value per scenario.
ez <- function(R, n, q, p, R0=R){
  ## a single value applies to both groups
  both <- function(v) if(length(v)==1) c(v,v) else v
  R <- both(R)
  R0 <- both(R0)
  p <- both(p)
  n <- both(n)
  ## missingness probability is R0*piNon for events and piNon for
  ## non-events, so that the marginal missingness probability is q
  scale0 <- (1-p+p*R0)
  piNon <- q/scale0
  expY <- p*(1-R0*piNon)*n   ## expected observed events
  expM <- scale0*piNon*n     ## expected missing
  ## stack (events, non-events, missing) as 2 x scenarios x 3, then move the
  ## cell index to the middle to get the 2x3xk layout zTest expects
  nScen <- length(expY)/2
  cells <- array(c(expY,n-expY-expM,expM),c(2,nScen,3))
  zTest(R,aperm(cells,c(1,3,2)))
}


## Boundary points of an elliptical region, returned on the original scale.
## On the log scale the curve is an ellipse centred at (0,0) whose major
## axis lies along the line x=y.
##     t  position along the boundary; t=0 gives the point x=y=a and the
##        curve closes at t=1
##     a  value of x=y at t=0 ("major axis")
##     e  eccentricity of the ellipse
## Value: matrix with columns "x" and "y", one row per element of t.
Ellipse <- function(t, a,e){
  logA <- log(a)
  theta <- 2*pi*t
  along <- cos(theta)                  ## component along the diagonal
  across <- sqrt(1-e^2)*sin(theta)     ## component across the diagonal
  cbind(x=exp(logA*(along - across)),
        y=exp(logA*(along + across)))
}

## Boundary points of a diamond-shaped region, returned on the original
## scale.  With e and d replaced by their logs, the boundary runs in
## straight lines through (e,e), (-d,d), (-e,-e), (d,-d) and back to (e,e).
##     t  position along the boundary, taken modulo 1; each quarter of the
##        unit interval covers one edge
##     d, e  corner parameters on the original scale
## Value: matrix with columns "x" and "y", one row per element of t.
Diamond <- function(t,d,e) {
  s <- 4*(t%%1)    ## s in [0,4): integer part selects the edge
  le <- log(e)
  ld <- log(d)
  ## choose among the four per-edge values according to s
  pick <- function(v1,v2,v3,v4)
    ifelse(s<=1, v1, ifelse(s<=2, v2, ifelse(s<=3, v3, v4)))
  lx <- pick(le-s*(le+ld),
             -ld+(s-1)*(ld-le),
             -le+(s-2)*(le+ld),
             ld-(s-3)*(ld-le))
  ly <- pick(le-s*(le-ld),
             ld-(s-1)*(ld+le),
             -le+(s-2)*(le-ld),
             -ld+(s-3)*(ld+le))
  cbind(x=exp(lx),y=exp(ly))
}

## Compute inflation factor and locations where minimum expected Z is achieved
##    rFun is function to generate boundary of plausible region (e.g., "Ellipse");
##       it is called as rFun(t, ...) with a single boundary position t.
##    n is sample size per group, length 1 or 2 (nc,nt)
##    q is the marginal missingness probability per group, length 1 or 2
##    p is the true p for the underlying data, length 1 (pc) or 2 (pc,pt)
##       if length(p)==1, calculate alternative pt<pc with desired power given n. 
##    level, beta desired type I and type II error rates. 
##    side is one or two sided level. 
##    ... further arguments; they are passed on to optim and to rFun.
## Value:
##    inflation= inflation factor (may be infinite)
##    EZ = minimum expected Z
##    p= hypothesized (pc, pt), either as given in call, or using computed pt
##    n= total sample size (nc+nt) both original and inflated.
##    R0= value of true (Rc,Rt) minimizing expected Z
##    R1= value of (Rc,Rt) used in the analysis minimizing expected Z
##    t= boundary positions found by optim (t[1] gives R0, t[2] gives R1)
##    q= q as given in call
inflation <- function(rFun, n, q, p, level=0.05, beta=0.1, side=2,  ...) {
  ## sum of the normal quantiles for the type I and type II error rates
  d <- (qnorm(level/side,lower.tail=FALSE)+qnorm(beta,lower.tail=FALSE))
  if(length(n)==1) n <- c(n,n) else if(length(n)!=2) stop("n should have length 1 or 2")
  if(length(p)==1) {
    ## Solve for the alternative pt in (0,pc) at which the sample size n
    ## gives the desired power.  n and (pc,pt) are in the same order, so the
    ## pooled proportion weights pc by n[1] and the candidate pt by n[2].
    ssFun <- function(p,n,pc,d) {
      pbar <- (pc*n[1]+p*n[2])/sum(n)
      sum(n)/prod(n)*pbar*(1-pbar)*d^2/(p-pc)^2-1}
    p1 <- uniroot(ssFun, c(0,p), n=n,pc=p, d=d)$root
    p <- c(p,p1)}
  ## expected Z as a function of two boundary positions: t[1] locates the
  ## true R0 and t[2] the R assumed in the analysis
  FUN <- function(t,fun, n, q,p,...) ez(fun(t[2],...), n=n, q, p, fun(t[1],...))
  O <- optim(par=c(.8,.3), fn=FUN, q=q, p=p, fun=rFun, n=n, ...)
  ## a non-positive minimum expected Z cannot be offset by any sample size
  if(O$value<=0) inf <- Inf else inf <- d^2/O$value^2
  list(inflation=inf, EZ = O$value, p=p, n=sum(n)*c(1,inf), R0=rFun(O$par[1],...), R1=rFun(O$par[2],...), t=O$par, q=q)
}


### now generate figures:
### Note that calls to `polygon' below require a device that supports
### semi-transparent backgrounds.  On other devices, polygons need to be drawn 
### before the other content.  

## Create vector of 100 values of R from .1 to 10, equally spaced on log-scale
r <- exp(seq(log(.1),log(10),length.out=100))
##Figure 2(a)
## Expected Z over a grid of true (R0) values (r1 on the x-axis, r2 on the
## y-axis) when the analysis assumes R=1, with n=100 per group, q=.05 and the
## same p=.3 in both groups.
Z2a <- outer(r, r, function(r1,r2) ez(1, n=100, q=.05, p=.3, rbind(c(r1),c(r2))))
## rejection probability in percent: upper tail of a non-central chi-square
## (1 df, non-centrality Z^2) beyond 3.84
rejProb2a <- pchisq(3.84, df=1, ncp=Z2a^2, lower.tail=FALSE)*100
contour(r,r,rejProb2a,log="xy", levels=5:9)
abline(0,1)
points(1,1,pch=16)
## shade the elliptical regions with a=2 and a=5 (e=.9)
polygon(Ellipse(1:100/100,2,.9),col=gray(0.7,.2), border=NA)
polygon(Ellipse(1:100/100,5,.9),col=gray(0.7,.2), border=NA)


##Figure 3(e)
## Z statistic for one observed 2x3 table over a grid of assumed values of R
## (r1 is row 1 of R, r2 is row 2).
X3e <- rbind(c(301, 594, 105),c(247, 650, 103))
Z3e <- outer(r, r, function(r1,r2) zTest(rbind(c(r1),c(r2)), X3e))
contour(r,r,Z3e,log="xy")
## dashed contour at Z = 1.96
contour(r,r,Z3e,log="xy", levels=1.96, lty=2, add=TRUE)
points(1,1,pch=16)
## shade the elliptical regions with a=2 and a=5 (e=.9)
polygon(Ellipse(1:100/100,2,.9),col=gray(0.7,.2), border=NA)
polygon(Ellipse(1:100/100,5,.9),col=gray(0.7,.2), border=NA)

##Figure 4(b)
## Minimizing configuration on the boundary of the ellipse with a=2.3, e=.9
## for q=.1, pc=.3 and n=1000 per group.
infl4b <- inflation(Ellipse, q=.1, p=.3, n=1000, a=2.3, e=.9)
## Expected Z over a grid of R used in the analysis, with the true p and the
## true R0 held at the values returned by inflation.
theta4b <- outer(r,r, function(r1,r2) ez(rbind(c(r1),c(r2)),1000,.1, infl4b$p, infl4b$R0))
contour(r,r,theta4b,log="xy", levels=c(-1,1:6))
## dashed contour at expected Z = 0
contour(r,r,theta4b,log="xy", add=TRUE, levels=0,lty=2)
## dotted contour at the expected Z obtained at the minimizing analysis R1
contour(r,r,theta4b,log="xy", add=TRUE, levels=ez(infl4b$R1,1000,.1, infl4b$p, infl4b$R0), lty=3)
## dotted outline of the ellipse that was searched
lines(Ellipse(0:100/100, 2.3,.9), lty=3)
points(1,1,pch=16)
## shade the elliptical regions with a=2 and a=5 (e=.9)
polygon(Ellipse(1:100/100,2,.9),col=gray(0.7,.2), border=NA)
polygon(Ellipse(1:100/100,5,.9),col=gray(0.7,.2), border=NA)


##Figure 5(b)
## Inflation factor as a function of the ellipse parameter a (101 values
## from 1 to 10 on the log scale, e=.9) for q = .05, .10 and .15, with
## pc=.3 and n=1000 per group.
a <- exp(0:100/100*log(10))
## vapply enforces one numeric inflation factor per value of a
infl5b.05 <- vapply(a, function(a) inflation(Ellipse, q=.05, p=.3, n=1000, a=a, e=.9)$inflation, numeric(1))
infl5b.10 <- vapply(a, function(a) inflation(Ellipse, q=.10, p=.3, n=1000, a=a, e=.9)$inflation, numeric(1))
infl5b.15 <- vapply(a, function(a) inflation(Ellipse, q=.15, p=.3, n=1000, a=a, e=.9)$inflation, numeric(1))
plot(a, infl5b.05, type="l", log="xy", ylim=c(1,1000), ylab="Inflation Factor")
lines(a, infl5b.10, lty=2)
lines(a, infl5b.15, lty=3)
## shade the ranges 1 <= a <= 2 and 1 <= a <= 5
polygon(c(1,2,2,1), c(1,1,1000,1000), border=FALSE, col=gray(.7,.2))
polygon(c(1,5,5,1), c(1,1,1000,1000), border=FALSE, col=gray(.7,.2))