#################################################################################
# Biometrics- 150118PR                                                          #
# This r-code is provided by HYOYOUNG CHOO-WOSOBA                               #
# This r-code is a part of the real data analysis section in the paper including# 
# MES estimates with bootstrap-base-standard errors                             #
# This r-code includes codes for Cardinal Research Clueter (CRC) server lines   #
#################################################################################

args <- commandArgs(trailingOnly = TRUE)  # Cluster code
seq <- as.numeric(args[1])         # Cluster code


library(MASS)
library(pscl)
library(matrixcalc)

caries <- read.csv("/home/h0choo01/caries_bs_CorrectZ/caries.csv", header=TRUE)

# Z function 
Z<- function(xmat,b,v, max) {
  
  lambda <- exp(xmat%*%b)
   # Compute the terms used to sum for the (in)finite summation
       forans <- matrix(0,ncol=max+1,nrow=length(lambda))
   for (j in 1:max){
     temp <- matrix(0,ncol=j,nrow=length(lambda))
     for (i in 1:j){temp[,i] <- lambda/(i^c(v))}
     for (k in 1:length(lambda)){forans[k,j+1] <- prod(temp[k,])}  
     }
   forans[,1] <- rep(1,length(lambda))


# Determine the (in)finite sum
   ans <- rowSums(forans)

return(ans)
}

# Calculate estimating u 
eu_cmp <- function(xmat,zmat,beta,gamma,y,v,max) {
              eu <- matrix(0,nrow(y),1)
              p <- exp(zmat%*%gamma)/(1+exp(zmat%*%gamma))
              exp_u <- p/(p+(1-p)/Z(xmat,beta,v,max))
              for (i in 1:nrow(y)){
                   eu[i,] <- ifelse(y[i]==0,exp_u[i],0)
                   }
              return(eu) }

# Functions for MES estimation


  RowbyRow<-function(A, b){  # b is a vector, A is a matrix
                temp <-A
                for (i in 1:length(A[,1]))
                    {temp[i,]<-A[i,]*b[i]}
                return(temp)
                }
 prod.b2 <- function(xmat, b, v,max) {
                  l <- exp(xmat%*%b)
               smat <- matrix(0,length(l), max)
               for(i in 1:length(l)) {
                   for( j  in 1: max) {
                        smat[i,j] <- l[i]/j^v
                        }}
               temp <- matrix(NA, length(l),max)
               for (i in 1:length(l)) {
                   for (k in 1: max) {
                         temp[i,k] <- k^2*prod(smat[i,1:k])
                         }
                         }
               res <- apply(temp, 1, sum)
               return(res)
                 }
 prod.b1 <- function(xmat, b, v,max) {
                  l <- exp(xmat%*%b)
               smat <- matrix(0,length(l), max)
               for(i in 1:length(l)) {
                   for( j  in 1: max) {
                        smat[i,j] <- l[i]/j^v
                        }}
               temp <- matrix(NA, length(l),max)
               for (i in 1:length(l)) {
                   for (k in 1: max) {
                         temp[i,k] <- k*prod(smat[i,1:k])
                         }
                         }
               res <- apply(temp, 1, sum)
               return(res)
                 }
  prod.dlv1 <- function(xmat, b,v,max) {
               l <- exp(xmat%*%b)
               smat <- matrix(0,length(l), max)
               for(i in 1:length(l)) {
                   for( j  in 1: max) {
                        smat[i,j] <- l[i]/j^v
                        }}
               temp <- matrix(NA, length(l),max)
               for (i in 1:length(l)) {
                   for (k in 1: max) {
                         temp[i,k] <- k*(log(factorial(k)))*prod(smat[i,1:k])
                         }
                         }
               res <- apply(temp, 1, sum)
               return(res)
                   }

   prod.v2 <- function(xmat, b,v,max) {
               l <- exp(xmat%*%b)
               smat <- matrix(0,length(l), max)
               for(i in 1:length(l)) {
                   for( j  in 1: max) {
                        smat[i,j] <- l[i]/j^v
                        }}
               temp <- matrix(NA, length(l),max)
               for (i in 1:length(l)) {
                   for (k in 1: max) {
                         temp[i,k] <- (log(factorial(k)))^2*prod(smat[i,1:k])
                         }
                         }
               res <- apply(temp, 1, sum)
               return(res)
                   }
    prod.v1 <- function(xmat, b,v,max) {
               l <- exp(xmat%*%b)
               smat <- matrix(0,length(l), max)
               for(i in 1:length(l)) {
                   for( j  in 1: max) {
                        smat[i,j] <- l[i]/j^v
                        }}
               temp <- matrix(NA, length(l),max)
               for (i in 1:length(l)) {
                   for (k in 1: max) {
                         temp[i,k] <- (log(factorial(k)))*prod(smat[i,1:k])
                         }
                         }
               res <- apply(temp, 1, sum)
               return(res)
                   }
  var.w <- function(xmat,beta,v,max){

              temp1 <- prod.b2(xmat,beta,v,max)/Z(xmat,beta,v,max)
              temp2 <- prod.b1(xmat,beta,v,max)/Z(xmat,beta,v,max)
              temp <- temp1-temp2^2
              res <- diag(temp)
              return(res)
              }
 e.w <- function(xmat,beta,v,max){
             prod.b1(xmat,beta,v,max)/Z(xmat,beta,v,max)
             }
 # derivative terms of estimators by using complete pseudo loglikelihood based form for MES estimates
  ddlv <- function(xmat,zmat,b,g,y,v,max){
                 uhat <- eu_cmp(xmat,zmat,b,g,y,v,max)
                 wv1 <- prod.v2(xmat, b,v,max)/Z(xmat,b,v,max)
                 wv2 <- (prod.v1(xmat, b,v,max)/Z(xmat,b,v,max))^2
                 v <- wv1-wv2
                 res <- v*(1-uhat)
                 return(-sum(res))
                 }
  dlvv<- function(xmat,zmat,b,g,y,v,max) {
                 p <- exp(zmat%*%g)/(1+exp(zmat%*%g))
                 uhat <- eu_cmp(xmat,zmat,b,g,y,v,max)
                 temp1 <- -log(factorial(y))+(prod.v1(xmat, b,v,max)/Z(xmat,b,v,max))
                 res <- (1-uhat)*temp1
                 return(t(res))
                 }
   dpg <- function(zmat, gamma) {      # derivative term of gamma
            temp <- exp(zmat%*%gamma)
            temp1 <- RowbyRow(zmat,temp)
            temp2 <- (1+temp)^2
            res <- RowbyRow(temp1,1/temp2)
            return(res)
            }
  dlb <- function(xmat,beta,v,max) {          # derivative term of beta
                 temp1 <- prod.b2(xmat,beta,v,max)/Z(xmat,beta,v,max)
                 temp2 <- prod.b1(xmat,beta,v,max)/Z(xmat,beta,v,max)
                 temp3 <- temp1-(temp2)^2
                 res <- RowbyRow(xmat, temp3)
                 return(res)
                 }
  dlv<- function(xmat, beta,v,max) {                 # derivative term of v
                 temp1 <- as.matrix(prod.dlv1(xmat,beta,v,max)/Z(xmat,beta,v,max))
                 temp2 <- as.matrix(prod.b1(xmat,beta,v,max))*as.matrix(prod.v1(xmat,beta,v,max))/((Z(xmat,beta,v,max))^2)
                 res <- temp2-temp1
                 return(res)
                 }
  prod_dlv2 <- function(xmat, b,v, max) {
                       l <- exp(xmat%*%b)
                       smat <- matrix(0,length(l), max)
                       for(i in 1:length(l)) {
                           for( j  in 1: max) {
                                smat[i,j] <- l[i]/j^v
                                }}
                       temp <- matrix(NA, length(l),max)
                       for (i in 1:length(l)) {
                           for (k in 1: max) {
                                 temp[i,k] <- k^2*(log(factorial(k)))*prod(smat[i,1:k])
                                 }
                                 }
                       res <- apply(temp, 1, sum)
                       return(res)
                           }
                                   
  r <- 0.25     # setup the tuning parameter for a modified Newton-Raphson algorithm            
                         
  var_gamma <- function(zmat,gamma) {
                    p <-  exp(zmat%*%gamma)/(1+exp(zmat%*%gamma))
                    return(diag(c(RowbyRow(p,1-p))))
                    }
  # create a function for correlation coeff. for zero-inf. part
  corr_zero1 <- function(eu,p) {
                 uist <- NULL
                for(i in 1:(length(y)-1)) {
                     for (j in (i+1):length(y)) {
                           uist0  <- (eu[i,]-p[i,])*(eu[j,]-p[j,])/sqrt(p[i,]*(1-p[i,])*p[j,]*(1-p[j,]))
                           uist <- rbind(uist0,uist)
                                 }
                              }
                return(sum(uist))
                  }
  corr_zero2 <- function(eu,p) {
                   std_u <- (eu-p)^2/(p*(1-p))
                   return(sum(std_u))
                   }
                   
  # create a function for correlation coeff. for CMP part

   corr_cmp2 <- function(eu,beta0,v0) {
                   l <- e.w(xmat,beta0,v0,max)
                   res <- (1-eu)^2*(y-l)^2/as.matrix(diag(var.w(xmat,beta0,v0,max)))
                   return(sum(res))
                   }
  # calculate the derivative term of rho and delta
   ddel <- function(csize){ 
               dde1 <- NULL
                    for( i in 2:(csize-1)) {
                          a1 <- matrix(c(NA,rep(1,csize-i)),csize-i+1,1)
                          dde1 <- rbind(dde1, a1)
                          }
               dde2 <- matrix(c(NA,rep(1,csize-1)),csize,1)
               ddel <- rbind(dde2,dde1,NA)
               return(ddel)
               }

 corr_zero_bs1 <- function(eu,p) {
                 uist <- NULL
                for(i in 1:(length(y)-1)) {
                     for (j in (i+1):length(y)) {
                           uist0  <- (eu[i,]-p[i,])*(eu[j,]-p[j,])/sqrt(p[i,]*(1-p[i,])*p[j,]*(1-p[j,]))
                           uist <- rbind(uist0,uist)
                                 }
                              }
                return(sum(uist))
                  }
 # pseudo log likelihood function for MPL estimates used for initial values of MES estimates 
  logL_cmp <- function(parm) {
            bhat <- as.matrix(parm[1:9])
            ghat <- as.matrix(parm[10:18])
            vhat <- parm[19]
            p <- exp(zmat%*%ghat)/(1+exp(zmat%*%ghat))
            l <- exp(xmat%*%bhat)
            yidx <- ifelse(y==0,1,0)
            l_0 <- log(p+(1-p)/Z(xmat,bhat,vhat,100))
            l_c <- log((1-p)*l^y/((factorial(y)^c(vhat))*Z(xmat,bhat,vhat,100)))
            lsum <- sum(yidx*l_0)+sum((1-yidx)*l_c)
            return(-lsum)
            }                 
 set.seed(588+seq)
 B <- 1
 bsparm <- matrix(NA,21,B) 
 
  for ( BS in 1:B) {
          id <- sample(unique(caries$SID), replace=TRUE, size=length(unique(caries$SID)))
          bs_data <- NULL
          for( i in 1: length(id)){
               bs_data <- rbind(bs_data,caries[which(caries$SID==id[i]),])
               }  
          
           onevec <- as.matrix(rep(1,nrow(bs_data)) )
          y <- as.matrix(bs_data$CariesCount)
          
          all_x <- bs_data[,9:16]
          newx <- data.frame(int=onevec,all_x)
          xmat <- as.matrix(cbind(newx[,c(1,9)],newx[,2:8]))
         
          zmat <- xmat
               
          
          cs_bs <- matrix(NA,length(id),1) 
          for (k in 1:length(id)){
                cs_bs[k,]  <- nrow(caries[which(caries$SID==id[k]),])
                }
          N <- length(unique(caries$SID))
          N1  <- sum(cs_bs*(cs_bs-1)/2)
          nt <- nrow(bs_data)
        
           id_bs <- NULL
          for(i in 1:nrow(cs_bs)) { 
                 idx <- as.matrix(rep(i,cs_bs[i])) 
                 id_bs <- rbind(id_bs, idx)
                    }
          id_bs <- as.matrix(id_bs)
         
           # Get the initial value by using zeroinfl in R
           mZIP_bs <- zeroinfl(formula=CariesCount~ Gender + DentalExamAge + AUCmgF5_9yrs
                     + AUCSodaOz5_9yrs + ToothBrushingFreqPerDayAvg
                         + DentalVisitPast6moAvg + FluorideTreatment6moAvg +
                                   HomeFluorideppmAvg,
                                   dist = "poisson", data =bs_data)
           
           beta0 <- as.matrix(summary(mZIP_bs)$coefficients$count[,1])
           
           gamma0 <-as.matrix(summary(mZIP_bs)$coefficients$zero[,1])
           v0 <- 1
           
            mlebs_parm <- optim(c(beta0,gamma0,v0), logL_cmp,control=list(maxit=30000), hessian=TRUE)
            beta0 <- as.matrix(c(mlebs_parm$par[1:9]))
            gamma0 <- as.matrix(c(mlebs_parm$par[10:18]))
            v0 <- c(mlebs_parm$par[19])
  
           delta0 <- 0.5
           rho0 <- 0.5
           allx <- cbind(id_bs, xmat)
           ally <- cbind(id_bs, as.numeric(paste(bs_data$CariesCount)))
          colnames(allx)[1] <- "id"
           colnames(ally) <- c("id","CariesCount")
           max <- 100
           res_bsparm <- matrix(NA, 21,100)
            for (j in 1: 100) {
                all_ddg <- matrix(0,length(gamma0),length(gamma0))
                all_geeg <- matrix(0,length(gamma0),1)
                all_ddb <- matrix(0,length(beta0),length(beta0))
                all_geeb <- matrix(0,length(beta0),1)
                all_del1 <- 0
               all_del2 <- 0
               all_rho1 <- 0
               all_rho2 <- 0
               N2 <- 0
               ntot <- 0
                for (k in 1:N) {
                        xmat <- as.matrix(subset(allx,allx[,1]==k))[,-1]
                        zmat <- xmat
                        y <-  as.matrix(as.matrix(subset(ally,ally[,1]==k))[,-1])
        
                         p0 <- exp(zmat%*%gamma0)/(1+exp(zmat%*%gamma0))
         
                         eu0 <- eu_cmp(xmat,zmat,beta0,gamma0,y,v0,max)
                         ntot1 <- sum((1-eu0)^2)
                         ntot <- ntot+ntot1
                         N20 <- as.vector(1-eu0)%*%t(as.vector(1-eu0))
                         N21 <- upper.triangle(N20)
                         diag(N21) <- 0
                         N22 <- sum(N21)
                         N2 <- N2 + N22
                         ew0 <- e.w(xmat,beta0,v0,max)
                         cor_u <- matrix(delta0,cs_bs[k],cs_bs[k])+ diag(1-c(delta0),cs_bs[k],cs_bs[k])
                         wu0 <- sqrt(var_gamma(zmat,gamma0))%*%cor_u%*%sqrt(var_gamma(zmat,gamma0)) # cov-var of u
                         cor_y <- matrix(rho0,cs_bs[k],cs_bs[k])+ diag(1-c(rho0),cs_bs[k],cs_bs[k])
                         wy0 <- sqrt(var.w(xmat,beta0,v0,max))%*%cor_y%*%sqrt(var.w(xmat,beta0,v0,max))
        
                         # update gamma estimate
                         
                         temp_g <- t(dpg(zmat,gamma0))%*%solve(wu0)
                         geeg <- as.matrix(apply(RowbyRow(t(temp_g),eu0-p0),2,sum))
                         temp_g1 <- t(dpg(zmat,gamma0))%*%solve(wu0)%*%dpg(zmat,gamma0) + geeg%*%t(geeg)
         
         
                         all_ddg <- all_ddg + temp_g1 ## sum upto N for derivative of gee for gamma
                         all_geeg <- all_geeg + geeg  ## sum upto N for gee for gamma
         
                        # update delta estimate
                          all_del1 <- all_del1 + corr_zero_bs1(eu0,p0)
                          all_del2 <- all_del2 + corr_zero2(eu0,p0)
         
                          # update beta estimate
                         temp_b <- dlb(xmat,beta0,v0,max)
                        temp_b1 <- t(temp_b)%*%solve(wy0)%*%diag(c(1-eu0))
                         geeb <- as.matrix(apply(RowbyRow(t(temp_b1),(y-e.w(xmat,beta0,v0,max))),2,sum))
                         temp_b2 <- t(temp_b)%*%solve(wy0)%*%diag(c(1-eu0))%*%temp_b + geeb%*%t(geeb)
        
                         
                         all_geeb <- all_geeb + geeb
                         all_ddb <- all_ddb + temp_b2
        
                          # update rho estimate (cor.coefficient for y)
                         vary <- as.matrix(diag(var.w(xmat,beta0,v0,max)))
                          temp_rho <- cbind(ddel(cs_bs[k]),vech((1-eu0)%*%t(1-eu0)),vech((y-e.w(xmat,beta0,v0,max))%*%t(y-e.w(xmat,beta0,v0,max))),vech(vary%*%t(vary)) )
                         temp_rho <- na.omit(temp_rho)
                          temp_rho1 <- temp_rho[,2]*temp_rho[,3]/sqrt(temp_rho[,4])
         
                          all_rho1 <- all_rho1 + sum(temp_rho1)                                                 
                          all_rho2 <- all_rho2 + corr_cmp2(eu0,beta0,v0)  
                 
                         }
         
                 all_xmat <- as.matrix(allx[,-1])
                 all_zmat <- all_xmat
                 all_ys <- as.matrix(ally[,2])
                 v1 <- v0 - r*sum(dlvv(all_xmat,all_zmat,beta0,gamma0,all_ys,v0,max))/(ddlv(all_xmat,all_zmat,beta0,gamma0,all_ys,v0,max)+ sum(dlvv(all_xmat,all_zmat,beta0,gamma0,all_ys,v0,max))^2)
         
                ########## updating scheme #############
        if(j==1){ all_geeg1 <- all_geeg} else{ all_geeg1 <- ((j-1)*all_geeg1+all_geeg)/j } 
        if(j==1){ all_ddg1 <- all_ddg} else{ all_ddg1 <- ((j-1)*all_ddg1+all_ddg)/j } 
        if(j==1){ all_geeb1 <- all_geeb} else{ all_geeb1 <- ((j-1)*all_geeb1+all_geeb)/j } 
        if(j==1){ all_ddb1 <- all_ddb} else{ all_ddb1 <- ((j-1)*all_ddb1+all_ddb)/j } 
        
                   
        gnew0 <- gamma0 + r*solve(all_ddg1)%*%all_geeg1
        gnew <- (j*gamma0+gnew0)/(j+1)
        del_new <- (all_del1/N1)/(all_del2/nt)
        

        beta11 <- beta0 + r*solve(all_ddb1)%*%all_geeb1
        beta1 <- (j*beta0+beta11)/(j+1)
        rho_new <- (all_rho1/N2)/(all_rho2/ntot)
        vnew <- (j*v0+v1)/(j+1) 

        res_bsparm[,j] <- matrix(c(j,beta1,gnew,vnew, max(abs(gnew-gamma0),abs(beta1-beta0),abs(vnew-v0))),length(c(beta1,gnew,vnew))+2,1)
        if(max(abs(gnew-gamma0),abs(beta1-beta0),abs(vnew-v0)) <  0.01 | j==100) {
                             break
                           }
      
        gamma0<- gnew
        beta0<- beta1
        delta0 <- del_new
        rho0 <- rho_new
        v0 <- vnew
       
        cat(c(res_bsparm[,j]),"\n")
                      } 
                             
              bsparm[,BS] <- res_bsparm[,j]
                 
                   }
 
 file.name <- paste("bscaries_CorrectZ=",seq,".RData", sep="")
 save.image(file.name)
 q()