###########################################
##            Supplement A:              ##
##       EMVS for Binary Outcomes        ##
##        Application Simulation         ##
##           Beta-Binomial               ##
##       Deterministic Annealing         ##
##            4/22/2016                  ##
###########################################

## EMVS Function
# Give the function the initial parameters and the original data set (covariates labeled
# x1-xn, outcome y, plus an intercept column), along with v0 and v1.
# theta0      = initial theta (usually 0.5)
# beta0       = initial beta. If MISSING, myEM uses the MLE
# data        = n x (p+2) matrix of variables labeled 'y', 'x0', 'x1', ..., 'xp' where x0 is a
#               column of 1's; grouped covariates go at the end
# indicator   = list of which covariates are grouped indicators (already in indicator form)
# interact    = 1 x 2 x k array of interactions. Each column is a pair of parent terms. For example,
#               interact[,,3] = c(2,8) is the third interaction and it is between x2 and x8. If there
#               is an interaction with a group, just list one of the group terms, and list the grouped
#               interactions LAST. If interact is MISSING, then the total number of covariates is the
#               total number of parent effects
# heredity    = 4 x 1 column vector of the heredity structure to impose. For example, strong = c(0,0,0,1)
# poly        = heredity structure for squared terms
# alpha, beta = parameters for the beta prior (usually set to 1)
# b           = current 1/Temp of the deterministic annealing variant
######################################################################################################################
#### Note that running this function does not execute the deterministic annealing. A loop
#### is written in the Data Generation section (below) that introduces the deterministic annealing.
#### Also, this function will generate the interaction terms if they are provided.
#############################################################################################################
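## For reference, a minimal sketch of the argument formats described above. The indices and
## group sizes here are illustrative only and are not part of the simulation below.
# indicator_ex <- list(group1 = c(9, 10))   # x9, x10 are the dummies for one 3-level factor
# interact_ex  <- array(c(2, 8,             # interaction between x2 and x8
#                         5, 5),            # squared term for x5
#                       dim = c(1, 2, 2))
# strong_ex    <- c(0, 0, 0, 1)             # strong heredity: intx kept only if both parents are in
# weak_ex      <- c(0, 1, 1, 1)             # weak heredity: intx kept if at least one parent is in
# poly_ex      <- c(0, 1)                   # squared term kept only if its parent is in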
myEM <- function(theta0, beta0, data, indicator, interact, heredity, poly,
                 V0, V1, alpha, beta, b, tol = 1e-6, maxit = 750){

  # Get the number of main effects before interactions are made
  given <- ncol(data) - 2   # 1 for y and 1 for x0 <- c(1,1,...,1)

  ##### Interactions and Indicators #####
  # Note: this implementation handles the case where both 'indicator' and 'interact' are supplied.
  if(!missing(indicator) & !missing(interact)){

    # Identify which interactions involve indicators and append new indicators to the list.
    # Make more indicators if they're in an interaction (can handle interactions of indicators).
    new_interact     <- numeric()   # Home for new interactions
    interact_nogroup <- numeric()   # Separate array for interactions that do not involve a group
    new_indicator    <- indicator   # Home for new indicator groups
    for(i in 1:dim(interact)[3]){                     # Look through all the interaction terms given
      num_dum <- 0                                    # Count whether 0, 1, or 2 of the interaction terms are part of an indicator group
      place   <- c(0,0)                               # Identify which group they are part of
      for(k in 1:dim(interact)[2]){                   # Look through the 2 representative components of the interaction
        for(j in 1:length(indicator)){                # Look through the length of the indicator group
          if(interact[,k,i] %in% indicator[[j]]){     # Is one of the interaction terms part of an indicator group?
            num_dum  <- num_dum + 1                   # If yes, record
            place[k] <- j                             # Note which group it is in
          }
        }
      }
      if(num_dum == 2){          # If both are indicators - make new indicator terms and a new interaction
        new_indicator <- c(new_indicator,
                           list(group = seq(1, (length(indicator[[place[1]]])*length(indicator[[place[2]]]))) +
                                  max(unlist(new_indicator), given)))
        new_interact  <- rbind(new_interact, expand.grid(indicator[[place[1]]], indicator[[place[2]]]))
      }else if(num_dum == 1){    # If one is an indicator - make new indicator terms and a new interaction
        new_indicator <- c(new_indicator,
                           list(group = seq(1, length(indicator[[sum(place)]])) +
                                  max(unlist(new_indicator), given)))
        new_interact  <- rbind(new_interact, expand.grid(indicator[[sum(place)]], interact[,match(0,place),i]))
      }else if(num_dum == 0){    # If neither is an indicator - append the interaction
        interact_nogroup <- rbind(interact_nogroup, interact[,,i])
      }
    }
    new_interact     <- array(as.vector(rbind(new_interact[,1], new_interact[,2])),
                              dim = c(1,2,nrow(new_interact)))
    interact_nogroup <- array(as.vector(rbind(interact_nogroup[,1], interact_nogroup[,2])),
                              dim = c(1,2,nrow(interact_nogroup)))

    # Make a vector with all of the interaction terms in it
    all_inter <- array(c(as.vector(interact_nogroup), as.vector(new_interact)),
                       dim = c(1,2,(dim(new_interact)[3] + dim(interact_nogroup)[3])))

    # Figure out how many gamma terms we need (not including the intercept)
    group <- length(new_indicator)   # Number of indicator variable groups, e.g. new_indicator <- list(group1=c(16,17), group2=c(18,19,20))
    group_size <- 0                  # Home for the count
    for(i in 1:group){               # Look through the new indicator list
      group_size <- group_size + length(new_indicator[[i]])   # Total number of indicator variables
    }
    # Account for the number of interactions
    num_inter <- dim(all_inter)[3]
    # Number of gammas = main effects + number of interactions + number of grouped terms - number of groups
    p <- given + num_inter + group - group_size

    # Add interactions to the data set and label them 'x4_x6'. Interactions enter the data set in the order given in 'interact'
    for(i in 1:dim(all_inter)[3]){
      int  <- all_inter[,,i]
      data <- cbind(data, data[[paste("x", int[1], sep="")]] * data[[paste("x", int[2], sep="")]])
      names(data)[ncol(data)] <- paste("x", int[1], "_x", int[2], sep="")
    }

    # Set the iteration counter, variances, and tuning parameters
    counter <- 0
    if(missing(V0)){
      V0 <- (log(1.1)/qnorm(.975))^2   # 95% prior interval for the OR of about (0.91, 1.10) under the null
    }
    if(missing(V1)){
      V1 <- (log(4)/qnorm(.975))^2
    }
    if(missing(alpha)){
      alpha <- 1   # Non-informative beta prior
    }
    if(missing(beta)){
      beta <- 1    # Non-informative beta prior
    }
    if(missing(beta0)){   # Provide starting values if they are missing
      beta0 <- summary(glm(y ~ ., data = data, family = binomial))$coefficients[,1]
    }
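    ## A quick check of the prior-variance interpretation used above (illustrative only): under
    ## the spike, beta ~ N(0, V0) with V0 = (log(1.1)/qnorm(.975))^2, so the central 95% prior
    ## interval for the odds ratio exp(beta) should be roughly (1/1.1, 1.1):
    # V0_check <- (log(1.1)/qnorm(.975))^2
    # round(exp(qnorm(c(.025, .975), mean = 0, sd = sqrt(V0_check))), 3)   # 0.909 1.100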
    # Set initial vectors
    p_star0   <- c(rep(0, p + 1))                    # Current probability of inclusion
    p_star    <- c(rep(0, p + 1))                    # Next-step probability of inclusion
    expand_p0 <- c(rep(0, (given + num_inter + 1)))  # Vector for the previously expanded p_star
    expand_p  <- rep(0, (1 + given + num_inter))     # Next step for the expanded p_star
    variance  <- rep(0, (1 + given + num_inter))     # Vector for the exclusion variance, adjusted by groupings
    db        <- c(rep(0, (given + num_inter + 1)))  # Vector for the first derivative wrt beta
    dtheta    <- 0                                   # First derivative wrt theta (not used below)

    # Make an empty hessian matrix
    hessian <- matrix(0, (given + num_inter + 1), (given + num_inter + 1))

    # This is the actual selection loop
    while (counter <= maxit) {
      counter <- counter + 1
      mk <- exp(as.matrix(data[,-1]) %*% beta0)

      ### Calculate the probability of inclusion, accounting for grouping and interactions.
      # Get the number of main effects that were not indicators originally
      group_orig <- length(indicator)   # Number of ORIGINAL indicator variable groups, e.g. indicator <- list(group1=c(16,17), group2=c(18,19,20))
      group_size_orig <- 0              # Home for the count
      for(i in 1:group_orig){           # Look through the ORIGINAL indicator list
        group_size_orig <- group_size_orig + length(indicator[[i]])   # Total number of ORIGINAL indicator variables
      }
      no_indicators <- given - group_size_orig

      # Calculate the probability of inclusion, p_star, for main effects (first) and ORIGINAL indicators (second)
      for(i in 0:no_indicators){   # For non-dummied terms
        p_star[i+1] <- (dnorm(as.numeric(beta0[i+1]), mean = 0, sd = sqrt(V1))*theta0)^b /
          ((dnorm(as.numeric(beta0[i+1]), mean = 0, sd = sqrt(V1))*theta0)^b +
             (dnorm(as.numeric(beta0[i+1]), mean = 0, sd = sqrt(V0))*(1-theta0))^b)
      }
      p_star[1] <- 1   # The intercept is always included
      for(i in 1:group_orig){   # Each group of ORIGINAL indicators
        m  <- length(indicator[[i]])
        Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
        p_star[no_indicators + 1 + i] <-
          (prod(dnorm(as.numeric(beta0[(indicator[[i]]+1)]), mean = 0, sd = sqrt(V1)))*theta0)^b /
          ((prod(dnorm(as.numeric(beta0[(indicator[[i]]+1)]), mean = 0, sd = sqrt(V1)))*theta0)^b +
             (prod(dnorm(as.numeric(beta0[(indicator[[i]]+1)]), mean = 0, sd = sqrt(Vn)))*(1-theta0))^b)   # Spike weighted by (1-theta0)
      }
      # At this point, all of the original covariates have a p_star
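      ## The group adjustment above inflates the spike sd by qnorm(1-.05/(2*m))/1.96, the ratio
      ## of a Bonferroni-corrected normal quantile for m grouped terms to the usual 1.96 (so for
      ## m = 1, Vn is essentially V0). A quick look at the inflation factor (my reading of the
      ## formula; see the appendix of the paper):
      # sapply(1:3, function(m) (qnorm(1 - .05/(2*m))/1.96)^2)   # about 1.00 1.31 1.49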
      # Make p_star for interactions that do not involve indicators (includes squared terms)
      for(i in 1:dim(interact_nogroup)[3]){
        int <- interact_nogroup[,,i]
        if(int[1] != int[2]){   # This is for interactions
          # Current estimates for the parent effects
          pb_star <- p_star[int[1]+1]
          pc_star <- p_star[int[2]+1]
          # Estimate of the interaction without the heredity constraint
          pi_star <- (dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V1))*theta0)^b /
            ((dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V1))*theta0)^b +
               (dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V0))*(1-theta0))^b)
          #############################################
          # pi_star*a[1]*(1-pb_star)*(1-pc_star)   # (0,0)
          # pi_star*a[2]*(pb_star)*(1-pc_star)     # (1,0)
          # pi_star*a[3]*(1-pb_star)*(pc_star)     # (0,1)
          # pi_star*a[4]*pb_star*pc_star           # (1,1)
          #############################################
          p_star[i + 1 + no_indicators + group_orig] <-
            pi_star*heredity[1]*(1-pb_star)*(1-pc_star) +
            pi_star*heredity[2]*(pb_star)*(1-pc_star) +
            pi_star*heredity[3]*(1-pb_star)*(pc_star) +
            pi_star*heredity[4]*pb_star*pc_star
        } else {   # This is for polynomial terms
          # Inclusion for the parent
          pa_star <- p_star[int[1]+1]
          # Estimate of the polynomial without the heredity constraint
          pp_star <- (dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V1))*theta0)^b /
            ((dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V1))*theta0)^b +
               (dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V0))*(1-theta0))^b)
          p_star[i + 1 + no_indicators + group_orig] <- pp_star*poly[1]*(1-pa_star) + pp_star*poly[2]*pa_star
        }
      }

      # Make an array of the p_star locations of the parent terms in the groups
      unique <- numeric()   # Place to catch unique interaction term places
      for(i in 1:dim(new_interact)[3]){   # Look through each new interaction (because they involve an indicator)
        catch <- c(0,0)                   # Home for which p_star it came from
        for(j in 1:2){                    # Look through each interaction component
          for(k in 1:length(indicator)){  # Look through each indicator
            if(new_interact[,j,i] %in% indicator[[k]]){   # Is this component in this indicator group?
              catch[j] <- k + no_indicators               # Yes: place = indicator group # + number of non-indicator covariates
            }
          }
          if(catch[j] == 0){                 # No:
            catch[j] <- new_interact[,j,i]   # place = the interaction term itself
          }
        }
        unique <- rbind(unique, catch)    # Put them all together
      }
      unique <- unique(unique)            # Remove duplicates

      # Evaluate p_star for interactions that come from groups
      for(i in (group_orig+1):group){
        m  <- length(new_indicator[[i]])
        Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
        # p_star for the indicator interaction without the heredity structure
        p_dum_intx <-
          (prod(dnorm(as.numeric(beta0[(new_indicator[[i]] + 1 + length(interact_nogroup)/2)]), mean = 0, sd = sqrt(V1)))*theta0)^b /
          ((prod(dnorm(as.numeric(beta0[(new_indicator[[i]] + 1 + length(interact_nogroup)/2)]), mean = 0, sd = sqrt(V1)))*theta0)^b +
             (prod(dnorm(as.numeric(beta0[(new_indicator[[i]] + 1 + length(interact_nogroup)/2)]), mean = 0, sd = sqrt(Vn)))*(1-theta0))^b)   # Spike weighted by (1-theta0)
        # p_star for the parent terms of the interaction
        pb_star <- p_star[unique[(i-group_orig),1]+1]
        pc_star <- p_star[unique[(i-group_orig),2]+1]
        # p_star for the interaction with the heredity structure
        p_star[1 + i + no_indicators + dim(interact_nogroup)[3]] <-
          p_dum_intx*heredity[1]*(1-pb_star)*(1-pc_star) +
          p_dum_intx*heredity[2]*(pb_star)*(1-pc_star) +
          p_dum_intx*heredity[3]*(1-pb_star)*(pc_star) +
          p_dum_intx*heredity[4]*pb_star*pc_star
      }

      # Calculate the derivative of Q wrt beta_j for main effects that are not indicators
      for(i in 0:no_indicators){
        db[i+1] <- sum((data$y - mk/(1+mk))*data[i+2]) -
          as.numeric(beta0[i+1])*((1-as.numeric(p_star[i+1]))*(1/V0) + as.numeric(p_star[i+1])*(1/V1))
        variance[i+1] <- V0
        expand_p[i+1] <- p_star[i+1]
      }

      # Calculate the derivative of Q wrt beta_j for each group of ORIGINAL indicators
      for(i in 1:group_orig){
        for(j in indicator[[i]]){   # Note that we are traversing the values of each group
          m  <- length(indicator[[i]])                   # Size of the indicator group
          Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
          db[j+1] <- sum((data$y - mk/(1+mk))*data[[names(data[2 + j])]]) -
            as.numeric(beta0[j + 1])*((1-as.numeric(p_star[no_indicators + 1 + i]))*(1/Vn) +
                                        as.numeric(p_star[no_indicators + 1 + i])*(1/V1))
          variance[j+1] <- Vn
          expand_p[j+1] <- p_star[no_indicators + 1 + i]
        }
      }

      # Calculate the derivative of Q wrt beta_j for interaction terms WITHOUT grouping
      for(i in 1:(length(interact_nogroup)/2)){   # Double-check the indexes
        db[i+no_indicators+1+group_size_orig] <- sum((data$y - mk/(1+mk))*data[i+no_indicators+2+group_size_orig]) -
          as.numeric(beta0[i+no_indicators+1+group_size_orig]) *
          ((1-as.numeric(p_star[i+no_indicators+1+group_orig]))*(1/V0) +
             as.numeric(p_star[i+no_indicators+1+group_orig])*(1/V1))
        variance[i+no_indicators+1+group_size_orig] <- V0
        expand_p[i+no_indicators+1+group_size_orig] <- p_star[i+no_indicators+1+group_orig]
      }

      # Calculate the derivative of Q wrt beta_j for interaction terms WITH grouping
      for(i in (group_orig+1):group){
        for(j in new_indicator[[i]]){   # Note that we are traversing the values of each group
          m  <- length(new_indicator[[i]])               # Size of the indicator group
          Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
          db[1 + length(interact_nogroup)/2 + j] <-
            sum((data$y - mk/(1+mk))*data[[names(data[2 + length(interact_nogroup)/2 + j])]]) -
            as.numeric(beta0[1 + j + length(interact_nogroup)/2]) *
            ((1-as.numeric(p_star[no_indicators + length(interact_nogroup)/2 + i + 1]))*(1/Vn) +
               as.numeric(p_star[no_indicators + length(interact_nogroup)/2 + i + 1])*(1/V1))
          variance[1 + length(interact_nogroup)/2 + j] <- Vn
          expand_p[1 + length(interact_nogroup)/2 + j] <- p_star[no_indicators + length(interact_nogroup)/2 + i + 1]
        }
      }
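      ## Side note (illustrative): setting the prior ridge term aside, each db entry above is the
      ## logistic score sum_i (y_i - p_i) * x_ij, which is approximately zero at the unpenalized
      ## MLE. A quick standalone check with made-up data:
      # x  <- rnorm(100); y2 <- rbinom(100, 1, plogis(0.5*x))
      # f  <- glm(y2 ~ x, family = binomial)
      # sum((y2 - fitted(f)) * x)   # essentially zero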
      # Calculate the derivative of Q wrt beta_j and beta_k for parent terms that are not indicators
      for(i in 0:no_indicators){
        for(j in i:(given + num_inter)){
          if (i != j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]*data[[names(data)[2 + j]]]*(mk/(1+mk))*(1/(1+mk)))
          if (i == j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]^2*(mk/(1+mk))*(1/(1+mk))) -
              ((1-as.numeric(p_star[i+1]))*(1/V0) + as.numeric(p_star[i+1])*(1/V1))
          hessian[j+1, i+1] <- hessian[i+1, j+1]   # Reflect over the diagonal
        }
      }

      # Calculate the derivative of Q wrt beta_j and beta_k for ORIGINAL indicators
      for(i in (no_indicators + 1):(no_indicators + group_size_orig)){
        for(j in i:(given + num_inter)){
          for(place in 1:group_orig){   # Set the correct p_star value by searching for the group in which covariate 'i' lives
            if(!is.na(match(i, indicator[[place]]))){        # If number i is in the group
              m  <- length(indicator[[place]])               # Number of indicators in the group
              Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
              if (i != j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]*data[[names(data)[2 + j]]]*(mk/(1+mk))*(1/(1+mk)))
              if (i == j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]^2*(mk/(1+mk))*(1/(1+mk))) -
                  ((1-as.numeric(p_star[no_indicators + 1 + place]))*(1/Vn) + as.numeric(p_star[no_indicators + 1 + place])*(1/V1))
              hessian[j+1, i+1] <- hessian[i+1, j+1]   # Reflect over the diagonal
            }
          }
        }
      }

      # Calculate the derivative of Q wrt beta_j and beta_k for interactions WITHOUT indicators
      for(i in (no_indicators + group_size_orig + 1):(no_indicators + group_size_orig + length(interact_nogroup)/2)){
        for(j in i:(given + num_inter)){
          if (i != j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]*data[[names(data)[2 + j]]]*(mk/(1+mk))*(1/(1+mk)))
          if (i == j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]^2*(mk/(1+mk))*(1/(1+mk))) -
              ((1-as.numeric(p_star[i - group_size_orig + group_orig + 1]))*(1/V0) + as.numeric(p_star[i - group_size_orig + group_orig + 1])*(1/V1))
          hessian[j+1, i+1] <- hessian[i+1, j+1]   # Reflect over the diagonal
        }
      }

      # Calculate the derivative of Q wrt beta_j and beta_k for interactions WITH indicators
      for(i in (no_indicators + group_size_orig + length(interact_nogroup)/2 + 1):(given + num_inter)){
        for(j in i:(given + num_inter)){
          for(place in (group_orig + 1):group){   # Set the correct p_star value by searching for the group in which covariate 'i' lives
            if(!is.na(match(i, (new_indicator[[place]] + length(interact_nogroup)/2)))){   # If number i is in the group
              m  <- length(new_indicator[[place]])           # Number of indicators in the group
              Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
              if (i != j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]*data[[names(data)[2 + j]]]*(mk/(1+mk))*(1/(1+mk)))
              if (i == j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]^2*(mk/(1+mk))*(1/(1+mk))) -
                  ((1-as.numeric(p_star[length(interact_nogroup)/2 + no_indicators + 1 + place]))*(1/Vn) + as.numeric(p_star[length(interact_nogroup)/2 + no_indicators + 1 + place])*(1/V1))
              hessian[j+1, i+1] <- hessian[i+1, j+1]   # Reflect over the diagonal
            }
          }
        }
      }

      # Hessian evaluated at the current step
      hessian_inv <- solve(hessian)

      # Calculate the next iterate of beta using a Newton-type update
      beta1 <- beta0 - hessian_inv %*% db

      # Solve the closed form for theta
      theta1 <- (sum(p_star[-1]) + alpha - 1)/(p + beta + alpha - 2)
      if(theta1 < 0.000001 | theta1 > 0.999999){   # Rule out the boundaries
        return(list(beta = beta1, p = p_star, theta = theta1, iter = counter))
      }
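      ## Side check (illustrative only): the closed-form theta update above is the mode of a
      ## Beta(sum(p_star[-1]) + alpha, p - sum(p_star[-1]) + beta) density. For example, with
      ## p = 10 terms, sum(p_star[-1]) = 4, and alpha = beta = 1:
      # optimize(function(th) dbeta(th, 4 + 1, 10 - 4 + 1), c(0, 1), maximum = TRUE)$maximum   # about 0.4
      # (4 + 1 - 1) / (10 + 1 + 1 - 2)                                                         # 0.4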
      # Make a decision on convergence using the difference in objective values.
      # expand_p and variance were made in the db loops: the first 'expands' the grouping gammas,
      # the second logs the variance used for each term (adjusted by groupings)
      Qlogistic <- function(data, beta_test, theta, p_star, expand_p, variance){
        size <- length(p_star) - 1
        mk <- exp(as.matrix(data[,-1]) %*% beta_test)
        Q <- sum(data$y*log(mk/(1+mk))) + sum((1-data$y)*log(1/(1+mk))) -
          1/(2*V1)*beta_test[1]^2 -
          (1/2)*sum(beta_test[-1]^2*((1-as.numeric(expand_p[-1]))*(1/variance[-1]) + as.numeric(expand_p[-1])*(1/V1))) +
          sum(p_star[-1])*log(theta/(1-theta)) + (alpha-1)*log(theta) + (beta+size-1)*log(1-theta)
        return(Q)
      }
      Rlogistic <- function(p_star, theta){
        R <- sum(p_star[-1])*log(theta) + (length(p_star) - 1 - sum(p_star[-1]))*log(1-theta)
        return(R)
      }
      # Find Q and R at the current and previous steps
      test  <- Qlogistic(data, beta1, theta1, p_star0, expand_p0, variance) - Rlogistic(p_star0, theta1)
      test2 <- Qlogistic(data, beta0, theta0, p_star0, expand_p0, variance) - Rlogistic(p_star0, theta0)
      print(counter)

      # Convergence diagnosis
      if (abs(test - test2) < tol){   # Absolute change in the objective
        cat("\nSuccessfully Converged\n", "beta", beta1, "\np_est", p_star, "\ntheta", theta1,
            "\nitr", counter, "\nexp", expand_p)
        return(list(beta = beta1, p = p_star, theta = theta1, iter = counter, expand_p = expand_p))
      } else {
        beta0     <- beta1
        theta0    <- theta1
        p_star0   <- p_star
        expand_p0 <- expand_p
      }
    }
    print("Convergence Failed")
    return(list(beta = beta1, p = p_star, theta = theta1, iter = counter, expand_p = expand_p))
  }
}
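## Standalone illustration (not called anywhere) of what the annealing exponent b does to the
## E-step: for b < 1 the inclusion probability is flattened toward 1/2, so early (low-b) passes
## are less committal, and the mode found is passed forward as b rises toward 1. Values below
## are made up for illustration only.
# anneal_p_star <- function(beta_j, theta, V0, V1, b){
#   slab  <- (dnorm(beta_j, 0, sqrt(V1)) * theta)^b
#   spike <- (dnorm(beta_j, 0, sqrt(V0)) * (1 - theta))^b
#   slab / (slab + spike)
# }
# sapply(c(.2, .5, 1), function(b)
#   anneal_p_star(beta_j = 0.3, theta = 0.5, V0 = 0.01, V1 = 0.5, b = b))   # about 0.62 0.77 0.92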
#########################################################
# Data Generation for the Simulated Application Scenarios
# (Simulation Section 3)
#########################################################

#### Simulate data ####
library(MASS)
cor <- c(00,04,08)   # Correlation between the continuous covariates (used only to label the output files)
for(m in c(1,2,3)){   # For each simulated model (see the Simulation 3 section of the CSDA paper)
  for(c in 1:3){      # For each correlation structure

    # Make a place to collect output
    out  <- data.frame(t(rep(0,112)))   # beta-59, p-50 (ignores grouping), theta-1, iter-1, time-1
    out2 <- data.frame(t(rep(0,59)))    # p-59 (expands p for grouping)

    # Label the output columns
    ncov <- 59
    for(i in 1:length(out)){
      if(i <= ncov){
        names(out)[i] <- paste("beta", i-1, sep="")
      }
      if(i > ncov){
        names(out)[i] <- paste("p", i-60, sep="")
      }
    }
    names(out)[110] <- "theta"   # Theta hat
    names(out)[111] <- "iter"    # Number of iterations
    names(out)[112] <- "time"    # Time to converge

    for(t in 1:500){   # For 500 simulations
      set.seed(t)
      len  <- 1000         # For 1000 individuals
      cont <- 4            # Number of continuous covariates
      mu   <- rep(0,cont)  # Mean of the continuous covariates
      # Correlation structure
      if(c == 1){
        sigma <- diag(cont)
      }else if(c == 2){
        sigma <- matrix(rep(.4), cont, cont) + .6*diag(cont)
      }else if(c == 3){
        sigma <- matrix(rep(.8), cont, cont) + .2*diag(cont)
      }
      # Set the heredity structure
      # a <- c(0,0,0,1)   # Strong
      a <- c(0,1,1,1)     # Weak
      # Polynomial structure
      q <- c(0,1)

      data <- mvrnorm(n = len, mu, sigma)   # Simulate the continuous covariates
      discrete1 <- rbinom(len,1,.5)         # Simulate 4 two-level qualitative covariates
      discrete2 <- rbinom(len,1,.5)
      discrete3 <- rbinom(len,1,.5)
      discrete4 <- rbinom(len,1,.5)
      group2 <- t(rmultinom(len, 1, c(.25,.25,.25))[2:3,])   # Simulate one 3-level qualitative covariate (rmultinom rescales the probabilities)
      # Add the intercept term and combine the data
      data <- data.frame(cbind(rep(1), discrete1, discrete2, discrete3, discrete4, data, group2))
      # Make names for the columns
      for(i in 0:(ncol(data)-1)){
        names(data)[i+1] <- paste("x", i, sep="")
      }

      # Make the interaction terms needed to calculate y ('myEM' will recreate these terms given an array of main-effect pairs)
      given <- ncol(data) - 1
      indicator <- list(group1 = c(9,10))

      ## Test: look at all pairwise interactions as well as the squared terms for the continuous covariates
      par    <- c(1,2,3,4,5,6,7,8,9)                         # List of covariates (only ONE term is needed from the group (9))
      comb   <- combn(par, 2)                                # All possible interaction pairs
      combin <- cbind(comb, c(5,5), c(6,6), c(7,7), c(8,8))  # Squared terms for the continuous covariates
      beta_home <- interact <- array(combin, c(1,2,40))      # Make an array with all of the terms

      # Make more indicators if they're in an interaction (can handle interactions of indicators)
      new_interact     <- numeric()   # Home for new interactions
      interact_nogroup <- numeric()   # Separate array for interactions that do not involve a group
      new_indicator    <- indicator   # Home for new indicator groups
      for(i in 1:dim(interact)[3]){                     # Look through all the interaction terms given
        num_dum <- 0                                    # Count whether 0, 1, or 2 of the interaction terms are part of an indicator group
        place   <- c(0,0)                               # Which group are they part of
        for(k in 1:dim(interact)[2]){                   # Look through the 2 representative components of the interaction
          for(j in 1:length(indicator)){                # Look through the length of the indicator group
            if(interact[,k,i] %in% indicator[[j]]){     # Is one of the interaction terms part of an indicator group?
              num_dum  <- num_dum + 1                   # If yes, record
              place[k] <- j                             # Note which group it is in
            }
          }
        }
        if(num_dum == 2){          # If both are indicators - make new indicator terms and a new interaction
          new_indicator <- c(new_indicator,
                             list(group = seq(1, (length(indicator[[place[1]]])*length(indicator[[place[2]]]))) +
                                    max(unlist(new_indicator), given)))
          new_interact  <- rbind(new_interact, expand.grid(indicator[[place[1]]], indicator[[place[2]]]))
        }else if(num_dum == 1){    # If one is an indicator - make new indicator terms and a new interaction
          new_indicator <- c(new_indicator,
                             list(group = seq(1, length(indicator[[sum(place)]])) +
                                    max(unlist(new_indicator), given)))
          new_interact  <- rbind(new_interact, expand.grid(indicator[[sum(place)]], interact[,match(0,place),i]))
        }else if(num_dum == 0){    # If neither is an indicator - append the interaction
          interact_nogroup <- rbind(interact_nogroup, interact[,,i])
        }
      }
      new_interact     <- array(as.vector(rbind(new_interact[,1], new_interact[,2])), dim = c(1,2,nrow(new_interact)))
      interact_nogroup <- array(as.vector(rbind(interact_nogroup[,1], interact_nogroup[,2])), dim = c(1,2,nrow(interact_nogroup)))
      # Make a vector with all of the interaction terms in it
      all_inter <- array(c(as.vector(interact_nogroup), as.vector(new_interact)),
                         dim = c(1,2,(dim(new_interact)[3] + dim(interact_nogroup)[3])))
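      ## A quick look at what the expansion above produces (illustrative): with indicator group
      ## c(9, 10) interacting with main effect x1, expand.grid() yields the two dummy-by-parent
      ## pairs that become the grouped interaction columns x9_x1 and x10_x1.
      # expand.grid(c(9, 10), 1)
      #   Var1 Var2
      # 1    9    1
      # 2   10    1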
      # Add interactions to the data set and label them 'x4_x6'. Interactions enter the data set in the order given in 'interact'
      for(i in 1:dim(all_inter)[3]){
        int  <- all_inter[,,i]
        data <- cbind(data, data[[paste("x", int[1], sep="")]] * data[[paste("x", int[2], sep="")]])
        names(data)[ncol(data)] <- paste("x", int[1], "_x", int[2], sep="")
      }

      # Build the true coefficient vector (59 terms)
      # Main-effect order: b b b b c c c c d2 d2 (4 binary, 4 continuous, 2 dummies of the 3-level factor)
      if(m == 1){
        beta_true <- c(0,-0.65,0.5,0,0,0.65,-0.5,0,0,0.6,0,0.6,rep(0,2),-0.6,rep(0,18),0.6,rep(0,6),0.5,rep(0,2),-0.6,0.5,rep(0,6),-0.6,0.5,rep(0,6))
      }else if(m == 2){
        beta_true <- c(0,0,0.5,0,0,0.65,-0.5,0,0,0.6,0,0.6,rep(0,2),-0.6,rep(0,18),0.6,rep(0,6),0.5,rep(0,2),-0.6,0.5,rep(0,6),-0.6,0.5,rep(0,6))
      }else if(m == 3){
        beta_true <- c(0,0,0.5,0,0,0,0,0,0,0.6,0,0.6,rep(0,2),-0.6,rep(0,18),0.6,rep(0,6),0.5,rep(0,2),-0.6,0.5,rep(0,6),-0.6,0.5,rep(0,6))
      }
      z  <- as.matrix(data) %*% beta_true   # Linear combination
      pr <- 1/(1+exp(-z))                   # Pass through the inverse-logit function
      y  <- rbinom(len, 1, pr)
      data <- data.frame(y, data)

      # Run a logistic regression model for starting points
      model <- summary(glm(y ~ ., data = data, family = binomial))
      # Start the timer
      ptm <- proc.time()
      # Give the initial coefficients
      beta0 <- model$coefficients[,1]

      # This is the deterministic annealing loop that calls the EMVS function 'myEM'
      for(loop in 2:10){
        b <- loop/10
        this <- myEM(theta0 = 0.50, beta0 = beta0, data = data[,1:12], indicator = indicator,
                     interact = beta_home, heredity = a, poly = q,
                     V0 = (log(1.05)/qnorm(.975))^2, V1 = (log(4)/qnorm(.975))^2,
                     alpha = 1, beta = 1, b = b)
        # Set the mode to the initial values for the next temperature
        beta0  <- this$beta
        theta0 <- this$theta
        if(b == 1){   # Save output at the final temperature
          time <- proc.time() - ptm
          out  <- rbind(out, c(this$beta, this$p, this$theta, this$iter, time[3]))
          out2[t,] <- this$expand_p
        }
      }
    }

    # Optional: set a working directory for the output
    #setwd("")
    # Optional: save the results to a file. Note that 'expand' gives p_star expanded for grouped covariates
    #write.table(out, file = paste("model_",m,"_app_weak_106_4_0",cor[c],sep=""))
    #write.table(out2, file = paste("model_",m,"_app_expand_weak_106_4_0",cor[c],sep=""))
  }
}
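## A possible post-processing sketch (illustrative; assumes the commented write.table() calls
## above were run, so a file with the name below exists). Median posterior inclusion
## probabilities across the 500 runs could be summarized along these lines:
# res   <- read.table("model_1_app_weak_106_4_00")
# pcols <- grep("^p", names(res))
# apply(res[-1, pcols], 2, median)   # drop the initializing row of zeros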