###########################################
##            Supplement A:              ##
##       EMVS for Binary Outcomes        ##
##        Application Simulation         ##
##           Beta-Binomial               ##
##       Deterministic Annealing         ##
##            4/22/2016                  ##
###########################################

## EMVS Function
# Give the function the initial parameters and the original data set (covariates labeled
# x1-xn, outcome y, plus an intercept column), along with v0 and v1.
# theta0      = initial theta (usually 0.5)
# beta0       = initial beta. If MISSING, myEM uses the MLE
# data        = n x (p+2) matrix of variables labeled 'y', 'x0', 'x1', ..., 'xp' where x0 is a
#               column of 1's; grouped covariates go at the end
# indicator   = list of which covariates are grouped indicators (already in indicator form)
# interact    = 1 x 2 x k array of interactions. Each column is a pair of parent terms. For example,
#               interact[,,3] = c(2,8) is the third interaction and it is between x2 and x8. If there
#               is an interaction with a group, just list one of the group terms, and list the grouped
#               interactions LAST. If interact is MISSING, then the total number of covariates is the
#               total number of parent effects
# heredity    = 4 x 1 column vector of the heredity structure to impose. For example, strong = c(0,0,0,1)
# poly        = heredity structure for squared terms
# alpha, beta = parameters for the beta prior (usually set to 1)
# b           = current 1/Temp of the deterministic annealing variant
######################################################################################################################
#### Note that running this function does not execute the deterministic annealing. A loop
#### is written in the Data Generation section (below) that introduces the deterministic annealing.
#### Also, this function will generate the interaction terms if they are provided.
#############################################################################################################
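## For reference, a minimal sketch of the argument formats described above. The indices and
## group sizes here are illustrative only and are not part of the simulation below.
# indicator_ex <- list(group1 = c(9, 10))   # x9, x10 are the dummies for one 3-level factor
# interact_ex  <- array(c(2, 8,             # interaction between x2 and x8
#                         5, 5),            # squared term for x5
#                       dim = c(1, 2, 2))
# strong_ex    <- c(0, 0, 0, 1)             # strong heredity: intx kept only if both parents are in
# weak_ex      <- c(0, 1, 1, 1)             # weak heredity: intx kept if at least one parent is in
# poly_ex      <- c(0, 1)                   # squared term kept only if its parent is in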
myEM <- function(theta0, beta0, data, indicator, interact, heredity, poly,
                 V0, V1, alpha, beta, b, tol = 1e-6, maxit = 750){

  # Get the number of main effects before interactions are made
  given <- ncol(data) - 2   # 1 for y and 1 for x0 <- c(1,1,...,1)

  ##### Interactions and Indicators #####
  # Note: this implementation handles the case where both 'indicator' and 'interact' are supplied.
  if(!missing(indicator) & !missing(interact)){

    # Identify which interactions involve indicators and append new indicators to the list.
    # Make more indicators if they're in an interaction (can handle interactions of indicators).
    new_interact     <- numeric()   # Home for new interactions
    interact_nogroup <- numeric()   # Separate array for interactions that do not involve a group
    new_indicator    <- indicator   # Home for new indicator groups
    for(i in 1:dim(interact)[3]){                     # Look through all the interaction terms given
      num_dum <- 0                                    # Count whether 0, 1, or 2 of the interaction terms are part of an indicator group
      place   <- c(0,0)                               # Identify which group they are part of
      for(k in 1:dim(interact)[2]){                   # Look through the 2 representative components of the interaction
        for(j in 1:length(indicator)){                # Look through the length of the indicator group
          if(interact[,k,i] %in% indicator[[j]]){     # Is one of the interaction terms part of an indicator group?
            num_dum  <- num_dum + 1                   # If yes, record
            place[k] <- j                             # Note which group it is in
          }
        }
      }
      if(num_dum == 2){          # If both are indicators - make new indicator terms and a new interaction
        new_indicator <- c(new_indicator,
                           list(group = seq(1, (length(indicator[[place[1]]])*length(indicator[[place[2]]]))) +
                                  max(unlist(new_indicator), given)))
        new_interact  <- rbind(new_interact, expand.grid(indicator[[place[1]]], indicator[[place[2]]]))
      }else if(num_dum == 1){    # If one is an indicator - make new indicator terms and a new interaction
        new_indicator <- c(new_indicator,
                           list(group = seq(1, length(indicator[[sum(place)]])) +
                                  max(unlist(new_indicator), given)))
        new_interact  <- rbind(new_interact, expand.grid(indicator[[sum(place)]], interact[,match(0,place),i]))
      }else if(num_dum == 0){    # If neither is an indicator - append the interaction
        interact_nogroup <- rbind(interact_nogroup, interact[,,i])
      }
    }
    new_interact     <- array(as.vector(rbind(new_interact[,1], new_interact[,2])),
                              dim = c(1,2,nrow(new_interact)))
    interact_nogroup <- array(as.vector(rbind(interact_nogroup[,1], interact_nogroup[,2])),
                              dim = c(1,2,nrow(interact_nogroup)))

    # Make a vector with all of the interaction terms in it
    all_inter <- array(c(as.vector(interact_nogroup), as.vector(new_interact)),
                       dim = c(1,2,(dim(new_interact)[3] + dim(interact_nogroup)[3])))

    # Figure out how many gamma terms we need (not including the intercept)
    group <- length(new_indicator)   # Number of indicator variable groups, e.g. new_indicator <- list(group1=c(16,17), group2=c(18,19,20))
    group_size <- 0                  # Home for the count
    for(i in 1:group){               # Look through the new indicator list
      group_size <- group_size + length(new_indicator[[i]])   # Total number of indicator variables
    }
    # Account for the number of interactions
    num_inter <- dim(all_inter)[3]
    # Number of gammas = main effects + number of interactions + number of grouped terms - number of groups
    p <- given + num_inter + group - group_size

    # Add interactions to the data set and label them 'x4_x6'. Interactions enter the data set in the order given in 'interact'
    for(i in 1:dim(all_inter)[3]){
      int  <- all_inter[,,i]
      data <- cbind(data, data[[paste("x", int[1], sep="")]] * data[[paste("x", int[2], sep="")]])
      names(data)[ncol(data)] <- paste("x", int[1], "_x", int[2], sep="")
    }

    # Set the iteration counter, variances, and tuning parameters
    counter <- 0
    if(missing(V0)){
      V0 <- (log(1.1)/qnorm(.975))^2   # 95% prior interval for the OR of about (0.91, 1.10) under the null
    }
    if(missing(V1)){
      V1 <- (log(4)/qnorm(.975))^2
    }
    if(missing(alpha)){
      alpha <- 1   # Non-informative beta prior
    }
    if(missing(beta)){
      beta <- 1    # Non-informative beta prior
    }
    if(missing(beta0)){   # Provide starting values if they are missing
      beta0 <- summary(glm(y ~ ., data = data, family = binomial))$coefficients[,1]
    }
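    ## A quick check of the prior-variance interpretation used above (illustrative only): under
    ## the spike, beta ~ N(0, V0) with V0 = (log(1.1)/qnorm(.975))^2, so the central 95% prior
    ## interval for the odds ratio exp(beta) should be roughly (1/1.1, 1.1):
    # V0_check <- (log(1.1)/qnorm(.975))^2
    # round(exp(qnorm(c(.025, .975), mean = 0, sd = sqrt(V0_check))), 3)   # 0.909 1.100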
    # Set initial vectors
    p_star0   <- c(rep(0, p + 1))                    # Current probability of inclusion
    p_star    <- c(rep(0, p + 1))                    # Next-step probability of inclusion
    expand_p0 <- c(rep(0, (given + num_inter + 1)))  # Vector for the previously expanded p_star
    expand_p  <- rep(0, (1 + given + num_inter))     # Next step for the expanded p_star
    variance  <- rep(0, (1 + given + num_inter))     # Vector for the exclusion variance, adjusted by groupings
    db        <- c(rep(0, (given + num_inter + 1)))  # Vector for the first derivative wrt beta
    dtheta    <- 0                                   # First derivative wrt theta (not used below)

    # Make an empty hessian matrix
    hessian <- matrix(0, (given + num_inter + 1), (given + num_inter + 1))

    # This is the actual selection loop
    while (counter <= maxit) {
      counter <- counter + 1
      mk <- exp(as.matrix(data[,-1]) %*% beta0)

      ### Calculate the probability of inclusion, accounting for grouping and interactions.
      # Get the number of main effects that were not indicators originally
      group_orig <- length(indicator)   # Number of ORIGINAL indicator variable groups, e.g. indicator <- list(group1=c(16,17), group2=c(18,19,20))
      group_size_orig <- 0              # Home for the count
      for(i in 1:group_orig){           # Look through the ORIGINAL indicator list
        group_size_orig <- group_size_orig + length(indicator[[i]])   # Total number of ORIGINAL indicator variables
      }
      no_indicators <- given - group_size_orig

      # Calculate the probability of inclusion, p_star, for main effects (first) and ORIGINAL indicators (second)
      for(i in 0:no_indicators){   # For non-dummied terms
        p_star[i+1] <- (dnorm(as.numeric(beta0[i+1]), mean = 0, sd = sqrt(V1))*theta0)^b /
          ((dnorm(as.numeric(beta0[i+1]), mean = 0, sd = sqrt(V1))*theta0)^b +
             (dnorm(as.numeric(beta0[i+1]), mean = 0, sd = sqrt(V0))*(1-theta0))^b)
      }
      p_star[1] <- 1   # The intercept is always included
      for(i in 1:group_orig){   # Each group of ORIGINAL indicators
        m  <- length(indicator[[i]])
        Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
        p_star[no_indicators + 1 + i] <-
          (prod(dnorm(as.numeric(beta0[(indicator[[i]]+1)]), mean = 0, sd = sqrt(V1)))*theta0)^b /
          ((prod(dnorm(as.numeric(beta0[(indicator[[i]]+1)]), mean = 0, sd = sqrt(V1)))*theta0)^b +
             (prod(dnorm(as.numeric(beta0[(indicator[[i]]+1)]), mean = 0, sd = sqrt(Vn)))*(1-theta0))^b)   # Spike weighted by (1-theta0)
      }
      # At this point, all of the original covariates have a p_star
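      ## The group adjustment above inflates the spike sd by qnorm(1-.05/(2*m))/1.96, the ratio
      ## of a Bonferroni-corrected normal quantile for m grouped terms to the usual 1.96 (so for
      ## m = 1, Vn is essentially V0). A quick look at the inflation factor (my reading of the
      ## formula; see the appendix of the paper):
      # sapply(1:3, function(m) (qnorm(1 - .05/(2*m))/1.96)^2)   # about 1.00 1.31 1.49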
      # Make p_star for interactions that do not involve indicators (includes squared terms)
      for(i in 1:dim(interact_nogroup)[3]){
        int <- interact_nogroup[,,i]
        if(int[1] != int[2]){   # This is for interactions
          # Current estimates for the parent effects
          pb_star <- p_star[int[1]+1]
          pc_star <- p_star[int[2]+1]
          # Estimate of the interaction without the heredity constraint
          pi_star <- (dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V1))*theta0)^b /
            ((dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V1))*theta0)^b +
               (dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V0))*(1-theta0))^b)
          #############################################
          # pi_star*a[1]*(1-pb_star)*(1-pc_star)   # (0,0)
          # pi_star*a[2]*(pb_star)*(1-pc_star)     # (1,0)
          # pi_star*a[3]*(1-pb_star)*(pc_star)     # (0,1)
          # pi_star*a[4]*pb_star*pc_star           # (1,1)
          #############################################
          p_star[i + 1 + no_indicators + group_orig] <-
            pi_star*heredity[1]*(1-pb_star)*(1-pc_star) +
            pi_star*heredity[2]*(pb_star)*(1-pc_star) +
            pi_star*heredity[3]*(1-pb_star)*(pc_star) +
            pi_star*heredity[4]*pb_star*pc_star
        } else {   # This is for polynomial terms
          # Inclusion for the parent
          pa_star <- p_star[int[1]+1]
          # Estimate of the polynomial without the heredity constraint
          pp_star <- (dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V1))*theta0)^b /
            ((dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V1))*theta0)^b +
               (dnorm(as.numeric(beta0[i + 1 + given]), mean = 0, sd = sqrt(V0))*(1-theta0))^b)
          p_star[i + 1 + no_indicators + group_orig] <- pp_star*poly[1]*(1-pa_star) + pp_star*poly[2]*pa_star
        }
      }

      # Make an array of the p_star locations of the parent terms in the groups
      unique <- numeric()   # Place to catch unique interaction term places
      for(i in 1:dim(new_interact)[3]){   # Look through each new interaction (because they involve an indicator)
        catch <- c(0,0)                   # Home for which p_star it came from
        for(j in 1:2){                    # Look through each interaction component
          for(k in 1:length(indicator)){  # Look through each indicator
            if(new_interact[,j,i] %in% indicator[[k]]){   # Is this component in this indicator group?
              catch[j] <- k + no_indicators               # Yes: place = indicator group # + number of non-indicator covariates
            }
          }
          if(catch[j] == 0){                 # No:
            catch[j] <- new_interact[,j,i]   # place = the interaction term itself
          }
        }
        unique <- rbind(unique, catch)    # Put them all together
      }
      unique <- unique(unique)            # Remove duplicates

      # Evaluate p_star for interactions that come from groups
      for(i in (group_orig+1):group){
        m  <- length(new_indicator[[i]])
        Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
        # p_star for the indicator interaction without the heredity structure
        p_dum_intx <-
          (prod(dnorm(as.numeric(beta0[(new_indicator[[i]] + 1 + length(interact_nogroup)/2)]), mean = 0, sd = sqrt(V1)))*theta0)^b /
          ((prod(dnorm(as.numeric(beta0[(new_indicator[[i]] + 1 + length(interact_nogroup)/2)]), mean = 0, sd = sqrt(V1)))*theta0)^b +
             (prod(dnorm(as.numeric(beta0[(new_indicator[[i]] + 1 + length(interact_nogroup)/2)]), mean = 0, sd = sqrt(Vn)))*(1-theta0))^b)   # Spike weighted by (1-theta0)
        # p_star for the parent terms of the interaction
        pb_star <- p_star[unique[(i-group_orig),1]+1]
        pc_star <- p_star[unique[(i-group_orig),2]+1]
        # p_star for the interaction with the heredity structure
        p_star[1 + i + no_indicators + dim(interact_nogroup)[3]] <-
          p_dum_intx*heredity[1]*(1-pb_star)*(1-pc_star) +
          p_dum_intx*heredity[2]*(pb_star)*(1-pc_star) +
          p_dum_intx*heredity[3]*(1-pb_star)*(pc_star) +
          p_dum_intx*heredity[4]*pb_star*pc_star
      }

      # Calculate the derivative of Q wrt beta_j for main effects that are not indicators
      for(i in 0:no_indicators){
        db[i+1] <- sum((data$y - mk/(1+mk))*data[i+2]) -
          as.numeric(beta0[i+1])*((1-as.numeric(p_star[i+1]))*(1/V0) + as.numeric(p_star[i+1])*(1/V1))
        variance[i+1] <- V0
        expand_p[i+1] <- p_star[i+1]
      }

      # Calculate the derivative of Q wrt beta_j for each group of ORIGINAL indicators
      for(i in 1:group_orig){
        for(j in indicator[[i]]){   # Note that we are traversing the values of each group
          m  <- length(indicator[[i]])                   # Size of the indicator group
          Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
          db[j+1] <- sum((data$y - mk/(1+mk))*data[[names(data[2 + j])]]) -
            as.numeric(beta0[j + 1])*((1-as.numeric(p_star[no_indicators + 1 + i]))*(1/Vn) +
                                        as.numeric(p_star[no_indicators + 1 + i])*(1/V1))
          variance[j+1] <- Vn
          expand_p[j+1] <- p_star[no_indicators + 1 + i]
        }
      }

      # Calculate the derivative of Q wrt beta_j for interaction terms WITHOUT grouping
      for(i in 1:(length(interact_nogroup)/2)){   # Double-check the indexes
        db[i+no_indicators+1+group_size_orig] <- sum((data$y - mk/(1+mk))*data[i+no_indicators+2+group_size_orig]) -
          as.numeric(beta0[i+no_indicators+1+group_size_orig]) *
          ((1-as.numeric(p_star[i+no_indicators+1+group_orig]))*(1/V0) +
             as.numeric(p_star[i+no_indicators+1+group_orig])*(1/V1))
        variance[i+no_indicators+1+group_size_orig] <- V0
        expand_p[i+no_indicators+1+group_size_orig] <- p_star[i+no_indicators+1+group_orig]
      }

      # Calculate the derivative of Q wrt beta_j for interaction terms WITH grouping
      for(i in (group_orig+1):group){
        for(j in new_indicator[[i]]){   # Note that we are traversing the values of each group
          m  <- length(new_indicator[[i]])               # Size of the indicator group
          Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
          db[1 + length(interact_nogroup)/2 + j] <-
            sum((data$y - mk/(1+mk))*data[[names(data[2 + length(interact_nogroup)/2 + j])]]) -
            as.numeric(beta0[1 + j + length(interact_nogroup)/2]) *
            ((1-as.numeric(p_star[no_indicators + length(interact_nogroup)/2 + i + 1]))*(1/Vn) +
               as.numeric(p_star[no_indicators + length(interact_nogroup)/2 + i + 1])*(1/V1))
          variance[1 + length(interact_nogroup)/2 + j] <- Vn
          expand_p[1 + length(interact_nogroup)/2 + j] <- p_star[no_indicators + length(interact_nogroup)/2 + i + 1]
        }
      }
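      ## Side note (illustrative): setting the prior ridge term aside, each db entry above is the
      ## logistic score sum_i (y_i - p_i) * x_ij, which is approximately zero at the unpenalized
      ## MLE. A quick standalone check with made-up data:
      # x  <- rnorm(100); y2 <- rbinom(100, 1, plogis(0.5*x))
      # f  <- glm(y2 ~ x, family = binomial)
      # sum((y2 - fitted(f)) * x)   # essentially zero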
      # Calculate the derivative of Q wrt beta_j and beta_k for parent terms that are not indicators
      for(i in 0:no_indicators){
        for(j in i:(given + num_inter)){
          if (i != j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]*data[[names(data)[2 + j]]]*(mk/(1+mk))*(1/(1+mk)))
          if (i == j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]^2*(mk/(1+mk))*(1/(1+mk))) -
              ((1-as.numeric(p_star[i+1]))*(1/V0) + as.numeric(p_star[i+1])*(1/V1))
          hessian[j+1, i+1] <- hessian[i+1, j+1]   # Reflect over the diagonal
        }
      }

      # Calculate the derivative of Q wrt beta_j and beta_k for ORIGINAL indicators
      for(i in (no_indicators + 1):(no_indicators + group_size_orig)){
        for(j in i:(given + num_inter)){
          for(place in 1:group_orig){   # Set the correct p_star value by searching for the group in which covariate 'i' lives
            if(!is.na(match(i, indicator[[place]]))){        # If number i is in the group
              m  <- length(indicator[[place]])               # Number of indicators in the group
              Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
              if (i != j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]*data[[names(data)[2 + j]]]*(mk/(1+mk))*(1/(1+mk)))
              if (i == j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]^2*(mk/(1+mk))*(1/(1+mk))) -
                  ((1-as.numeric(p_star[no_indicators + 1 + place]))*(1/Vn) + as.numeric(p_star[no_indicators + 1 + place])*(1/V1))
              hessian[j+1, i+1] <- hessian[i+1, j+1]   # Reflect over the diagonal
            }
          }
        }
      }

      # Calculate the derivative of Q wrt beta_j and beta_k for interactions WITHOUT indicators
      for(i in (no_indicators + group_size_orig + 1):(no_indicators + group_size_orig + length(interact_nogroup)/2)){
        for(j in i:(given + num_inter)){
          if (i != j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]*data[[names(data)[2 + j]]]*(mk/(1+mk))*(1/(1+mk)))
          if (i == j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]^2*(mk/(1+mk))*(1/(1+mk))) -
              ((1-as.numeric(p_star[i - group_size_orig + group_orig + 1]))*(1/V0) + as.numeric(p_star[i - group_size_orig + group_orig + 1])*(1/V1))
          hessian[j+1, i+1] <- hessian[i+1, j+1]   # Reflect over the diagonal
        }
      }

      # Calculate the derivative of Q wrt beta_j and beta_k for interactions WITH indicators
      for(i in (no_indicators + group_size_orig + length(interact_nogroup)/2 + 1):(given + num_inter)){
        for(j in i:(given + num_inter)){
          for(place in (group_orig + 1):group){   # Set the correct p_star value by searching for the group in which covariate 'i' lives
            if(!is.na(match(i, (new_indicator[[place]] + length(interact_nogroup)/2)))){   # If number i is in the group
              m  <- length(new_indicator[[place]])           # Number of indicators in the group
              Vn <- ((qnorm(1-.05/(2*m))*sqrt(V0))/1.96)^2   # Variance adjustment (see appendix)
              if (i != j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]*data[[names(data)[2 + j]]]*(mk/(1+mk))*(1/(1+mk)))
              if (i == j) hessian[i+1, j+1] <- -sum(data[[names(data)[2 + i]]]^2*(mk/(1+mk))*(1/(1+mk))) -
                  ((1-as.numeric(p_star[length(interact_nogroup)/2 + no_indicators + 1 + place]))*(1/Vn) + as.numeric(p_star[length(interact_nogroup)/2 + no_indicators + 1 + place])*(1/V1))
              hessian[j+1, i+1] <- hessian[i+1, j+1]   # Reflect over the diagonal
            }
          }
        }
      }

      # Hessian evaluated at the current step
      hessian_inv <- solve(hessian)

      # Calculate the next iterate of beta using a Newton-type update
      beta1 <- beta0 - hessian_inv %*% db

      # Solve the closed form for theta
      theta1 <- (sum(p_star[-1]) + alpha - 1)/(p + beta + alpha - 2)
      if(theta1 < 0.000001 | theta1 > 0.999999){   # Rule out the boundaries
        return(list(beta = beta1, p = p_star, theta = theta1, iter = counter))
      }
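      ## Side check (illustrative only): the closed-form theta update above is the mode of a
      ## Beta(sum(p_star[-1]) + alpha, p - sum(p_star[-1]) + beta) density. For example, with
      ## p = 10 terms, sum(p_star[-1]) = 4, and alpha = beta = 1:
      # optimize(function(th) dbeta(th, 4 + 1, 10 - 4 + 1), c(0, 1), maximum = TRUE)$maximum   # about 0.4
      # (4 + 1 - 1) / (10 + 1 + 1 - 2)                                                         # 0.4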
      # Make a decision on convergence using the difference in objective values.
      # expand_p and variance were made in the db loops: the first 'expands' the grouping gammas,
      # the second logs the variance used for each term (adjusted by groupings)
      Qlogistic <- function(data, beta_test, theta, p_star, expand_p, variance){
        size <- length(p_star) - 1
        mk <- exp(as.matrix(data[,-1]) %*% beta_test)
        Q <- sum(data$y*log(mk/(1+mk))) + sum((1-data$y)*log(1/(1+mk))) -
          1/(2*V1)*beta_test[1]^2 -
          (1/2)*sum(beta_test[-1]^2*((1-as.numeric(expand_p[-1]))*(1/variance[-1]) + as.numeric(expand_p[-1])*(1/V1))) +
          sum(p_star[-1])*log(theta/(1-theta)) + (alpha-1)*log(theta) + (beta+size-1)*log(1-theta)
        return(Q)
      }
      Rlogistic <- function(p_star, theta){
        R <- sum(p_star[-1])*log(theta) + (length(p_star) - 1 - sum(p_star[-1]))*log(1-theta)
        return(R)
      }
      # Find Q and R at the current and previous steps
      test  <- Qlogistic(data, beta1, theta1, p_star0, expand_p0, variance) - Rlogistic(p_star0, theta1)
      test2 <- Qlogistic(data, beta0, theta0, p_star0, expand_p0, variance) - Rlogistic(p_star0, theta0)
      print(counter)

      # Convergence diagnosis
      if (abs(test - test2) < tol){   # Absolute change in the objective
        cat("\nSuccessfully Converged\n", "beta", beta1, "\np_est", p_star, "\ntheta", theta1,
            "\nitr", counter, "\nexp", expand_p)
        return(list(beta = beta1, p = p_star, theta = theta1, iter = counter, expand_p = expand_p))
      } else {
        beta0     <- beta1
        theta0    <- theta1
        p_star0   <- p_star
        expand_p0 <- expand_p
      }
    }
    print("Convergence Failed")
    return(list(beta = beta1, p = p_star, theta = theta1, iter = counter, expand_p = expand_p))
  }
}
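## Standalone illustration (not called anywhere) of what the annealing exponent b does to the
## E-step: for b < 1 the inclusion probability is flattened toward 1/2, so early (low-b) passes
## are less committal, and the mode found is passed forward as b rises toward 1. Values below
## are made up for illustration only.
# anneal_p_star <- function(beta_j, theta, V0, V1, b){
#   slab  <- (dnorm(beta_j, 0, sqrt(V1)) * theta)^b
#   spike <- (dnorm(beta_j, 0, sqrt(V0)) * (1 - theta))^b
#   slab / (slab + spike)
# }
# sapply(c(.2, .5, 1), function(b)
#   anneal_p_star(beta_j = 0.3, theta = 0.5, V0 = 0.01, V1 = 0.5, b = b))   # about 0.62 0.77 0.92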
#########################################################
# Data Generation for the Simulated Application Scenarios
# (Simulation Section 3)
#########################################################

#### Simulate data ####
library(MASS)
cor <- c(00,04,08)   # Correlation between the continuous covariates (used only to label the output files)
for(m in c(1,2,3)){   # For each simulated model (see the Simulation 3 section of the CSDA paper)
  for(c in 1:3){      # For each correlation structure

    # Make a place to collect output
    out  <- data.frame(t(rep(0,112)))   # beta-59, p-50 (ignores grouping), theta-1, iter-1, time-1
    out2 <- data.frame(t(rep(0,59)))    # p-59 (expands p for grouping)

    # Label the output columns
    ncov <- 59
    for(i in 1:length(out)){
      if(i <= ncov){
        names(out)[i] <- paste("beta", i-1, sep="")
      }
      if(i > ncov){
        names(out)[i] <- paste("p", i-60, sep="")
      }
    }
    names(out)[110] <- "theta"   # Theta hat
    names(out)[111] <- "iter"    # Number of iterations
    names(out)[112] <- "time"    # Time to converge

    for(t in 1:500){   # For 500 simulations
      set.seed(t)
      len  <- 1000         # For 1000 individuals
      cont <- 4            # Number of continuous covariates
      mu   <- rep(0,cont)  # Mean of the continuous covariates
      # Correlation structure
      if(c == 1){
        sigma <- diag(cont)
      }else if(c == 2){
        sigma <- matrix(rep(.4), cont, cont) + .6*diag(cont)
      }else if(c == 3){
        sigma <- matrix(rep(.8), cont, cont) + .2*diag(cont)
      }
      # Set the heredity structure
      # a <- c(0,0,0,1)   # Strong
      a <- c(0,1,1,1)     # Weak
      # Polynomial structure
      q <- c(0,1)

      data <- mvrnorm(n = len, mu, sigma)   # Simulate the continuous covariates
      discrete1 <- rbinom(len,1,.5)         # Simulate 4 two-level qualitative covariates
      discrete2 <- rbinom(len,1,.5)
      discrete3 <- rbinom(len,1,.5)
      discrete4 <- rbinom(len,1,.5)
      group2 <- t(rmultinom(len, 1, c(.25,.25,.25))[2:3,])   # Simulate one 3-level qualitative covariate (rmultinom rescales the probabilities)
      # Add the intercept term and combine the data
      data <- data.frame(cbind(rep(1), discrete1, discrete2, discrete3, discrete4, data, group2))
      # Make names for the columns
      for(i in 0:(ncol(data)-1)){
        names(data)[i+1] <- paste("x", i, sep="")
      }

      # Make the interaction terms needed to calculate y ('myEM' will recreate these terms given an array of main-effect pairs)
      given <- ncol(data) - 1
      indicator <- list(group1 = c(9,10))

      ## Test: look at all pairwise interactions as well as the squared terms for the continuous covariates
      par    <- c(1,2,3,4,5,6,7,8,9)                         # List of covariates (only ONE term is needed from the group (9))
      comb   <- combn(par, 2)                                # All possible interaction pairs
      combin <- cbind(comb, c(5,5), c(6,6), c(7,7), c(8,8))  # Squared terms for the continuous covariates
      beta_home <- interact <- array(combin, c(1,2,40))      # Make an array with all of the terms

      # Make more indicators if they're in an interaction (can handle interactions of indicators)
      new_interact     <- numeric()   # Home for new interactions
      interact_nogroup <- numeric()   # Separate array for interactions that do not involve a group
      new_indicator    <- indicator   # Home for new indicator groups
      for(i in 1:dim(interact)[3]){                     # Look through all the interaction terms given
        num_dum <- 0                                    # Count whether 0, 1, or 2 of the interaction terms are part of an indicator group
        place   <- c(0,0)                               # Which group are they part of
        for(k in 1:dim(interact)[2]){                   # Look through the 2 representative components of the interaction
          for(j in 1:length(indicator)){                # Look through the length of the indicator group
            if(interact[,k,i] %in% indicator[[j]]){     # Is one of the interaction terms part of an indicator group?
              num_dum  <- num_dum + 1                   # If yes, record
              place[k] <- j                             # Note which group it is in
            }
          }
        }
        if(num_dum == 2){          # If both are indicators - make new indicator terms and a new interaction
          new_indicator <- c(new_indicator,
                             list(group = seq(1, (length(indicator[[place[1]]])*length(indicator[[place[2]]]))) +
                                    max(unlist(new_indicator), given)))
          new_interact  <- rbind(new_interact, expand.grid(indicator[[place[1]]], indicator[[place[2]]]))
        }else if(num_dum == 1){    # If one is an indicator - make new indicator terms and a new interaction
          new_indicator <- c(new_indicator,
                             list(group = seq(1, length(indicator[[sum(place)]])) +
                                    max(unlist(new_indicator), given)))
          new_interact  <- rbind(new_interact, expand.grid(indicator[[sum(place)]], interact[,match(0,place),i]))
        }else if(num_dum == 0){    # If neither is an indicator - append the interaction
          interact_nogroup <- rbind(interact_nogroup, interact[,,i])
        }
      }
      new_interact     <- array(as.vector(rbind(new_interact[,1], new_interact[,2])), dim = c(1,2,nrow(new_interact)))
      interact_nogroup <- array(as.vector(rbind(interact_nogroup[,1], interact_nogroup[,2])), dim = c(1,2,nrow(interact_nogroup)))
      # Make a vector with all of the interaction terms in it
      all_inter <- array(c(as.vector(interact_nogroup), as.vector(new_interact)),
                         dim = c(1,2,(dim(new_interact)[3] + dim(interact_nogroup)[3])))
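      ## A quick look at what the expansion above produces (illustrative): with indicator group
      ## c(9, 10) interacting with main effect x1, expand.grid() yields the two dummy-by-parent
      ## pairs that become the grouped interaction columns x9_x1 and x10_x1.
      # expand.grid(c(9, 10), 1)
      #   Var1 Var2
      # 1    9    1
      # 2   10    1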
      # Add interactions to the data set and label them 'x4_x6'. Interactions enter the data set in the order given in 'interact'
      for(i in 1:dim(all_inter)[3]){
        int  <- all_inter[,,i]
        data <- cbind(data, data[[paste("x", int[1], sep="")]] * data[[paste("x", int[2], sep="")]])
        names(data)[ncol(data)] <- paste("x", int[1], "_x", int[2], sep="")
      }

      # Build the true coefficient vector (59 terms)
      # Main-effect order: b b b b c c c c d2 d2 (4 binary, 4 continuous, 2 dummies of the 3-level factor)
      if(m == 1){
        beta_true <- c(0,-0.65,0.5,0,0,0.65,-0.5,0,0,0.6,0,0.6,rep(0,2),-0.6,rep(0,18),0.6,rep(0,6),0.5,rep(0,2),-0.6,0.5,rep(0,6),-0.6,0.5,rep(0,6))
      }else if(m == 2){
        beta_true <- c(0,0,0.5,0,0,0.65,-0.5,0,0,0.6,0,0.6,rep(0,2),-0.6,rep(0,18),0.6,rep(0,6),0.5,rep(0,2),-0.6,0.5,rep(0,6),-0.6,0.5,rep(0,6))
      }else if(m == 3){
        beta_true <- c(0,0,0.5,0,0,0,0,0,0,0.6,0,0.6,rep(0,2),-0.6,rep(0,18),0.6,rep(0,6),0.5,rep(0,2),-0.6,0.5,rep(0,6),-0.6,0.5,rep(0,6))
      }
      z  <- as.matrix(data) %*% beta_true   # Linear combination
      pr <- 1/(1+exp(-z))                   # Pass through the inverse-logit function
      y  <- rbinom(len, 1, pr)
      data <- data.frame(y, data)

      # Run a logistic regression model for starting points
      model <- summary(glm(y ~ ., data = data, family = binomial))
      # Start the timer
      ptm <- proc.time()
      # Give the initial coefficients
      beta0 <- model$coefficients[,1]

      # This is the deterministic annealing loop that calls the EMVS function 'myEM'
      for(loop in 2:10){
        b <- loop/10
        this <- myEM(theta0 = 0.50, beta0 = beta0, data = data[,1:12], indicator = indicator,
                     interact = beta_home, heredity = a, poly = q,
                     V0 = (log(1.05)/qnorm(.975))^2, V1 = (log(4)/qnorm(.975))^2,
                     alpha = 1, beta = 1, b = b)
        # Set the mode to the initial values for the next temperature
        beta0  <- this$beta
        theta0 <- this$theta
        if(b == 1){   # Save output at the final temperature
          time <- proc.time() - ptm
          out  <- rbind(out, c(this$beta, this$p, this$theta, this$iter, time[3]))
          out2[t,] <- this$expand_p
        }
      }
    }

    # Optional: set a working directory for the output
    #setwd("")
    # Optional: save the results to a file. Note that 'expand' gives p_star expanded for grouped covariates
    #write.table(out, file = paste("model_",m,"_app_weak_106_4_0",cor[c],sep=""))
    #write.table(out2, file = paste("model_",m,"_app_expand_weak_106_4_0",cor[c],sep=""))
  }
}
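## A possible post-processing sketch (illustrative; assumes the commented write.table() calls
## above were run, so a file with the name below exists). Median posterior inclusion
## probabilities across the 500 runs could be summarized along these lines:
# res   <- read.table("model_1_app_weak_106_4_00")
# pcols <- grep("^p", names(res))
# apply(res[-1, pcols], 2, median)   # drop the initializing row of zeros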