# 0 Preliminaries -----------------------------------------------------------
# _0.1 Packages ------------------------------------------------------------
library(tidyverse)
library(tidylog)
library(psych)
library(mice)
library(micemd)

# _0.2 Load data files ----------------------------------------------------------
# Each load() call creates one data frame (da<study>.0001) in the workspace;
# it is converted to a tibble straight away.
load(file = "02760-0001-Data.rda")
wave1 <- as_tibble(da02760.0001)
load(file = "04652-0001-Data.rda")
wave2 <- as_tibble(da04652.0001)
load(file = "36346-0001-Data.rda")
wave3 <- as_tibble(da36346.0001)

# 1 Wrangling -------------------------------------------------------------
# _1.1 Selecting variables -------------------------------------------------
# Keep only the identifiers, demographics and analysis variables of each wave.
wave1 <- wave1 %>%
  dplyr::select(
    M2ID, M2FAMNUM, SAMPLMAJ, A1STATUS, A1PAGE_M2, A1PRAGE_2019, A1PBYEAR_2019,
    A1PRSEX, TOT_SIBS, ZYGCAT,
    A1SS7R, A1SHHTOT, # race, income
    A1SA18, A1SA19, A1SA20, A1SA21, # exercise
    A1PDEPRE, A1PDEPDX, # depression
    A1SA12G, # sleep problems
    A1PA40, A1PA43, # ever and current cigarette smoking
    A1SL1, A1SL2 # social contact
  )

# Select analysis variables for wave 2
wave2 <- wave2 %>%
  dplyr::select(
    M2ID,
    B1STINC1, # income
    B1SA30A, B1SA30B, B1SA30C, B1SA30D, B1SA30E, B1SA30F, # exercise
    B1SA31A, B1SA31B, B1SA31C, B1SA31D, B1SA31E, B1SA31F,
    B1PDEPRE, B1PDEPDX, # depression
    B1SA10G, # sleep problems
    B1SA61A, B1SA61B, B1SA61C, B1SA61D, # sleep complaints
    B1PA38A, B1PA39, # ever and current cigarette smoking
    B1SI1, B1SI2 # social contact
  )

# Select analysis variables for wave 3
wave3 <- wave3 %>%
  dplyr::select(
    M2ID,
    C1STINC, # income
    C1SA26A, C1SA26B, C1SA26C, C1SA26D, C1SA26E, C1SA26F, # exercise
    C1SA27A, C1SA27B, C1SA27C, C1SA27D, C1SA27E, C1SA27F,
    C1PDEPRE, C1PDEPDX, # depression
    C1SA10G, # sleep problems
    C1SA57A, C1SA57B, C1SA57C, C1SA57D, # sleep complaints
    C1PA38A, C1PA39, # ever and current cigarette smoking
    C1SI1, C1SI2 # social contact
  )

# Joining datasets into one (wave 1 defines the set of participants)
midus <- wave1 %>%
  left_join(wave2, by = "M2ID") %>%
  left_join(wave3, by = "M2ID") %>%
  as_tibble()

# Rename variables: <construct>_w<wave>; the "_w<wave>" suffix is what the
# wide-to-long step later splits on.
midus <- midus %>%
  rename(
    age = A1PRAGE_2019,
    race = A1SS7R,
    income_w1 = A1SHHTOT,
    income_w2 = B1STINC1,
    income_w3 = C1STINC,
    # Wave 1 exercise (s = summer, w = winter)
    s_vig_w1 = A1SA18, w_vig_w1 = A1SA19,
    s_mod_w1 = A1SA20, w_mod_w1 = A1SA21,
    # Wave 2 exercise
    s_vig_job_w2 = B1SA30A, w_vig_job_w2 = B1SA30B,
    s_vig_chor_w2 = B1SA30C, w_vig_chor_w2 = B1SA30D,
    s_vig_leis_w2 = B1SA30E, w_vig_leis_w2 = B1SA30F,
    s_mod_job_w2 = B1SA31A, w_mod_job_w2 = B1SA31B,
    s_mod_chor_w2 = B1SA31C, w_mod_chor_w2 = B1SA31D,
    s_mod_leis_w2 = B1SA31E, w_mod_leis_w2 = B1SA31F,
    # Wave 3 exercise
    s_vig_job_w3 = C1SA26A, w_vig_job_w3 = C1SA26B,
    s_vig_chor_w3 = C1SA26C, w_vig_chor_w3 = C1SA26D,
    s_vig_leis_w3 = C1SA26E, w_vig_leis_w3 = C1SA26F,
    s_mod_job_w3 = C1SA27A, w_mod_job_w3 = C1SA27B,
    s_mod_chor_w3 = C1SA27C, w_mod_chor_w3 = C1SA27D,
    s_mod_leis_w3 = C1SA27E, w_mod_leis_w3 = C1SA27F,
    # Wave 1 others
    depr_sev_w1 = A1PDEPRE, depr_bin_w1 = A1PDEPDX,
    sleep_prob_w1 = A1SA12G,
    eversmoke_w1 = A1PA40, smoking_w1 = A1PA43,
    neighbcont_w1 = A1SL1, neighbconv_w1 = A1SL2,
    # Wave 2 others
    depr_sev_w2 = B1PDEPRE, depr_bin_w2 = B1PDEPDX,
    sleep_prob_w2 = B1SA10G,
    eversmoke_w2 = B1PA38A, smoking_w2 = B1PA39,
    neighbcont_w2 = B1SI1, neighbconv_w2 = B1SI2,
    sleepc1_w2 = B1SA61A, sleepc2_w2 = B1SA61B,
    sleepc3_w2 = B1SA61C, sleepc4_w2 = B1SA61D,
    # Wave 3 others
    depr_sev_w3 = C1PDEPRE, depr_bin_w3 = C1PDEPDX,
    sleep_prob_w3 = C1SA10G,
    eversmoke_w3 = C1PA38A, smoking_w3 = C1PA39,
    neighbcont_w3 = C1SI1, neighbconv_w3 = C1SI2,
    sleepc1_w3 = C1SA57A, sleepc2_w3 = C1SA57B,
    sleepc3_w3 = C1SA57C, sleepc4_w3 = C1SA57D
  )

# _1.2 Recode variables ----------------------------------------------------
# Sex becomes 0 = male, 1 = female; race becomes 0 = white, 1 = any other
# category (missing stays missing).
midus <- midus %>%
  mutate(
    A1PRSEX = recode(A1PRSEX, "(1) Male" = "0", "(2) Female" = "1"),
    A1PRSEX = as.double(A1PRSEX) - 1, # factor codes 1/2 -> 0/1
    race = ifelse(is.na(race), NA, ifelse(race == "(1) White", 0, 1))
  ) %>%
  rename(sex = A1PRSEX)

# Exercise frequency is reverse-scored so that higher = more frequent (0-5).
# Wave 1
midus <- midus %>%
  mutate(across(
    c(s_vig_w1, w_vig_w1, s_mod_w1, w_mod_w1),
    ~ as.numeric(recode(.x,
      "(1) Several times a week or more" = 5,
      "(2) About once a week" = 4,
      "(3) Several times a month" = 3,
      "(4) About once a month" = 2,
      "(5) Less than once a month" = 1,
      "(6) Never" = 0
    ))
  ))

# Wave 2 and 3 (labels are upper-case in these waves)
midus <- midus %>%
  mutate(across(
    c(
      s_vig_job_w2, w_vig_job_w2, s_vig_chor_w2, w_vig_chor_w2,
      s_vig_leis_w2, w_vig_leis_w2, s_mod_job_w2, w_mod_job_w2,
      s_mod_chor_w2, w_mod_chor_w2, s_mod_leis_w2, w_mod_leis_w2,
      s_vig_job_w3, w_vig_job_w3, s_vig_chor_w3, w_vig_chor_w3,
      s_vig_leis_w3, w_vig_leis_w3, s_mod_job_w3, w_mod_job_w3,
      s_mod_chor_w3, w_mod_chor_w3, s_mod_leis_w3, w_mod_leis_w3
    ),
    ~ as.numeric(recode(.x,
      "(1) SEVERAL TIMES A WEEK" = 5,
      "(2) ONCE A WEEK" = 4,
      "(3) SEVERAL TIMES A MONTH" = 3,
      "(4) ONCE A MONTH" = 2,
      "(5) LESS THAN ONCE A MONTH" = 1,
      "(6) NEVER" = 0
    ))
  ))

# Depression diagnosis, waves 1 and 2
midus <- midus %>%
  mutate(across(
    c(depr_bin_w1, depr_bin_w2),
    ~ as.numeric(recode(.x, "(0) Negative" = 0, "(1) Positive" = 1))
  ))

# Depression diagnosis, wave 3
midus <- midus %>%
  mutate(across(
    c(depr_bin_w3),
    ~ as.numeric(recode(.x, "(0) NEGATIVE" = 0, "(1) POSITIVE" = 1))
  ))

# Sleep problems (single ordinal item), wave 1: higher = more frequent
midus <- midus %>%
  mutate(across(
    c(sleep_prob_w1),
    ~ as.numeric(recode(.x,
      "(6) Not at all" = 0,
      "(5) Once a month" = 1,
      "(4) Several times a month" = 2,
      "(3) Once a week" = 3,
      "(2) Several times a week" = 4,
      "(1) Almost every day" = 5
    ))
  ))

# Sleep problems (single ordinal item), waves 2-3
midus <- midus %>%
  mutate(across(
    c(sleep_prob_w2, sleep_prob_w3),
    ~ as.numeric(recode(.x,
      "(6) NOT AT ALL" = 0,
      "(5) ONCE A MONTH" = 1,
      "(4) SEVERAL TIMES A MONTH" = 2,
      "(3) ONCE A WEEK" = 3,
      "(2) SEVERAL TIMES A WEEK" = 4,
      "(1) ALMOST EVERY DAY" = 5
    ))
  ))

# Extra sleep complaint items, waves 2-3, scored 0-4.
# NOTE: "(5) ALMOST ALWAYS" was previously scored 5, which skipped the value 4
# and broke the equal spacing of the scale; it is now scored 4.
midus <- midus %>%
  mutate(across(
    c(
      sleepc1_w2, sleepc2_w2, sleepc3_w2, sleepc4_w2,
      sleepc1_w3, sleepc2_w3, sleepc3_w3, sleepc4_w3
    ),
    ~ as.numeric(recode(.x,
      "(1) NEVER" = 0,
      "(2) RARELY" = 1,
      "(3) SOMETIMES" = 2,
      "(4) OFTEN" = 3,
      "(5) ALMOST ALWAYS" = 4
    ))
  ))

# Smoking, wave 1
midus <- midus %>%
  mutate(across(
    c(eversmoke_w1, smoking_w1),
    ~ as.numeric(recode(.x, "(1) Yes" = 1, "(2) No" = 0))
  ))

# Never smokers were coded as NA's on current smoking at wave 1
midus <- midus %>%
  mutate(smoking_w1 = replace_na(smoking_w1, 0))

# Smoking, waves 2 and 3 (same lambda + as.numeric() form as the other recodes;
# the deprecated funs() helper is no longer used)
midus <- midus %>%
  mutate(across(
    c(eversmoke_w2, smoking_w2, eversmoke_w3, smoking_w3),
    ~ as.numeric(recode(.x, "(1) YES" = 1, "(2) NO" = 0))
  ))

# Identify participants with inconsistencies in reporting cigarette usage
# (i.e. they report never having smoked in wave 2 or 3, when they reported
# smoking on a previous wave). The flag is NA when the comparison cannot be
# made because of missing data.
midus <- midus %>%
  mutate(
    errorsmoke_w2 = ifelse(
      eversmoke_w2 == 0 & (smoking_w1 == 1 | eversmoke_w1 == 1),
      "error", "ok"
    ),
    errorsmoke_w3 = ifelse(
      eversmoke_w3 == 0 &
        (smoking_w1 == 1 | smoking_w2 == 1 |
          eversmoke_w1 == 1 | eversmoke_w2 == 1),
      "error", "ok"
    )
  )

# Filter out those participants: keep rows whose flag is NA or not "error"
midus <- midus %>%
  filter(
    is.na(errorsmoke_w2) | errorsmoke_w2 != "error",
    is.na(errorsmoke_w3) | errorsmoke_w3 != "error"
  ) %>%
  dplyr::select(-c(errorsmoke_w2, errorsmoke_w3))

# The smoking_w2 and smoking_w3 variables are "NA's" rather than "0" if the
# person reports never having smoked a cigarette.
# I need to distinguish actual missing NA's from "not currently smoking"
# values (0's).
# If the smoking variable is NA and the depression var (which didn't have the
# same 0/NA conflation) is also NA, the smoking variable stays NA (a true NA).
# If the smoking variable is NA and the depression var is present, then 0
# (non-smoker). Otherwise the smoking variable is kept as given (0 or 1).
midus <- midus %>%
  mutate(
    smoking_w2 = ifelse(is.na(smoking_w2) & !is.na(depr_sev_w2), 0, smoking_w2),
    smoking_w3 = ifelse(is.na(smoking_w3) & !is.na(depr_sev_w3), 0, smoking_w3)
  )

# Neighbor variables
# Neighbor contact and conversation, wave 1: higher = more frequent (0-5)
midus <- midus %>%
  mutate(across(
    c(neighbcont_w1, neighbconv_w1),
    ~ as.numeric(recode(.x,
      "(6) Never or hardly ever" = 0,
      "(5) Less than once a month" = 1,
      "(4) 1-3 times a month" = 2,
      "(3) About once a week" = 3,
      "(2) Several times a week" = 4,
      "(1) Almost every day" = 5
    ))
  ))

# Neighbor contact and conversation, waves 2 and 3
midus <- midus %>%
  mutate(across(
    c(neighbcont_w2, neighbcont_w3, neighbconv_w2, neighbconv_w3),
    ~ as.numeric(recode(.x,
      "(6) NEVER OR HARDLY EVER" = 0,
      "(5) LESS THAN ONCE A MONTH" = 1,
      "(4) 1-3 TIMES A MONTH" = 2,
      "(3) ABOUT ONCE A WEEK" = 3,
      "(2) SEVERAL TIMES A WEEK" = 4,
      "(1) ALMOST EVERY DAY" = 5
    ))
  ))

# Create 4 composite variables for w2 and w3 exercise each (winter/summer x
# vigorous/moderate) by summing the job, chores and leisure items; the same
# four variables already exist as single items for w1.
midus <- midus %>%
  mutate(
    w_vig_w2 = w_vig_job_w2 + w_vig_chor_w2 + w_vig_leis_w2,
    s_vig_w2 = s_vig_job_w2 + s_vig_chor_w2 + s_vig_leis_w2,
    w_mod_w2 = w_mod_job_w2 + w_mod_chor_w2 + w_mod_leis_w2,
    s_mod_w2 = s_mod_job_w2 + s_mod_chor_w2 + s_mod_leis_w2,
    w_vig_w3 = w_vig_job_w3 + w_vig_chor_w3 + w_vig_leis_w3,
    s_vig_w3 = s_vig_job_w3 + s_vig_chor_w3 + s_vig_leis_w3,
    w_mod_w3 = w_mod_job_w3 + w_mod_chor_w3 + w_mod_leis_w3,
    s_mod_w3 = s_mod_job_w3 + s_mod_chor_w3 + s_mod_leis_w3
  )

# Create exercise averages for each wave: multiply by MET unit (7 = vigorous,
# 5 = moderate); divide by 2 to average over summer and winter.
midus <- midus %>%
  mutate(
    exercise_w1 = ((w_vig_w1 + s_vig_w1) * 7) / 2 + ((w_mod_w1 + s_mod_w1) * 5) / 2,
    exercise_w2 = ((w_vig_w2 + s_vig_w2) * 7) / 2 + ((w_mod_w2 + s_mod_w2) * 5) / 2,
    exercise_w3 = ((w_vig_w3 + s_vig_w3) * 7) / 2 + ((w_mod_w3 + s_mod_w3) * 5) / 2
  )

# Neighbor contact composite variables (mean of contact and conversation)
midus <- midus %>%
  mutate(
    neighbs_w1 = (neighbcont_w1 + neighbconv_w1) / 2,
    neighbs_w2 = (neighbcont_w2 + neighbconv_w2) / 2,
    neighbs_w3 = (neighbcont_w3 + neighbconv_w3) / 2
  )

# Sleep composites for w2 and w3 (sum of the four complaint items)
midus <- midus %>%
  mutate(
    sleepc_w2 = sleepc1_w2 + sleepc2_w2 + sleepc3_w2 + sleepc4_w2,
    sleepc_w3 = sleepc1_w3 + sleepc2_w3 + sleepc3_w3 + sleepc4_w3
  )

# Correlation among single-item sleep problem variable (collected at all three
# waves) and 4-item sleep problem scales (collected at wave 2 and wave 3)
cor(midus$sleepc_w3, midus$sleep_prob_w3, use = "complete.obs")
cor(midus$sleepc_w2, midus$sleep_prob_w2, use = "complete.obs")
cor(midus$sleepc_w2, midus$sleep_prob_w1, use = "complete.obs") # W2 full scale with W1: r = .44
cor(midus$sleep_prob_w2, midus$sleep_prob_w1, use = "complete.obs") # W2 single item with W1 single item: r = .44

write.csv(midus, "midus.csv")
write_rds(midus, "midus.rds")

# _1.3 Wide to long ------------------------------------------------------------
midus <- read_rds("midus.rds")

# Rescaling exercise within each wave to M = 0, SD = 1 because the waves
# involved a different number of items. The raw scores are kept as
# exercise_raw_w*. Means and SDs are computed from the data (previously they
# were copied by hand from the describe() output and the columns were picked
# by position, both of which go stale when the data change).
print(
  describe(dplyr::select(midus, c(exercise_w1, exercise_w2, exercise_w3))),
  digits = 5
)
midus <- midus %>%
  mutate(
    exercise_raw_w1 = exercise_w1,
    exercise_raw_w2 = exercise_w2,
    exercise_raw_w3 = exercise_w3,
    exercise_w1 = as.vector(scale(exercise_raw_w1)),
    exercise_w2 = as.vector(scale(exercise_raw_w2)),
    exercise_w3 = as.vector(scale(exercise_raw_w3))
  )

# Creating long data: one row per person per wave
midus_long <- midus %>%
  pivot_longer(
    cols = -c(M2ID:race), # variables you don't want to stack (between person)
    names_to = c(".value", "wave"), # ".value": measure name comes from the column name; "wave": new column holding the wave number
    names_sep = "_w", # split "<measure>_w<wave>" into its two parts
    values_drop_na = FALSE
  )

# Remove any row that did not get a wave number from the split above
midus_long <- midus_long %>%
  filter(!is.na(wave)) %>%
  mutate(wave = as.numeric(wave))

# _1.4 Centering variables for completer dataset -------------------------------------------------
# Scale raw variables over all person-wave rows
midus_long <- midus_long %>%
  mutate(
    sleep_prob_z = as.vector(scale(sleep_prob)),
    neighbs_z = as.vector(scale(neighbs)),
    exercise_z = as.vector(scale(exercise)),
    income_z = as.vector(scale(income)),
    age_z = as.vector(scale(age))
  )

# Person means (between-person component)
midus_long <- midus_long %>%
  group_by(M2ID) %>%
  mutate(
    sleep_mean = mean(sleep_prob_z, na.rm = TRUE),
    neigh_mean = mean(neighbs_z, na.rm = TRUE),
    smoke_mean = mean(smoking, na.rm = TRUE),
    exercise_mean = mean(exercise_z, na.rm = TRUE),
    dep_bin_mean = mean(depr_bin, na.rm = TRUE),
    income_mean = mean(income_z, na.rm = TRUE)
  ) %>%
  ungroup()

# Person-centered scores (within-person component)
midus_long <- midus_long %>%
  mutate(
    sleep_c = sleep_prob_z - sleep_mean,
    neigh_c = neighbs_z - neigh_mean,
    smoke_c = smoking - smoke_mean,
    exercise_c = exercise_z - exercise_mean,
    dep_bin_c = depr_bin - dep_bin_mean,
    income_c = income_z - income_mean
  )

write.csv(midus_long, "midus_long.csv")
write_rds(midus_long, "midus_long.rds")

# _1.5 Create completer dataset ---------------------------------------------------------
completer <- read_rds("midus_long.rds") # read in dataset
completer <- completer %>%
  mutate(wave_num = (as.double(wave) - 1) / 2) %>% # for scaling, change wave to 0, .5, 1
  dplyr::select(
    M2ID, # select relevant variables for analyses
    dep_bin_c, exercise_c, neigh_c, smoke_c, sleep_c, wave_num, income_c,
    dep_bin_mean, exercise_mean, neigh_mean, smoke_mean, sleep_mean,
    income_mean, sex, age_z, race
  ) %>%
  mutate(na_sum = rowSums(is.na(.))) %>% # missing values per row
  group_by(M2ID) %>%
  mutate(na_sum_id = sum(na_sum)) %>% # missing values per person
  ungroup() %>%
  filter(na_sum_id == 0) %>% # keep only those with full data at every wave
  dplyr::select(-M2ID)
write_rds(completer, "midus_long_completers.rds")

# 2 Missing data ------------------------------------------------------------
# Exploring missingness
midus <- as_tibble(read_rds("midus.rds")) # read in wide file

# Missingness indicator: 1 where x is NA, 0 otherwise.
na_to_1 <- function(x) {
  as.numeric(is.na(x))
}

# For every time-varying variable add an indicator column "<name>_1" that
# flags missing values; the original columns are kept as predictors.
midus_r <- midus %>%
  dplyr::select(
    M2ID, age, sex, race,
    income_w1, income_w2, income_w3,
    sleep_prob_w1, sleep_prob_w2, sleep_prob_w3,
    smoking_w1, smoking_w2, smoking_w3,
    depr_bin_w1, depr_bin_w2, depr_bin_w3,
    exercise_w1, exercise_w2, exercise_w3,
    neighbs_w1, neighbs_w2, neighbs_w3
  ) %>%
  mutate(race = as.factor(race), sex = as.factor(sex)) %>%
  mutate(across(income_w1:neighbs_w3, na_to_1, .names = "{.col}_1"))

# _2.1 Exploring predictors of missingness --------------------------------
# For each variable, regress its wave-2 and wave-3 missingness indicators on
# one predictor at a time: age, race, sex and the wave-1 score of every other
# variable. Models are fitted and printed in this order: income, sleep
# problems, exercise, depression, smoking, neighbors.
miss_vars <- c(
  "income", "sleep_prob", "exercise", "depr_bin", "smoking", "neighbs"
)
for (miss_outcome in miss_vars) {
  miss_predictors <- c(
    "age", "race", "sex",
    paste0(setdiff(miss_vars, miss_outcome), "_w1")
  )
  for (miss_predictor in miss_predictors) {
    for (miss_wave in c(2, 3)) {
      miss_formula <- reformulate(
        miss_predictor,
        response = paste0(miss_outcome, "_w", miss_wave, "_1")
      )
      # bquote() splices the formula into the call so that the printed
      # "Call:" line shows the actual model rather than a variable name.
      miss_fit <- eval(bquote(
        glm(.(miss_formula), family = "binomial", data = midus_r)
      ))
      print(summary(miss_fit)) # explicit print: no auto-printing inside loops
    }
  }
}

# 3 Imputation --------------------------------------------------------------
# _3.1 Basic preparation ------------------------------------------------------------
midus <- as_tibble(read_rds("midus_long.rds")) # read in long file
midusb <- midus %>% # create minimized dataset
  dplyr::select(
    M2ID, wave, age, sex, race, depr_bin, exercise_raw, neighbs, smoking,
    sleep_prob, income
  ) %>%
  mutate(
    wave = (as.double(wave) - 1) / 2, # scale wave to be 0, .5 and 1
    age = as.vector(age),
    depr_bin = as.factor(depr_bin),
    smoking = as.factor(smoking),
    sleep_prob = as.ordered(sleep_prob),
    race = as.factor(race),
    sex = as.factor(sex)
  )

# _3.2 Multilevel imputation setup --------------------------------------------
# See: "Multiple Imputation of Missing Data for Multilevel Models: Simulations
# and Recommendations"
# Example of what the -2, 2, 3, and 4 mean in the predictor matrix
# (https://stefvanbuuren.name/fimd/sec-mlguidelines.html)

# Set up imputation methods: one entry per column of midusb. Columns left as
# "" (M2ID, wave) are not imputed.
# "2l.pan" = normal, homogeneous variances; 2l.norm = normal, heterogeneous
# variances; 2l.lmer = normal; 2l.binary = binary
# NOTE(review): confirm that every method name below is provided by an
# attached package, and that "2l.pan" is appropriate for sleep_prob, which is
# stored as an ordered factor in midusb.
impMethod <- character(ncol(midusb))
names(impMethod) <- colnames(midusb)
impMethod["age"] <- "2lonly.pan"
impMethod[c("race", "sex")] <- "2lonly.binary"
impMethod[c("depr_bin", "smoking")] <- "2l.binary"
impMethod[c("exercise_raw", "neighbs", "sleep_prob", "income")] <- "2l.pan"

# Set up an empty (all-zero) predictor matrix with one row and one column per
# variable; rows are filled in below.
predMatrix <- matrix(0, ncol(midusb), ncol(midusb))
rownames(predMatrix) <- colnames(predMatrix) <- colnames(midusb)
# Define predictors for each variable (rows = variable being imputed,
# columns = predictors):
#   -2 = cluster variable
#    1 = overall effect
#    3 = overall + group-level effect
# Person-level variables (age, race, sex) use every other variable with code 1.
# Time-varying variables use the person-level variables with code 1 and wave
# plus every other time-varying variable with code 3. The M2ID and wave rows
# are left untouched because those variables are not imputed.
level2_vars <- c("age", "race", "sex")
level1_vars <- c(
  "depr_bin", "exercise_raw", "neighbs", "smoking", "sleep_prob", "income"
)
imputed_vars <- c(level2_vars, level1_vars)

predMatrix[level2_vars, ] <- 1
predMatrix[level1_vars, ] <- 3
predMatrix[level1_vars, level2_vars] <- 1
predMatrix[imputed_vars, "M2ID"] <- -2
diag(predMatrix)[imputed_vars] <- 0 # a variable never predicts itself

# _3.3 Multilevel imputation ---------------------------------------------------
# The methods are passed as `method`: the old `imputationMethod` alias appears
# to have been dropped from current mice/micemd, where it would be ignored
# and the default single-level methods used instead.
# NOTE(review): no seed is set, so the imputations differ between runs;
# consider passing `seed =` for reproducibility.
imputed <- mice.par(
  midusb,
  m = 15,
  method = impMethod,
  predictorMatrix = predMatrix,
  nnodes = 3
)
write_rds(imputed, "midus_imputed_multilevel.rds")

# Stack the original data (.imp == 0) and the 15 imputed datasets in long
# format. mice::complete is called explicitly because tidyr also exports a
# complete() function.
midus_long_imp <- read_rds("midus_imputed_multilevel.rds")
midus_long_imp <- as_tibble(
  mice::complete(midus_long_imp, "long", include = TRUE)
)

# _3.4 Creating imputed datasets for network models -----------------------
# Centering the variables
# Standardize and person-center the variables within each imputed dataset.
# depr_bin, smoking and sleep_prob are factors in the imputed data, so
# as.double() returns 1-based level codes; subtracting 1 puts them back on the
# scale used elsewhere in this script (section 4.2 and the completer data).
# Without it the person means of depression and smoking were shifted by +1.
midus_long_imp_zc <- midus_long_imp %>%
  mutate(
    sleep_prob = as.double(sleep_prob) - 1,
    depr_bin = as.double(depr_bin) - 1,
    smoking = as.double(smoking) - 1
  ) %>%
  group_by(.imp) %>% # standardize the variables within imputation
  mutate(
    sleep_prob_z = as.vector(scale(sleep_prob)),
    neighbs_z = as.vector(scale(neighbs)),
    income_z = as.vector(scale(income)),
    age_z = as.vector(scale(age))
  ) %>%
  ungroup() %>%
  group_by(.imp, wave) %>% # standardizing the exercise variables within wave
  mutate(exercise_z = as.vector(scale(exercise_raw))) %>%
  ungroup() %>%
  group_by(.imp, M2ID) %>% # cluster (person) means
  mutate(
    sleep_mean = mean(sleep_prob_z, na.rm = TRUE),
    neigh_mean = mean(neighbs_z, na.rm = TRUE),
    smoke_mean = mean(smoking, na.rm = TRUE),
    exercise_mean = mean(exercise_z, na.rm = TRUE),
    dep_bin_mean = mean(depr_bin, na.rm = TRUE),
    income_mean = mean(income_z, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate( # person-centered scores
    sleep_c = sleep_prob_z - sleep_mean,
    neigh_c = neighbs_z - neigh_mean,
    smoke_c = smoking - smoke_mean,
    exercise_c = exercise_z - exercise_mean,
    dep_bin_c = depr_bin - dep_bin_mean,
    income_c = income_z - income_mean
  )
midus_long_imp_zc

# Turn back into mids file
midus_mids <- as.mids(midus_long_imp_zc, .imp = ".imp", .id = ".id")
write_rds(midus_mids, "midus_mids.rds")

# Create within mids file (person-centered variables plus wave)
midus_long_imp_zc_within <- midus_long_imp_zc %>%
  dplyr::select(
    .imp, .id, dep_bin_c, exercise_c, neigh_c, sleep_c, smoke_c, income_c, wave
  )
midus_mids_within <- as.mids(
  midus_long_imp_zc_within,
  .imp = ".imp", .id = ".id"
)
write_rds(midus_mids_within, "midus_mids_within.rds")

# Create between mids file (person means plus demographics as 0/1 numbers)
midus_long_imp_zc_between <- midus_long_imp_zc %>%
  dplyr::select(
    .imp, .id, dep_bin_mean, exercise_mean, neigh_mean, smoke_mean,
    sleep_mean, income_mean, sex, age_z, race
  ) %>%
  mutate(
    female = as.double(sex) - 1,
    race = as.double(race) - 1
  ) %>%
  dplyr::select(-sex)
midus_mids_between <- as.mids(
  midus_long_imp_zc_between,
  .imp = ".imp", .id = ".id"
)
write_rds(midus_mids_between, "midus_mids_between.rds")

# 4 Summary statistics ------------------------------------------------------
# _4.1 Raw variables ----------------------------------------------------------
midus_long <- as_tibble(read_rds("midus_long.rds"))
midus_sum <- midus_long %>%
  dplyr::select(
    M2ID, wave, age, sex, race, income, depr_bin, sleep_prob, smoking,
    neighbs, exercise_raw
  )
w1 <- midus_sum %>% filter(wave == 1)
w2 <- midus_sum %>% filter(wave == 2)
w3 <- midus_sum %>% filter(wave == 3)
names(w1)
print(describe(w1), digits = 4)
print(describe(w2), digits = 4)
print(describe(w3), digits = 4)

# _4.2 Imputed data -----------------------------------------------------------
# Descriptives pooled over the imputed datasets only (.imp == 0 holds the
# original incomplete data and is dropped). Factors are converted to numbers
# starting at 0. mice::complete is called explicitly because tidyr also
# exports a complete() function.
midus_long_imp <- read_rds("midus_imputed_multilevel.rds")
midus_sum_imp <- as_tibble(
  mice::complete(midus_long_imp, "long", include = TRUE)
)
midus_sum_imp <- midus_sum_imp %>%
  filter(.imp != 0) %>%
  mutate(
    race = as.double(race) - 1,
    sex = as.double(sex) - 1,
    depr_bin = as.double(depr_bin) - 1,
    smoking = as.double(smoking) - 1,
    sleep_prob = as.double(sleep_prob) - 1
  )
describe(midus_sum_imp)
# wave was rescaled to 0, .5 and 1 before imputation
w1_imp <- midus_sum_imp %>% filter(wave == 0)
w2_imp <- midus_sum_imp %>% filter(wave == .5)
w3_imp <- midus_sum_imp %>% filter(wave == 1)
print(describe(w1_imp), digits = 4)
print(describe(w2_imp), digits = 4)
print(describe(w3_imp), digits = 4)

# _4.3 Completers -----------------------------------------------------------
# Descriptives for participants with no missing value on any selected
# variable at any wave.
completer <- read_rds("midus_long.rds")
midus_sum_comp <- completer %>%
  dplyr::select(
    M2ID, age, sex, race, depr_bin, exercise_raw, neighbs, smoking,
    sleep_prob, wave, income
  ) %>%
  mutate(na_sum = rowSums(is.na(.))) %>% # missing values per row
  group_by(M2ID) %>%
  mutate(na_sum_id = sum(na_sum)) %>% # missing values per person
  ungroup() %>%
  filter(na_sum_id == 0) %>%
  dplyr::select(-M2ID)
describe(midus_sum_comp)
w1_comp <- midus_sum_comp %>% filter(wave == 1)
w2_comp <- midus_sum_comp %>% filter(wave == 2)
w3_comp <- midus_sum_comp %>% filter(wave == 3)
print(describe(w1_comp), digits = 4)
print(describe(w2_comp), digits = 4)
print(describe(w3_comp), digits = 4)