--- title: "CEPH Mortality Cancer and Fertility Analysis" author: "Huong Meeks" date: "June 3, 2019" output: html_document --- ```{r Libraries and Functions, include = FALSE} library(odbc) library(dplyr) library(lubridate) library(moonBook) library(survival) library(survminer) library(ggplot2) library(gridExtra) library(ggsci) library(broom) library(RColorBrewer) safe_ifelse <- function(cond, yes, no){ structure(ifelse(cond, yes, no), class = class(yes)) } scale_this <- function(x) { (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE) } get_estimate <- function(inresult, model="cox", name="pstat", dataset="proband", Zcrit=1.96) { ##est <- result$coefficients[name] result_tab <- NULL summ <- summary(inresult) if ( model == 'clogit' | model == 'glm') { coef_name <- rownames(summ$coefficients) } else { coef_name <- names(inresult$coefficient) } if (name !="all") { coef_name <- coef_name[grepl(name, coef_name)] } for (i in 1:length(coef_name)) { if ( !(coef_name[i] %in% c("sexn", "sexu")) ) { dataset_coefname <- coef_name[i] pval <- summ$coefficients[coef_name[i],'Pr(>|z|)'] if (model == "cox" | model == "mortality" | model == "logit" | model== "clogit") { est <- summ$coefficients[coef_name[i],"coef"] RR <- summ$coefficients[coef_name[i],"exp(coef)"] se <- summ$coefficients[coef_name[i],"se(coef)"] Z <- summ$coefficients[coef_name[i],"z"] ll.ci <- summ$conf.int[coef_name[i], 3] ul.ci <- summ$conf.int[coef_name[i], 4] } else if (model == 'glm' | model == 'clogit') { est <- summ$coefficients[coef_name[i],"Estimate"] RR <- exp(est) se <- summ$coefficients[coef_name[i],"Std. Error"] Z <- summ$coefficients[coef_name[i],"z value"] ll.ci = exp(est - Zcrit*se) ul.ci = exp(est + Zcrit*se) } else if (model=='lmer') { pval <- pnorm(Z,lower.tail=F) } result_tab <- rbind(result_tab, data.frame( Covariate=dataset_coefname, Est=round(est,2), SE=round(se,2), Z=round(Z,2), Pval=round(pval, 3), RR=round(RR,2), ll.ci=round(ll.ci,2), ul.ci=round(ul.ci,2) ) ) } } return( result_tab ) } ``` ```{r Table 1- Associations of germline mutation rates with mortality in 122 Generation I individuals, include = TRUE} ### Residuals ============== ceph_cox <- coxph(Surv(censortime, censor) ~ resid + byr + male + ParentalAge, data = ceph) ceph_cox_res <- get_estimate(ceph_cox, name = "all", model = "cox") ceph_cox_res ceph_f_cox <- coxph(Surv(censortime, censor) ~ resid + byr + ParentalAge, data = ceph[ceph$male == 0, ]) ceph_f_cox_res <- get_estimate(ceph_f_cox, name = "all", model = "cox") ceph_f_cox_res ceph_m_cox <- coxph(Surv(censortime, censor) ~ resid + byr + ParentalAge, data = ceph[ceph$male == 1, ]) ceph_m_cox_res <- get_estimate(ceph_m_cox, name = "all", model = "cox") ceph_m_cox_res # residual continuous per standard deviation # For both gender combined sd(ceph$resid) #1.350442 # HR/SD = exp(SD*log(HR)) = exp(1.350442*0.18292) = 1.280208 # 95% CI: # LL HR/SD = exp(SD*log(HR)) = exp(1.350442*(0.18292-1.96*0.07488)) = 1.050037 # UL HR/SD = exp(SD*log(HR)) = exp(1.350442*(0.18292+1.96*0.07488)) = 1.560834 # For females: sd(ceph[ceph$male == 0, ]$resid) #0.724996 # HR/SD = exp(SD*log(HR)) = exp(0.724996*0.26967) = 1.215931 # 95% CI: # LL HR/SD = exp(SD*log(HR)) = exp(0.724996*(0.26967-1.96*0.19081)) = 0.9271593 # UL HR/SD = exp(SD*log(HR)) = exp(0.724996*(0.26967+1.96*0.19081)) = 1.594642 # For males: sd(ceph[ceph$male == 1, ]$resid) #1.775433 # HR/SD = exp(SD*log(HR)) = exp(1.775433*0.18761) = 1.395271 # 95% CI: # LL HR/SD = exp(SD*log(HR)) = exp(1.775433*(0.18761-1.96*0.08392)) = 1.041915 # UL HR/SD = exp(SD*log(HR)) = exp(1.775433*(0.18761+1.96*0.08392)) = 1.868466 ### Residuals Quartiles ========== ceph_cox <- coxph(Surv(censortime, censor) ~ resid.4lev.f + byr + male + ParentalAge, data = ceph) ceph_cox_res <- get_estimate(ceph_cox, name = "all", model = "cox") ceph_cox_res ceph_f_cox <- coxph(Surv(censortime, censor) ~ resid.4lev.f + byr + ParentalAge, data = ceph[ceph$male == 0, ]) ceph_f_cox_res <- get_estimate(ceph_f_cox, name = "all", model = "cox") ceph_f_cox_res ceph_m_cox <- coxph(Surv(censortime, censor) ~ resid.4lev.f + byr + ParentalAge, data = ceph[ceph$male == 1, ]) ceph_m_cox_res <- get_estimate(ceph_m_cox, name = "all", model = "cox") ceph_m_cox_res # Residuals trend test ceph_cox <- coxph(Surv(censortime, censor) ~ resid.4lev + byr + male + ParentalAge, data = ceph) ceph_cox_res <- get_estimate(ceph_cox, name = "all", model = "cox") ceph_cox_res ceph_f_cox <- coxph(Surv(censortime, censor) ~ resid.4lev + byr + ParentalAge, data = ceph[ceph$male == 0, ]) ceph_f_cox_res <- get_estimate(ceph_f_cox, name = "all", model = "cox") ceph_f_cox_res ceph_m_cox <- coxph(Surv(censortime, censor) ~ resid.4lev + byr + ParentalAge, data = ceph[ceph$male == 1, ]) ceph_m_cox_res <- get_estimate(ceph_m_cox, name = "all", model = "cox") ceph_m_cox_res ``` ```{r Table 2- Associations of germline mutaton rates with reproductive lifespan in 53 Generation I women with ALB >= 30 years, include = TRUE} ### Number of live births =========================== ### Residuals ============ ceph_glm <- glm(NumLB ~ resid + byr + ParentalAge, family = "poisson", data = ceph_f_albge30) ceph_glm_res <- get_estimate(ceph_glm, name = "all", model = "glm") ceph_glm_res sd(ceph_f_albge30$resid) #0.732117 # HR/SD = exp(SD*log(HR)) = exp(0.732117*-0.124792) = 1.393811 # 95% CI: # LL HR/SD = exp(SD*log(HR)) = exp(0.732117*(-0.124792-1.96*0.076747)) = 0.9372871 # UL HR/SD = exp(SD*log(HR)) = exp(0.732117*(-0.124792+1.96*0.076747)) = 2.072693 ### Residuals Tertiles 2 levels ========== ceph_glm <- glm(NumLB ~ resid.2lev.t.adj + byr + ParentalAge, family = "poisson", data = ceph_f_albge30) ceph_glm_res <- get_estimate(ceph_glm, name = "all", model = "glm") ceph_glm_res ### Age at last birth =========================== ### Residuals ============== # Age at last birth less than 25th percentile ceph_glm <- glm(alb.lt25pct ~ resid + byr + ParentalAge + albmiss, family = "binomial", data = ceph_f_albge30) ceph_glm_res <- get_estimate(ceph_glm, name = "all", model = "glm") ceph_glm_res sd(ceph_f_albge30$resid) #0.732117 # HR/SD = exp(SD*log(HR)) = exp(0.732117*1.02343) = 2.115456 # 95% CI: # LL HR/SD = exp(SD*log(HR)) = exp(0.732117*(1.02343-1.96*0.48737)) = 1.051186 # UL HR/SD = exp(SD*log(HR)) = exp(0.732117*(1.02343+1.96*0.48737)) = 4.257243 ### Residuals Tertiles 2 levels========== # Age at last birth less than 25th percentile ceph_glm <- glm(alb.lt25pct ~ resid.2lev.t.adj + byr + ParentalAge + albmiss, family = "binomial", data = ceph_f_albge30) ceph_glm_res <- get_estimate(ceph_glm, name = "all", model = "glm") ceph_glm_res ``` ```{r Supplementary Table 1, include = TRUE} mytable(Rel ~ NoAutosomalDNMs06022019 + DNMsRates + byr + borninutah + white + hispanic + ParentalAge + NumLB + alb + albmiss + cancerdiag + mindiagage + dyr + diedinutah + censor.codgroup, data = ceph) ``` ```{r Supplementary Table 2, include = TRUE} median_age_q <- ceph %>% group_by(male, resid.4lev.f) %>% summarise(median.age = median(ParentalAge, na.rm = TRUE)) median_age_t <- ceph %>% group_by(male, resid.3lev.f) %>% summarise(median.age = median(ParentalAge, na.rm = TRUE)) ``` ```{r Supplemental Table 3, include = TRUE} ### DNMsRates/ParentalAge Continuous ========== ceph_cox <- coxph(Surv(censortime, censor) ~ dnmratesperage + byr + male + ParentalAge, data = ceph) ceph_cox_res <- get_estimate(ceph_cox, name = "all", model = "cox") ceph_cox_res ceph_f_cox <- coxph(Surv(censortime, censor) ~ dnmratesperage + byr + ParentalAge, data = ceph[ceph$male == 0, ]) ceph_f_cox_res <- get_estimate(ceph_f_cox, name = "all", model = "cox") ceph_f_cox_res ceph_m_cox <- coxph(Surv(censortime, censor) ~ dnmratesperage + byr + ParentalAge, data = ceph[ceph$male == 1, ]) ceph_m_cox_res <- get_estimate(ceph_m_cox, name = "all", model = "cox") ceph_m_cox_res ### DNMsRates/ParentalAge Quartiles ========== ceph_cox <- coxph(Surv(censortime, censor) ~ dnmratesperage.4lev.f + byr + male + ParentalAge, data = ceph) ceph_cox_res <- get_estimate(ceph_cox, name = "all", model = "cox") ceph_cox_res ceph_f_cox <- coxph(Surv(censortime, censor) ~ dnmratesperage.4lev.f + byr + ParentalAge, data = ceph[ceph$male == 0, ]) ceph_f_cox_res <- get_estimate(ceph_f_cox, name = "all", model = "cox") ceph_f_cox_res ceph_m_cox <- coxph(Surv(censortime, censor) ~ dnmratesperage.4lev.f + byr + ParentalAge, data = ceph[ceph$male == 1, ]) ceph_m_cox_res <- get_estimate(ceph_m_cox, name = "all", model = "cox") ceph_m_cox_res # DNMrates trend test ceph_cox <- coxph(Surv(censortime, censor) ~ dnmratesperage.4lev + byr + male + ParentalAge, data = ceph) ceph_cox_res <- get_estimate(ceph_cox, name = "all", model = "cox") ceph_cox_res ceph_f_cox <- coxph(Surv(censortime, censor) ~ dnmratesperage.4lev + byr + ParentalAge, data = ceph[ceph$male == 0, ]) ceph_f_cox_res <- get_estimate(ceph_f_cox, name = "all", model = "cox") ceph_f_cox_res ceph_m_cox <- coxph(Surv(censortime, censor) ~ dnmratesperage.4lev + byr + ParentalAge, data = ceph[ceph$male == 1, ]) ceph_m_cox_res <- get_estimate(ceph_m_cox, name = "all", model = "cox") ceph_m_cox_res ``` ```{r Supplemental Table 4, include = TRUE} ### Residuals ============== ceph_cox <- coxph(Surv(censortime.cancerdiag, censor.cancerdiag == 1) ~ resid + byr + male + ParentalAge, data = ceph) ceph_cox_res <- get_estimate(ceph_cox, name = "all", model = "cox") ceph_cox_res ceph_f_cox <- coxph(Surv(censortime.cancerdiag, censor.cancerdiag == 1) ~ resid + byr + ParentalAge, data = ceph[ceph$male == 0, ]) ceph_f_cox_res <- get_estimate(ceph_f_cox, name = "all", model = "cox") ceph_f_cox_res ceph_m_cox <- coxph(Surv(censortime.cancerdiag, censor.cancerdiag == 1) ~ resid + byr + ParentalAge, data = ceph[ceph$male == 1, ]) ceph_m_cox_res <- get_estimate(ceph_m_cox, name = "all", model = "cox") ceph_m_cox_res # residual continuous per standard deviation # For both gender combined sd(ceph$resid) #1.350442 # HR/SD = exp(SD*log(HR)) = exp(1.350442*0.127983) = 1.188668 # 95% CI: # LL HR/SD = exp(SD*log(HR)) = exp(1.350442*(0.127983-1.96*0.134061)) = 0.8335947 # UL HR/SD = exp(SD*log(HR)) = exp(1.350442*(0.127983+1.96*0.134061)) = 1.694987 # For females: sd(ceph[ceph$male == 0, ]$resid) #0.724996 # HR/SD = exp(SD*log(HR)) = exp(0.724996*-0.40901) = 0.7433929 # 95% CI: # LL HR/SD = exp(SD*log(HR)) = exp(0.724996*(-0.40901-1.96*0.36216)) = 0.4443436 # UL HR/SD = exp(SD*log(HR)) = exp(0.724996*(-0.40901+1.96*0.36216)) = 1.243706 # For males: sd(ceph[ceph$male == 1, ]$resid) #1.775433 # HR/SD = exp(SD*log(HR)) = exp(1.775433*0.216624) = 1.469029 # 95% CI: # LL HR/SD = exp(SD*log(HR)) = exp(1.775433*(0.216624-1.96*0.150118)) = 0.8712847 # UL HR/SD = exp(SD*log(HR)) = exp(1.775433*(0.216624+1.96*0.150118)) = 2.476854 ### Residuals Tertiles ========== ceph_cox <- coxph(Surv(censortime.cancerdiag, censor.cancerdiag == 1) ~ resid.3lev.f + byr + male + ParentalAge, data = ceph) ceph_cox_res <- get_estimate(ceph_cox, name = "all", model = "cox") ceph_cox_res ceph_f_cox <- coxph(Surv(censortime.cancerdiag, censor.cancerdiag == 1) ~ resid.3lev.f + byr + ParentalAge, data = ceph[ceph$male == 0, ]) ceph_f_cox_res <- get_estimate(ceph_f_cox, name = "all", model = "cox") ceph_f_cox_res ceph_m_cox <- coxph(Surv(censortime.cancerdiag, censor.cancerdiag == 1) ~ resid.3lev.f + byr + ParentalAge, data = ceph[ceph$male == 1, ]) ceph_m_cox_res <- get_estimate(ceph_m_cox, name = "all", model = "cox") ceph_m_cox_res # Residuals trend test ceph_cox <- coxph(Surv(censortime.cancerdiag, censor.cancerdiag == 1) ~ resid.3lev + byr + male + ParentalAge, data = ceph) ceph_cox_res <- get_estimate(ceph_cox, name = "all", model = "cox") ceph_cox_res ceph_f_cox <- coxph(Surv(censortime.cancerdiag, censor.cancerdiag == 1) ~ resid.3lev + byr + ParentalAge, data = ceph[ceph$male == 0, ]) ceph_f_cox_res <- get_estimate(ceph_f_cox, name = "all", model = "cox") ceph_f_cox_res ceph_m_cox <- coxph(Surv(censortime.cancerdiag, censor.cancerdiag == 1) ~ resid.3lev + byr + ParentalAge, data = ceph[ceph$male == 1, ]) ceph_m_cox_res <- get_estimate(ceph_m_cox, name = "all", model = "cox") ceph_m_cox_res ```