###### Supplementary Code ###### 

## Manuscript Information:
## Heterogenous Humoral and Cellular Immune Responses with Distinct Trajectories Post-SARS-CoV-2 Infection in a Population-Based Cohort
## Menges, Zens, Ballouz et al.
## Nature Communications, 2022

### Packages & Presets ----------------------------------------------------------------------

# Packages
library(openxlsx)
library(tidyverse)
library(table1)
library(ggpubr)
library(pheatmap)
library(RColorBrewer)
library(lme4)
library(lmerTest)

# Table specifications
# Renderer for continuous variables in table1(): returns an empty header cell
# followed by labelled "Mean (SD)", "Median (IQR)" and "Range" strings built
# from the summary statistics that stats.default() reports for x.
render.cont <- function(x) {
  s <- stats.default(x)
  c("",
    "Mean (SD)" = sprintf("%0.1f (%0.1f)", s[["MEAN"]], s[["SD"]]),
    "Median (IQR)" = sprintf("%0.1f (%0.1f to %0.1f)", s[["MEDIAN"]], s[["Q1"]], s[["Q3"]]),
    "Range" = sprintf("%0.0f to %0.0f", s[["MIN"]], s[["MAX"]]))
}
# Renderer for categorical variables in table1(): returns an empty header cell
# followed by one "n (pct%)" string per element of stats.default(x), using the
# FREQ and PCTnoNA entries of each element.
render.cat <- function(x) {
  level_stats <- stats.default(x)
  format_level <- function(y) {
    sprintf("%d (%0.1f%%)", y[["FREQ"]], y[["PCTnoNA"]])
  }
  c("", sapply(level_stats, format_level))
}

# Colors
# 22-colour palette: the 11 "RdBu" colours (indices 1-11) followed by the
# 11 "PRGn" colours (indices 12-22); figures pick entries by index.
col <- c(brewer.pal(11, "RdBu"),
         brewer.pal(11, "PRGn"))


### Load and Process Dataset ---------------------------------------------------------------------- 

## Import analysis dataset
# Long-format analysis dataset, read from sheet "data" of the source workbook.
source_data <- read.xlsx("./Source Data.xlsx", sheet = "data")

## Define factors for relevant variables
# Level order is set explicitly where it matters for display (timepoint,
# symptom count, smoking); all other factors use the default level order.
source_data <- source_data %>% 
  mutate(timepoint = factor(timepoint, levels = c("W2", "M1", "M3", "M6")),
         age_group = factor(age_group), 
         age_group_2l = factor(age_group_2l), 
         sex = factor(sex), 
         symp_count_init_3l = factor(symp_count_init_3l, levels = c("Asymptomatic", "1-5 symptoms", "≥6 symptoms")), 
         smoking = factor(smoking, levels = c("Non-smoker", "Ex-smoker", "Smoker")), 
         hosp_2wks = factor(hosp_2wks), 
         # All qualitative test-result columns become factors. The list previously
         # named result_Roche_S_Ig twice and omitted result_Roche_N_Ig, so the
         # Roche anti-N result was never converted; it is now included.
         across(c(result_S_IgA, result_S_IgG, result_N_IgG, result_S_IgA_IgG, result_S_N_IgG, result_S_N_IgA_IgG, 
                  result_Roche_S_Ig, result_Roche_N_Ig, result_Roche_S_N_Ig, 
                  result_neutr_wt, result_neutr_delta, result_neutr_omicron, 
                  M_Pos, N_Pos, S1_Pos, S2_Pos, Any_Pos), ~ factor(.x)))

## Create wide dataset
# Wide dataset: every measurement column listed below is spread into one column
# per timepoint (suffix "_W2", "_M1", ...). No id_cols are given, so all
# remaining columns jointly identify a row of the wide table.
source_data_w <- pivot_wider(
  data = source_data,
  names_from = "timepoint",
  values_from = c(
    # Timing
    "time_since_diagnosis",
    # Antibody results and ratios
    "result_S_IgA", "ratio_S_IgA", "logratio_S_IgA",
    "result_S_IgG", "ratio_S_IgG", "logratio_S_IgG",
    "result_N_IgG", "ratio_N_IgG", "logratio_N_IgG",
    "result_S_IgA_IgG", "result_S_N_IgG", "result_S_N_IgA_IgG",
    "ratio_S_IgG_conv",
    # Roche assay
    "result_Roche_S_Ig", "Roche_S_Ig", "result_Roche_N_Ig", "Roche_N_Ig", "result_Roche_S_N_Ig",
    # Neutralization
    "result_neutr_wt", "neutr_wt", "neutr_wt_cc", "log_neutr_wt",
    "result_neutr_delta", "neutr_delta", "neutr_delta_cc", "log_neutr_delta",
    "result_neutr_omicron", "neutr_omicron", "neutr_omicron_cc", "log_neutr_omicron",
    # T cell responses
    "Any_Pos", "TC.corr", "TC.corr_cc", "log_TC.corr",
    "M_Pos", "M.corr", "M.corr_cc", "log_M.corr",
    "N_Pos", "N.corr", "N.corr_cc", "log_N.corr",
    "S1_Pos", "S1.corr", "S1.corr_cc", "log_S1.corr",
    "S2_Pos", "S2.corr", "S2.corr_cc", "log_S2.corr",
    # Cell counts and AIM measurements
    "PBMCs/mLBlood",
    "CD4AIM.delta", "CD4AIM.delta_cc", "CD8AIM.delta", "CD8AIM.delta_cc",
    "CD4+.T.cell.count", "CD8+.T.cell.count", "B.cell.count", "NK.cell.count", "CD56+CD16+.NK", "total.memory.B.cell",
    "CD4AIM.sumcount", "CD4AIM.calc.naive", "CD4AIM.calc.EM", "CD4AIM.calc.CM", "CD4AIM.calc.TEMRA",
    "CD8AIM.sumcount", "CD8AIM.calc.naive", "CD8AIM.calc.EM", "CD8AIM.calc.CM", "CD8AIM.calc.TEMRA"
  )
)

## Create graphics dataset
# Graphics dataset: same rows as source_data, but every qualitative result
# column has its levels reordered so that "positive" comes before "negative".
source_data_g <- source_data %>% 
  mutate(across(
    c("result_S_IgA", "result_S_IgG", "result_N_IgG", 
      "result_S_IgA_IgG", "result_S_N_IgG", "result_S_N_IgA_IgG", 
      "result_Roche_S_Ig", "result_Roche_N_Ig", "result_Roche_S_N_Ig", 
      "result_neutr_wt", "result_neutr_delta", "result_neutr_omicron", 
      "M_Pos", "N_Pos", "S1_Pos", "S2_Pos", "Any_Pos"), 
    function(f) fct_relevel(f, "positive", "negative")
  ))


### Define Functions ---------------------------------------------------------------------- 

## Figure settings
# Define point and line sizes
# Shared size for points and for line widths in all figures
point_sz <- 0.25
line_sz <- 0.25

# Shared figure theme: theme_bw() with small fonts, no vertical grid lines,
# light horizontal major grid lines and a tight plot margin.
graph_style <- theme_bw() + 
  theme(
    # Titles and facet strips
    plot.title = element_text(size = 7, face = "bold"),
    strip.background = element_blank(),
    strip.text = element_text(size = 7, face = "bold"),
    # Legend
    legend.title = element_text(size = 6),
    legend.text = element_text(size = 6),
    legend.position = "right",
    legend.margin = margin(0, 0, 0, 0),
    legend.box.margin = margin(0, 0, 0, -5),
    # Grid lines
    panel.grid.major.y = element_line(color = "#EDEFF1"),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # Axes
    axis.title.y = element_text(size = 6, color = "#000000", margin = margin(r = 3)),
    axis.title.x = element_text(size = 6, color = "#000000", margin = margin(t = 3)),
    axis.text = element_text(size = 5, color = "#000000"),
    # Outer margin of 0.5 mm on every side
    plot.margin = grid::unit(rep(0.5, 4), "mm")
  )

# Define split violin plot functions
# ggproto geom that draws only one half of a violin per group: odd-numbered
# groups are drawn on the left of the centre line, even-numbered groups on the
# right, so two adjacent groups form one split violin.
# NOTE(review): relies on ggplot2 internals reached via `:::`
# (create_quantile_segment_frame, ggname); confirm these exist in the
# installed ggplot2 version.
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, 
                           draw_group = function(self, data, ..., draw_quantiles = NULL) {
                             # Outline x-positions of the left and right violin halves
                             data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
                             grp <- data[1, "group"]
                             # Odd group: left half sorted by ascending y; even group: right half sorted by descending y
                             newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
                             # Close the polygon along the centre line: duplicate the end points and
                             # move the first and the last two vertices to the rounded x position
                             newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
                             newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
                             
                             # Scalar condition, so use the short-circuit operator `&&`; the range
                             # check is skipped entirely when no quantiles were requested
                             if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
                               stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
                               quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
                               # Quantile segments reuse the aesthetics of the group's first row, fully opaque
                               aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
                               aesthetics$alpha <- rep(1, nrow(quantiles))
                               both <- cbind(quantiles, aesthetics)
                               quantile_grob <- GeomPath$draw_panel(both, ...)
                               ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
                             }
                             else {
                               ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
                             }
                           })

# Layer constructor for GeomSplitViolin. Arguments are passed to
# ggplot2::layer(); trim, scale, draw_quantiles, na.rm and anything in `...`
# are forwarded as layer parameters.
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., 
                              draw_quantiles = NULL, trim = FALSE, scale = "area", na.rm = FALSE, 
                              show.legend = NA, inherit.aes = TRUE) {
  layer_params <- list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...)
  layer(geom = GeomSplitViolin, 
        stat = stat, 
        data = data, 
        mapping = mapping, 
        position = position, 
        params = layer_params, 
        inherit.aes = inherit.aes, 
        show.legend = show.legend)
}

## Analysis functions
# Generate estimation function for antibody decay analyses
# Half-life from a fitted model `x`: takes the coefficient of the term
# "time_max_test" with its confidence limits and returns log(0.5) divided by
# each of them, as a named vector (est, ci.lb, ci.ub).
# The limits are not re-sorted: ci.lb is log(0.5) / lower slope limit.
# NOTE(review): presumably the outcome is on the natural-log scale, so that
# log(0.5) / slope is a half-life in the time unit of the model; confirm.
half_life_ab <- function(x) {
  coefs <- broom.mixed::tidy(x, conf.int = TRUE)
  slope_row <- filter(coefs, term == "time_max_test")
  slope <- unlist(select(slope_row, est = estimate, ci.lb = conf.low, ci.ub = conf.high))
  log(0.5) / slope
}

# Generate estimation function for T cell decay analyses
# Half-life from a fitted model `x`: takes the coefficient of the term
# "time_since_diagnosis" with its confidence limits and returns log(0.5)
# divided by each of them, as a named vector (est, ci.lb, ci.ub).
# The limits are not re-sorted: ci.lb is log(0.5) / lower slope limit.
# NOTE(review): presumably the outcome is on the natural-log scale, so that
# log(0.5) / slope is a half-life in the time unit of the model; confirm.
half_life_tc <- function(x) {
  coefs <- broom.mixed::tidy(x, conf.int = TRUE)
  slope_row <- filter(coefs, term == "time_since_diagnosis")
  slope <- unlist(select(slope_row, est = estimate, ci.lb = conf.low, ci.ub = conf.high))
  log(0.5) / slope
}

# Generate output function for univariable mixed linear regression models
# Univariable linear mixed models: for every variable in vars.adj and vars.cov,
# fits `outc ~ variable + (1 | id)` on `data` and collects the fixed-effect
# rows (intercept excluded). Returns one table with the term name in `outcome`,
# a formatted "estimate (lower to upper)" string in `coef_CI`, a formatted
# p-value in `pval`, followed by all remaining tidy() columns.
gen_output_lmer_uv <- function(outc, vars.adj, vars.cov, data) {
  per_predictor <- list()
  for (pred in c(vars.adj, vars.cov)) {
    model_formula <- as.formula(paste0(outc, " ~ ", pred, "+ (1 | id)"))
    fit <- lmer(model_formula, data = data)
    fixed_terms <- broom.mixed::tidy(fit, conf.int = TRUE, conf.level = 0.95, conf.method = "Wald", exponentiate = FALSE, p.value = TRUE)
    fixed_terms <- filter(fixed_terms, effect == "fixed" & term != "(Intercept)")
    # Stored under the predictor name, so a repeated predictor overwrites its earlier entry
    per_predictor[[pred]] <- rename(fixed_terms, outcome = term)
  }
  # Two-decimal fixed notation for estimates and confidence limits
  fmt2 <- function(v) formatC(v, format = "f", digits = 2)
  bind_rows(per_predictor) %>% 
    mutate(coef = estimate, 
           CI.lb = conf.low, 
           CI.ub = conf.high, 
           coef_CI = paste0(fmt2(coef), " (", fmt2(CI.lb), " to ", fmt2(CI.ub), ")"), 
           pval = ifelse(p.value < 0.001, "<0.001", formatC(p.value, format = "f", digits = 4))) %>% 
    select(outcome, coef_CI, pval, everything())
}

# Generate output function for multivariable mixed linear regression models
# Multivariable linear mixed models.
#   outc     : name of the outcome variable (string)
#   vars.adj : names of the adjustment variables included in every model;
#              may be empty, in which case the base model is intercept-only
#   vars.cov : names of covariates, each added one at a time to the base model
#   data     : data frame containing all variables and the grouping column `id`
# Fits `outc ~ vars.adj + (1 | id)` as the base model, then one model per
# covariate with that covariate appended. Returns the base model's fixed-effect
# rows followed, for each covariate model, by the rows that come after the
# adjustment terms. Columns: `outcome` (term name), `coef_CI`, `pval`, then all
# remaining tidy() columns.
# The p.value column is used as returned by tidy(); the script loads lmerTest,
# which is presumably what makes it available for lmer fits (TODO confirm).
gen_output_lmer_mv <- function(outc, vars.adj, vars.cov, data) {
  output <- list()
  # An empty adjustment set becomes an intercept-only right-hand side instead
  # of a formula with a dangling "+"
  base_rhs <- if (length(vars.adj) > 0) paste(vars.adj, collapse = " + ") else "1"
  frm <- as.formula(paste0(outc, " ~ ", base_rhs, " + (1 | id)"))
  m <- lmer(frm, data = data)
  m_out <- broom.mixed::tidy(m, conf.int = TRUE, conf.level = 0.95, conf.method = "Wald", exponentiate = FALSE) %>% 
    filter(effect == "fixed" & term != "(Intercept)") %>% 
    rename(outcome = term)
  # Number of rows contributed by the adjustment variables
  n <- nrow(m_out)
  output[["base"]] <- m_out
  for (i in vars.cov) {
    frm <- as.formula(paste0(outc, " ~ ", paste(c(vars.adj, i), collapse = " + "), " + (1 | id)"))
    m <- lmer(frm, data = data)
    m_out <- broom.mixed::tidy(m, conf.int = TRUE, conf.level = 0.95, conf.method = "Wald", exponentiate = FALSE) %>% 
      filter(effect == "fixed" & term != "(Intercept)") %>% 
      rename(outcome = term)
    # Keep only the rows after the first n. A logical mask is used because
    # seq(1, n) counts down to c(1, 0) when n is 0 and would drop row 1.
    output[[i]] <- m_out[seq_len(nrow(m_out)) > n, ]
  }
  output <- bind_rows(output) %>% 
    mutate(coef = estimate, CI.lb = conf.low, CI.ub = conf.high, 
           coef_CI = paste0(formatC(coef, format = "f", digits = 2), " (", formatC(CI.lb, format = "f", digits = 2), " to ", formatC(CI.ub, format = "f", digits = 2), ")"), 
           pval = ifelse(p.value < 0.001, "<0.001", formatC(p.value, format = "f", digits = 4))) %>% 
    select(outcome, coef_CI, pval, everything())
  output
}

# Generate output function for univariable logistic regression models
# Univariable logistic regression: for every variable in vars.adj and vars.cov,
# fits `outc ~ variable` with a binomial family on `data` and collects the
# exponentiated coefficient rows (intercept excluded). Returns one table with
# the term name in `outcome`, a formatted "estimate (lower to upper)" string in
# `coef_CI`, a formatted p-value in `pval`, then all remaining tidy() columns.
# NOTE(review): conf.method = "Wald" is passed to broom::tidy(); confirm that
# tidy() for glm objects honours it rather than silently ignoring it.
gen_output_glm_uv <- function(outc, vars.adj, vars.cov, data) {
  per_predictor <- list()
  for (pred in c(vars.adj, vars.cov)) {
    model_formula <- as.formula(paste0(outc, " ~ ", pred))
    fit <- glm(model_formula, data = data, family = "binomial")
    coef_terms <- broom::tidy(fit, conf.int = TRUE, conf.level = 0.95, conf.method = "Wald", exponentiate = TRUE)
    coef_terms <- filter(coef_terms, term != "(Intercept)")
    # Stored under the predictor name, so a repeated predictor overwrites its earlier entry
    per_predictor[[pred]] <- rename(coef_terms, outcome = term)
  }
  # Two-decimal fixed notation for estimates and confidence limits
  fmt2 <- function(v) formatC(v, format = "f", digits = 2)
  bind_rows(per_predictor) %>% 
    mutate(coef = estimate, 
           CI.lb = conf.low, 
           CI.ub = conf.high, 
           coef_CI = paste0(fmt2(coef), " (", fmt2(CI.lb), " to ", fmt2(CI.ub), ")"), 
           pval = ifelse(p.value < 0.001, "<0.001", formatC(p.value, format = "f", digits = 4))) %>% 
    select(outcome, coef_CI, pval, everything())
}

# Generate output function for multivariable logistic regression models
# Multivariable logistic regression.
#   outc     : name of the outcome variable (string)
#   vars.adj : names of the adjustment variables included in every model;
#              may be empty, in which case the base model is intercept-only
#   vars.cov : names of covariates, each added one at a time to the base model
#   data     : data frame containing all variables
# Fits `outc ~ vars.adj` with a binomial family as the base model, then one
# model per covariate with that covariate appended. Returns the base model's
# exponentiated coefficient rows followed, for each covariate model, by the
# rows that come after the adjustment terms. Columns: `outcome` (term name),
# `coef_CI`, `pval`, then all remaining tidy() columns.
# NOTE(review): conf.method = "Wald" is passed to broom::tidy(); confirm that
# tidy() for glm objects honours it rather than silently ignoring it.
gen_output_glm_mv <- function(outc, vars.adj, vars.cov, data) {
  output <- list()
  # An empty adjustment set becomes an intercept-only right-hand side; the
  # string "outc ~ " with nothing after the tilde cannot be parsed as a formula
  base_rhs <- if (length(vars.adj) > 0) paste(vars.adj, collapse = " + ") else "1"
  frm <- as.formula(paste0(outc, " ~ ", base_rhs))
  m <- glm(frm, data = data, family = "binomial")
  m_out <- broom::tidy(m, conf.int = TRUE, conf.level = 0.95, conf.method = "Wald", exponentiate = TRUE) %>% 
    filter(term != "(Intercept)") %>% 
    rename(outcome = term)
  # Number of rows contributed by the adjustment variables
  n <- nrow(m_out)
  output[["base"]] <- m_out
  for (i in vars.cov) {
    frm <- as.formula(paste0(outc, " ~ ", paste(c(vars.adj, i), collapse = " + ")))
    m <- glm(frm, data = data, family = "binomial")
    m_out <- broom::tidy(m, conf.int = TRUE, conf.level = 0.95, conf.method = "Wald", exponentiate = TRUE) %>% 
      filter(term != "(Intercept)") %>% 
      rename(outcome = term)
    # Keep only the rows after the first n. A logical mask is used because
    # seq(1, n) counts down to c(1, 0) when n is 0 and would drop row 1.
    output[[i]] <- m_out[seq_len(nrow(m_out)) > n, ]
  }
  output <- bind_rows(output) %>% 
    mutate(coef = estimate, CI.lb = conf.low, CI.ub = conf.high, 
           coef_CI = paste0(formatC(coef, format = "f", digits = 2), " (", formatC(CI.lb, format = "f", digits = 2), " to ", formatC(CI.ub, format = "f", digits = 2), ")"), 
           pval = ifelse(p.value < 0.001, "<0.001", formatC(p.value, format = "f", digits = 4))) %>% 
    select(outcome, coef_CI, pval, everything())
  output
}


### Study participant characteristics ---------------------------------------------------------------------- 

## Figure 1 helpers
# The two age histograms and the six donut charts of Figure 1 differ only in
# their input data, bin count, grouping variable, centre label, palette and
# legend title, so each chart type is built by one helper.

# Age histogram with the given number of bins, drawn without panel border and
# without horizontal grid lines.
fig1_age_histogram <- function(data, bins) {
  ggplot(data, aes(x = age)) + 
    geom_histogram(bins = bins, color = "gray", fill = "#BFBFBF") + 
    scale_x_continuous(breaks = c(20, 40, 60, 80)) + 
    graph_style + 
    theme(panel.grid.major.y = element_blank(), 
          panel.border = element_blank(), 
          axis.title.x = element_text(face = "bold", margin = margin(t = 8)), 
          plot.margin = grid::unit(c(0, 0, 0, 0), "mm")) + 
    labs(title = NULL, y = NULL, x = "Age")
}

# Donut chart of the percentage of rows per level of `var` (unquoted column).
#   center_label : text printed in the middle of the donut
#   fill_values  : fill colours, one per level of `var`
#   fill_title   : legend title
fig1_donut <- function(data, var, center_label, fill_values, fill_title) {
  data %>% 
    group_by({{ var }}) %>% 
    summarise(n = n()) %>% 
    mutate(freq = round(n / sum(n) * 100, 1), 
           lab.pos = round(cumsum(freq) - .5 * freq, 2)) %>% 
    ggplot(aes(x = 2, y = freq, fill = {{ var }})) + 
    geom_bar(stat = "identity") + 
    # Polar transform of a stacked bar; xlim() below leaves the hole in the middle
    coord_polar("y", start = 200) + 
    geom_text(aes(y = 0, x = 0.3, label = center_label), col = "black", fontface = "bold", size = 2.5) + 
    scale_fill_manual(values = fill_values) + 
    theme_void() + 
    theme(legend.position = "bottom", 
          legend.direction = "vertical", 
          legend.title = element_text(face = "bold", size = 6), 
          legend.text = element_text(size = 6), 
          plot.margin = grid::unit(c(0, 0, 0, 0), "mm")) + 
    guides(fill = guide_legend(keywidth = .25, keyheight = .25)) + 
    labs(title = NULL, fill = fill_title, y = NULL, x = NULL) + 
    xlim(0.2, 2.5)
}


## Figure 1a: Study participant characteristics within overall study population
# NOTE(review): the centre labels "431" and "64" are hard-coded; verify that
# they match the number of participants in the respective dataset.

# Age
fig_age_overall <- fig1_age_histogram(source_data_w, bins = 30)
fig_age_overall

ggsave("./Fig1A1.pdf", 
       plot = fig_age_overall, 
       width = 3, height = 2.5, device = "pdf", units = "cm")

# Sex
fig_sex_overall <- fig1_donut(source_data_w, sex, 
                              center_label = "431", 
                              fill_values = c("#7F7F7F", "#8DA0CB"), 
                              fill_title = "Sex")
fig_sex_overall

ggsave("./Fig1A2.pdf", 
       plot = fig_sex_overall, 
       width = 2.5, height = 3.2, device = "pdf", units = "cm")

# Severity of COVID-19
fig_sev_overall <- source_data_w %>% 
  mutate(symp_count_init_3l = fct_recode(symp_count_init_3l, "6+ symptoms" = "≥6 symptoms")) %>% 
  fig1_donut(symp_count_init_3l, 
             center_label = "431", 
             fill_values = RColorBrewer::brewer.pal(8, "GnBu")[c(2,4,7)], 
             fill_title = "COVID-19 symptoms")
fig_sev_overall

ggsave("./Fig1A3.pdf", 
       plot = fig_sev_overall, 
       width = 2.8, height = 3.5, device = "pdf", units = "cm")

# Hospitalization
fig_hosp_overall <- fig1_donut(source_data_w, hosp_2wks, 
                               center_label = "431", 
                               fill_values = RColorBrewer::brewer.pal(8, "Reds")[c(2,4)], 
                               fill_title = "Hospitalization \ndue to COVID-19")
fig_hosp_overall

ggsave("./Fig1A4.pdf", 
       plot = fig_hosp_overall, 
       width = 2.5, height = 3.5, device = "pdf", units = "cm")


## Figure 1b: Study participant characteristics within subsample
# Age
fig_age_subsample <- source_data_w %>% 
  filter(subsample == "Yes") %>% 
  fig1_age_histogram(bins = 20)
fig_age_subsample

ggsave("./Fig1B1.pdf", 
       plot = fig_age_subsample, 
       width = 3, height = 2.5, device = "pdf", units = "cm")

# Sex
fig_sex_subsample <- source_data_w %>% 
  filter(subsample == "Yes") %>% 
  fig1_donut(sex, 
             center_label = "64", 
             fill_values = c("#7F7F7F", "#8DA0CB"), 
             fill_title = "Sex")
fig_sex_subsample

ggsave("./Fig1B2.pdf", 
       plot = fig_sex_subsample, 
       width = 2.5, height = 3.2, device = "pdf", units = "cm")

# Severity of COVID-19
fig_sev_subsample <- source_data_w %>% 
  filter(subsample == "Yes") %>% 
  mutate(symp_count_init_3l = fct_recode(symp_count_init_3l, "6+ symptoms" = "≥6 symptoms")) %>% 
  fig1_donut(symp_count_init_3l, 
             center_label = "64", 
             fill_values = RColorBrewer::brewer.pal(8, "GnBu")[c(2,4,7)], 
             fill_title = "COVID-19 symptoms")
fig_sev_subsample

ggsave("./Fig1B3.pdf", 
       plot = fig_sev_subsample, 
       width = 2.8, height = 3.5, device = "pdf", units = "cm")

# Hospitalization
fig_hosp_subsample <- source_data_w %>% 
  filter(subsample == "Yes") %>% 
  fig1_donut(hosp_2wks, 
             center_label = "64", 
             fill_values = RColorBrewer::brewer.pal(8, "Reds")[c(2,4)], 
             fill_title = "Hospitalization \ndue to COVID-19")
fig_hosp_subsample

ggsave("./Fig1B4.pdf", 
       plot = fig_hosp_subsample, 
       width = 2.5, height = 3.5, device = "pdf", units = "cm")


## Supplementary Table 1: Study participant characteristics, overall and within subsample
# Descriptive table of demographic, clinical and socioeconomic characteristics,
# with one column per level of `subsample`, using the custom renderers.
table1(
  ~ age + age_group + sex + 
    symp_count_init_3l + symp_count_init + symp_sev_init_3l + hosp_2wks + icu_2wks + 
    vaccinated_at_6mth + reinfected_at_6mth + 
    smoking + bmi + comorbidity + immune_supp + 
    employment_4l + education_4l + nationality 
  | subsample, 
  data = source_data_w, 
  render.continuous = render.cont, 
  render.categorical = render.cat
)


### Antibody Trajectories ---------------------------------------------------------------------- 

## Figure 2 helpers
# Panels a-c of Figure 2 share the same two plot types and differ only in the
# input data, the plotted ratio, the positivity cut-off line and the labels,
# so each plot type is built by one helper.

# Scatterplot of an antibody ratio (log10 y-axis, limits 1 to 4000) against
# days since diagnosis (0 to 220), in a single facet labelled "Overall".
#   y_var  : unquoted column holding the ratio
#   cutoff : y position of the dotted horizontal reference line
fig2_scatter <- function(data, y_var, cutoff, title, y_lab) {
  data %>% 
    mutate(group = "Overall") %>% 
    ggplot(aes(y = {{ y_var }}, x = time_since_diagnosis)) + 
    geom_point(size = point_sz) + 
    geom_hline(yintercept = cutoff, linetype = "dotted", lwd = line_sz) + 
    facet_grid(. ~ group) + 
    scale_y_continuous(trans = "log10", limits = c(1, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
    scale_x_continuous(limits = c(0, 220)) + 
    graph_style + 
    labs(title = title, y = y_lab, x = "Days since diagnosis")
}

# Split violin plots (one half per sex) with overlaid boxplots of an antibody
# ratio by timepoint, faceted by the two-level age group; same y-axis and
# cut-off line as the scatterplot.
fig2_violin <- function(data, y_var, cutoff, title, y_lab) {
  data %>% 
    ggplot(aes(y = {{ y_var }}, x = timepoint, fill = sex)) + 
    geom_split_violin(lwd = line_sz) + 
    geom_boxplot(width = 0.3, outlier.shape = NA, outlier.alpha = 0.2, fill = "#FFFFFF", lwd = line_sz) + 
    geom_hline(yintercept = cutoff, linetype = "dotted", lwd = line_sz) + 
    facet_grid(. ~ age_group_2l) + 
    scale_fill_manual(values = col[c(8,10)]) + 
    scale_y_continuous(trans = "log10", limits = c(1, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
    graph_style + 
    guides(fill = guide_legend(keywidth = .25, keyheight = .25)) + 
    labs(title = title, y = y_lab, x = "Timepoint", fill = "")
}


## Figure 2a: Anti-S IgA Antibody Levels (MFI ratios; overall study population)
# Full data scatterplot
trend_s_iga <- fig2_scatter(source_data_g, ratio_S_IgA, cutoff = 6.5, 
                            title = "Anti-S IgA Antibody Levels", 
                            y_lab = "S IgA MFI ratio")
trend_s_iga

# Violin plots stratified by age group and sex
trend_violin_s_iga <- fig2_violin(source_data_g, ratio_S_IgA, cutoff = 6.5, 
                                  title = "Anti-S IgA Antibody Levels by Age and Sex", 
                                  y_lab = "S IgA MFI ratio")
trend_violin_s_iga

# Save figures
ggsave("./Fig2A1.pdf", 
       plot = trend_s_iga + labs(title = NULL), 
       width = 3.2, height = 4, device = "pdf", units = "cm")

ggsave("./Fig2A2.pdf", 
       plot = trend_violin_s_iga + labs(title = NULL, y = NULL), 
       width = 5.4, height = 4, device = "pdf", units = "cm")


## Figure 2b: Anti-S IgG Antibody Levels (MFI ratios; overall study population)
# Full data scatterplot
trend_s_igg <- fig2_scatter(source_data_g, ratio_S_IgG, cutoff = 6, 
                            title = "Anti-S IgG Antibody Levels", 
                            y_lab = "S IgG MFI ratio")
trend_s_igg

# Violin plots stratified by age group and sex
trend_violin_s_igg <- fig2_violin(source_data_g, ratio_S_IgG, cutoff = 6, 
                                  title = "Anti-S IgG Antibody Levels by Age and Sex", 
                                  y_lab = "S IgG MFI ratio")
trend_violin_s_igg

# Save figures
ggsave("./Fig2B1.pdf", 
       plot = trend_s_igg + labs(title = NULL), 
       width = 3.2, height = 4, device = "pdf", units = "cm")

ggsave("./Fig2B2.pdf", 
       plot = trend_violin_s_igg + labs(title = NULL, y = NULL), 
       width = 5.4, height = 4, device = "pdf", units = "cm")


## Figure 2c: Anti-N IgG Antibody Levels (MFI ratios; subsample)
# Full data scatterplot
trend_n_igg <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  fig2_scatter(ratio_N_IgG, cutoff = 6, 
               title = "Anti-N IgG Antibody Levels (subsample)", 
               y_lab = "N IgG MFI ratio")
trend_n_igg

# Violin plots stratified by age group and sex
trend_violin_n_igg <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  fig2_violin(ratio_N_IgG, cutoff = 6, 
              title = "Anti-N IgG Antibody Levels by Age and Sex (subsample)", 
              y_lab = "N IgG MFI ratio")
trend_violin_n_igg

# Save figures
ggsave("./Fig2C1.pdf", 
       plot = trend_n_igg + labs(title = NULL), 
       width = 3.2, height = 4, device = "pdf", units = "cm")

ggsave("./Fig2C2.pdf", 
       plot = trend_violin_n_igg + labs(title = NULL, y = NULL), 
       width = 5.4, height = 4, device = "pdf", units = "cm")


## Supplementary Table 2: Detailed antibody test results for overall study population
# Anti-S antibody results by timepoint (no overall column); rows without a
# combined anti-S IgA/IgG result are dropped before tabulating.
table1(
  ~ result_S_IgA + ratio_S_IgA + 
    result_S_IgG + ratio_S_IgG + ratio_S_IgG_conv + 
    result_S_IgA_IgG 
  | timepoint, 
  data = source_data %>% drop_na(result_S_IgA_IgG), 
  overall = FALSE, 
  render.continuous = render.cont, 
  render.categorical = render.cat
)


## Supplementary Table 3: Detailed antibody test results for subsample
# Antibody, neutralization and T cell results by timepoint (no overall column),
# restricted to the subsample; rows without a combined anti-S IgA/IgG result
# are dropped before tabulating.
table1(
  ~ result_S_IgA + ratio_S_IgA + 
    result_S_IgG + ratio_S_IgG + ratio_S_IgG_conv + 
    result_N_IgG + ratio_N_IgG + 
    result_S_IgA_IgG + result_S_N_IgG + result_S_N_IgA_IgG + 
    result_Roche_S_Ig + Roche_S_Ig + 
    result_Roche_N_Ig + Roche_N_Ig + 
    result_neutr_wt + neutr_wt + result_neutr_delta + neutr_delta + result_neutr_omicron + neutr_omicron + 
    M_Pos + N_Pos + S1_Pos + S2_Pos + Any_Pos 
  | timepoint, 
  data = source_data %>% 
    filter(subsample == "Yes") %>% 
    drop_na(result_S_IgA_IgG), 
  overall = FALSE, 
  render.continuous = render.cont, 
  render.categorical = render.cat
)


## Supplementary Table 4: Sensitivity analysis for anti-S IgA and anti-S IgG seropositivity using reweighting

# Note: Identified case numbers in the Canton of Zurich from whom participants were invited
# 18-39 years: 28,970 cases identified, 135 in overall study population 
# 40-64 years: 26,695 cases identified, 144 in overall study population 
# 65+ years: 10,988 cases identified, 152 in overall study population 

# Define sampling weights
# Adds `sampling_weight`: participants in the study divided by identified
# cases for the participant's age group. Despite its name the value is a
# sampling fraction. Age groups other than the three listed get NA.
source_data_w_wtd <- source_data_w %>% 
  mutate(sampling_weight = unname(c("18-39 years" = 135/28970, 
                                    "40-64 years" = 144/26695, 
                                    "65+ years" = 152/10988)[as.character(age_group)]))

# Initiate dataframes for analysis
# Seropositivity result columns to summarise: anti-S IgA, anti-S IgG and the
# combined result, at each of the four timepoints (ordered W2, M1, M3, M6)
variables <- paste0(rep(c("result_S_IgA", "result_S_IgG", "result_S_IgA_IgG"), times = 4), 
                    "_", 
                    rep(c("W2", "M1", "M3", "M6"), each = 3))

# One-row result tables (all NA), one column per variable, for the unweighted
# and the reweighted proportions
res_ab_uwtd <- setNames(data.frame(matrix(NA, nrow = 1, ncol = length(variables))), variables)
res_ab_wtd <- res_ab_uwtd

# Prepare weighted analysis
# Survey design for the reweighted analysis: one cluster per participant id,
# stratified by age group, with the age-group sampling fraction as selection
# probability and no finite population correction.
# The first argument of svydesign() is `ids`; it is spelled out here instead
# of relying on partial matching of `id`.
source_data_design <- survey::svydesign(ids = ~ id, 
                                        strata = ~ age_group, 
                                        probs = ~ sampling_weight, 
                                        fpc = NULL, 
                                        data = source_data_w_wtd)

# Calculate unweighted proportions of seropositive participants in the study
# For every result column in `variables`: share of non-missing values falling
# into the second category of table(), as a percentage with one decimal.
# NOTE(review): assumes the second factor level is the seropositive one.
res_ab_uwtd[variables] <- lapply(variables, function(v) {
  shares <- as.numeric(prop.table(table(source_data_w[[v]])))
  paste(round(shares[2] * 100, 1), "%", sep = "")
})

# Calculate reweighted proportions of seropositive participants (reweighting based on overall number of identified cases to adjust for potential effects of age stratified sampling)
res_ab_wtd$result_S_IgA_W2 <- paste(round(as.numeric(survey::svymean(~ result_S_IgA_W2, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgG_W2 <- paste(round(as.numeric(survey::svymean(~ result_S_IgG_W2, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgA_IgG_W2 <- paste(round(as.numeric(survey::svymean(~ result_S_IgA_IgG_W2, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgA_M1 <- paste(round(as.numeric(survey::svymean(~ result_S_IgA_M1, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgG_M1 <- paste(round(as.numeric(survey::svymean(~ result_S_IgG_M1, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgA_IgG_M1 <- paste(round(as.numeric(survey::svymean(~ result_S_IgA_IgG_M1, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgA_M3 <- paste(round(as.numeric(survey::svymean(~ result_S_IgA_M3, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgG_M3 <- paste(round(as.numeric(survey::svymean(~ result_S_IgG_M3, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgA_IgG_M3 <- paste(round(as.numeric(survey::svymean(~ result_S_IgA_IgG_M3, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgA_M6 <- paste(round(as.numeric(survey::svymean(~ result_S_IgA_M6, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgG_M6 <- paste(round(as.numeric(survey::svymean(~ result_S_IgG_M6, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")
res_ab_wtd$result_S_IgA_IgG_M6 <- paste(round(as.numeric(survey::svymean(~ result_S_IgA_IgG_M6, source_data_design, method = "mean", na.rm = T))[2] * 100, 1), "%", sep = "")

# Stack unweighted and weighted estimates (labelled in `type`) and reshape so that each row is one
# result variable and each timepoint suffix (text after the last underscore) becomes its own column
res_ab_sens <- list(unweighted = res_ab_uwtd, weighted = res_ab_wtd) %>% 
  bind_rows(.id = "type") %>% 
  pivot_longer(cols = -type, 
               names_to = c("result", ".value"), 
               names_pattern = "(.*)_(.*)")

# Print results from unweighted and weighted analysis
res_ab_sens


### Neutralizing Antibody Trajectories ---------------------------------------------------------------------- 

## Figure 2d: Anti-Wildtype SARS-CoV-2 Neutralizing Antibody Levels (IC50 values; subsample)
# Scatterplot of all subsample measurements against days since diagnosis (log10 y-axis)
trend_na_wt <- ggplot(
  data = mutate(filter(source_data_g, subsample == "Yes"), group = "Overall"), # `group` only supplies the facet strip label
  mapping = aes(x = time_since_diagnosis, y = neutr_wt_cc)
) + 
  geom_point(size = point_sz) + 
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) + # dotted reference line at IC50 = 50
  facet_grid(cols = vars(group)) + 
  scale_x_continuous(limits = c(0, 220)) + 
  scale_y_log10(limits = c(0.5, 5e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Neutralizing Activity (subsample)", y = "Anti-Wildtype IC50", x = "Days since diagnosis")
trend_na_wt

# Split violins (one half per sex) with overlaid boxplots per timepoint, faceted by two-level age group
trend_violin_na_wt <- ggplot(
  data = filter(source_data_g, subsample == "Yes"), 
  mapping = aes(x = timepoint, y = neutr_wt_cc, fill = sex)
) + 
  geom_split_violin(lwd = line_sz) + 
  geom_boxplot(width = 0.3, outlier.shape = NA, outlier.alpha = 0.2, fill = "#FFFFFF", lwd = line_sz) + 
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) + 
  facet_grid(cols = vars(age_group_2l)) + 
  scale_fill_manual(values = col[c(15,13)]) + 
  scale_y_log10(limits = c(0.5, 5e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  guides(fill = guide_legend(keywidth = .25, keyheight = .25)) + 
  labs(title = "Neutralizing Activity against Wildtype SARS-CoV-2 (subsample)", y = "Anti-Wildtype IC50", x = "Timepoint", fill = "")
trend_violin_na_wt

# Export both panels as PDF (titles removed; y-axis label also removed on the violin panel)
ggsave(filename = "./Fig2D1.pdf", 
       plot = trend_na_wt + labs(title = NULL), 
       device = "pdf", width = 3.2, height = 4, units = "cm")

ggsave(filename = "./Fig2D2.pdf", 
       plot = trend_violin_na_wt + labs(title = NULL, y = NULL), 
       device = "pdf", width = 5.4, height = 4, units = "cm")


## Figure 2e: Anti-Delta SARS-CoV-2 Neutralizing Antibody Levels (IC50 values; subsample)
# Scatterplot of all subsample measurements against days since diagnosis (log10 y-axis)
trend_na_delta <- ggplot(
  data = mutate(filter(source_data_g, subsample == "Yes"), group = "Overall"), # `group` only supplies the facet strip label
  mapping = aes(x = time_since_diagnosis, y = neutr_delta_cc)
) + 
  geom_point(size = point_sz) + 
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) + # dotted reference line at IC50 = 50
  facet_grid(cols = vars(group)) + 
  scale_x_continuous(limits = c(0, 220)) + 
  scale_y_log10(limits = c(0.5, 5e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Neutralizing Activity (subsample)", y = "Anti-Delta IC50", x = "Days since diagnosis")
trend_na_delta

# Split violins (one half per sex) with overlaid boxplots per timepoint, faceted by two-level age group
trend_violin_na_delta <- ggplot(
  data = filter(source_data_g, subsample == "Yes"), 
  mapping = aes(x = timepoint, y = neutr_delta_cc, fill = sex)
) + 
  geom_split_violin(lwd = line_sz) + 
  geom_boxplot(width = 0.3, outlier.shape = NA, outlier.alpha = 0.2, fill = "#FFFFFF", lwd = line_sz) + 
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) + 
  facet_grid(cols = vars(age_group_2l)) + 
  scale_fill_manual(values = col[c(15,13)]) + 
  scale_y_log10(limits = c(0.5, 5e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  guides(fill = guide_legend(keywidth = .25, keyheight = .25)) + 
  labs(title = "Neutralizing Activity against Delta SARS-CoV-2 (subsample)", y = "Anti-Delta IC50", x = "Timepoint", fill = "")
trend_violin_na_delta

# Export both panels as PDF (titles removed; y-axis label also removed on the violin panel)
ggsave(filename = "./Fig2E1.pdf", 
       plot = trend_na_delta + labs(title = NULL), 
       device = "pdf", width = 3.2, height = 4, units = "cm")

ggsave(filename = "./Fig2E2.pdf", 
       plot = trend_violin_na_delta + labs(title = NULL, y = NULL), 
       device = "pdf", width = 5.4, height = 4, units = "cm")


## Figure 2f: Anti-Omicron SARS-CoV-2 Neutralizing Antibody Levels (IC50 values; subsample)
# Scatterplot of all subsample measurements against days since diagnosis (log10 y-axis)
trend_na_omicron <- ggplot(
  data = mutate(filter(source_data_g, subsample == "Yes"), group = "Overall"), # `group` only supplies the facet strip label
  mapping = aes(x = time_since_diagnosis, y = neutr_omicron_cc)
) + 
  geom_point(size = point_sz) + 
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) + # dotted reference line at IC50 = 50
  facet_grid(cols = vars(group)) + 
  scale_x_continuous(limits = c(0, 220)) + 
  scale_y_log10(limits = c(0.5, 5e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Neutralizing Activity (subsample)", y = "Anti-Omicron IC50", x = "Days since diagnosis")
trend_na_omicron

# Split violins (one half per sex) with overlaid boxplots per timepoint, faceted by two-level age group
trend_violin_na_omicron <- ggplot(
  data = filter(source_data_g, subsample == "Yes"), 
  mapping = aes(x = timepoint, y = neutr_omicron_cc, fill = sex)
) + 
  geom_split_violin(lwd = line_sz) + 
  geom_boxplot(width = 0.3, outlier.shape = NA, outlier.alpha = 0.2, fill = "#FFFFFF", lwd = line_sz) + 
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) + 
  facet_grid(cols = vars(age_group_2l)) + 
  scale_fill_manual(values = col[c(15,13)]) + 
  scale_y_log10(limits = c(0.5, 5e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  guides(fill = guide_legend(keywidth = .25, keyheight = .25)) + 
  labs(title = "Neutralizing Activity against Omicron SARS-CoV-2 (subsample)", y = "Anti-Omicron IC50", x = "Timepoint", fill = "")
trend_violin_na_omicron

# Export both panels as PDF (titles removed; y-axis label also removed on the violin panel)
ggsave(filename = "./Fig2F1.pdf", 
       plot = trend_na_omicron + labs(title = NULL), 
       device = "pdf", width = 3.2, height = 4, units = "cm")

ggsave(filename = "./Fig2F2.pdf", 
       plot = trend_violin_na_omicron + labs(title = NULL, y = NULL), 
       device = "pdf", width = 5.4, height = 4, units = "cm")


### T Cell Trajectories ---------------------------------------------------------------------- 

## Figure 4a (1): M-Specific T cell trajectories (SFU/1e6 PBMCs; subsample)
# Boxplot per timepoint with jittered individual measurements on a log10 axis
trend_m_box_tc <- ggplot(
  data = mutate(filter(source_data_g, subsample == "Yes"), group = "M-specific T Cells"), # `group` only supplies the facet strip label
  mapping = aes(x = timepoint, y = M.corr_cc)
) + 
  geom_boxplot(width = 0.6, color = col[3], outlier.alpha = 0, lwd = line_sz) + # outliers hidden, all points are drawn below
  geom_point(size = point_sz, position = position_jitter(width = 0.1)) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + # dotted reference line at 1 SFU
  facet_grid(cols = vars(group)) + 
  scale_y_log10(limits = c(0.5, 4e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "M-Specific T Cell Count per 1e6 PBMCs (subsample)", y = "SFU / 1e6 PBMCs", x = "Timepoint")
trend_m_box_tc

# Friedman test across timepoints (paired by participant)
# Participants with a missing value at any timepoint are excluded so that blocks are complete
source_data_g %>% 
  filter(subsample == "Yes", 
         !id %in% source_data_g$id[is.na(source_data_g$M.corr_cc)]) %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::friedman_test(M.corr_cc ~ timepoint | id)

# Kruskal-Wallis test across timepoints (unpaired; uses all available subsample measurements)
source_data_g %>% 
  filter(subsample == "Yes") %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::kruskal_test(M.corr_cc ~ timepoint)

# Export panel as PDF (title removed)
ggsave(filename = "./Fig4A1.pdf", 
       plot = trend_m_box_tc + labs(title = NULL), 
       device = "pdf", width = 4.5, height = 4, units = "cm")


## Figure 4a (2): N-Specific T cell trajectories (SFU/1e6 PBMCs; subsample)
# Boxplot per timepoint with jittered individual measurements on a log10 axis
trend_n_box_tc <- ggplot(
  data = mutate(filter(source_data_g, subsample == "Yes"), group = "N-specific T Cells"), # `group` only supplies the facet strip label
  mapping = aes(x = timepoint, y = N.corr_cc)
) + 
  geom_boxplot(width = 0.6, color = col[3], outlier.alpha = 0, lwd = line_sz) + # outliers hidden, all points are drawn below
  geom_point(size = point_sz, position = position_jitter(width = 0.1)) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + # dotted reference line at 1 SFU
  facet_grid(cols = vars(group)) + 
  scale_y_log10(limits = c(0.5, 4e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "N-Specific T Cell Count per 1e6 PBMCs (subsample)", y = "SFU / 1e6 PBMCs", x = "Timepoint")
trend_n_box_tc

# Friedman test across timepoints (paired by participant)
# Participants with a missing value at any timepoint are excluded so that blocks are complete
source_data_g %>% 
  filter(subsample == "Yes", 
         !id %in% source_data_g$id[is.na(source_data_g$N.corr_cc)]) %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::friedman_test(N.corr_cc ~ timepoint | id)

# Kruskal-Wallis test across timepoints (unpaired; uses all available subsample measurements)
source_data_g %>% 
  filter(subsample == "Yes") %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::kruskal_test(N.corr_cc ~ timepoint)

# Export panel as PDF (title and y-axis label removed)
ggsave(filename = "./Fig4A2.pdf", 
       plot = trend_n_box_tc + labs(title = NULL, y = NULL), 
       device = "pdf", width = 4.2, height = 4, units = "cm")


## Figure 4a (3): S1-Specific T cell trajectories (SFU/1e6 PBMCs; subsample)
# Boxplot per timepoint with jittered individual measurements on a log10 axis
trend_s1_box_tc <- ggplot(
  data = mutate(filter(source_data_g, subsample == "Yes"), group = "S1-specific T Cells"), # `group` only supplies the facet strip label
  mapping = aes(x = timepoint, y = S1.corr_cc)
) + 
  geom_boxplot(width = 0.6, color = col[3], outlier.alpha = 0, lwd = line_sz) + # outliers hidden, all points are drawn below
  geom_point(size = point_sz, position = position_jitter(width = 0.1)) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + # dotted reference line at 1 SFU
  facet_grid(cols = vars(group)) + 
  scale_y_log10(limits = c(0.5, 4e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "S1-Specific T Cell Count per 1e6 PBMCs (subsample)", y = "SFU / 1e6 PBMCs", x = "Timepoint")
trend_s1_box_tc

# Friedman test across timepoints (paired by participant)
# Participants with a missing value at any timepoint are excluded so that blocks are complete
source_data_g %>% 
  filter(subsample == "Yes", 
         !id %in% source_data_g$id[is.na(source_data_g$S1.corr_cc)]) %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::friedman_test(S1.corr_cc ~ timepoint | id)

# Kruskal-Wallis test across timepoints (unpaired; uses all available subsample measurements)
source_data_g %>% 
  filter(subsample == "Yes") %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::kruskal_test(S1.corr_cc ~ timepoint)

# Export panel as PDF (title and y-axis label removed)
ggsave(filename = "./Fig4A3.pdf", 
       plot = trend_s1_box_tc + labs(title = NULL, y = NULL), 
       device = "pdf", width = 4.2, height = 4, units = "cm")


## Figure 4a (4): S2-Specific T cell trajectories (SFU/1e6 PBMCs; subsample)
# Boxplot per timepoint with jittered individual measurements on a log10 axis
trend_s2_box_tc <- ggplot(
  data = mutate(filter(source_data_g, subsample == "Yes"), group = "S2-specific T Cells"), # `group` only supplies the facet strip label
  mapping = aes(x = timepoint, y = S2.corr_cc)
) + 
  geom_boxplot(width = 0.6, color = col[3], outlier.alpha = 0, lwd = line_sz) + # outliers hidden, all points are drawn below
  geom_point(size = point_sz, position = position_jitter(width = 0.1)) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + # dotted reference line at 1 SFU
  facet_grid(cols = vars(group)) + 
  scale_y_log10(limits = c(0.5, 4e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "S2-Specific T Cell Count per 1e6 PBMCs (subsample)", y = "SFU / 1e6 PBMCs", x = "Timepoint")
trend_s2_box_tc

# Friedman test across timepoints (paired by participant)
# Participants with a missing value at any timepoint are excluded so that blocks are complete
source_data_g %>% 
  filter(subsample == "Yes", 
         !id %in% source_data_g$id[is.na(source_data_g$S2.corr_cc)]) %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::friedman_test(S2.corr_cc ~ timepoint | id)

# Kruskal-Wallis test across timepoints (unpaired; uses all available subsample measurements)
source_data_g %>% 
  filter(subsample == "Yes") %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::kruskal_test(S2.corr_cc ~ timepoint)

# Export panel as PDF (title and y-axis label removed)
ggsave(filename = "./Fig4A4.pdf", 
       plot = trend_s2_box_tc + labs(title = NULL, y = NULL), 
       device = "pdf", width = 4.2, height = 4, units = "cm")


## Figure 4b: T cell epitope fractions (subsample)
# Calculate proportions: per measurement, each pool's value (M, N, S1, S2) divided by TC.corr,
# then averaged per timepoint and pool (missing fractions are ignored)
# NOTE(review): TC.corr is presumably the total response across pools - confirm
# NOTE(review): source_data is not filtered on `subsample` here; rows without T cell data only drop out via na.rm - confirm intended
df_tc_fract <- source_data %>% 
  mutate(M.corr_fract = M.corr / TC.corr, 
         N.corr_fract = N.corr / TC.corr, 
         S1.corr_fract = S1.corr / TC.corr, 
         S2.corr_fract = S2.corr / TC.corr) %>% 
  select(id, timepoint, 
         M.corr_fract, N.corr_fract, S1.corr_fract, S2.corr_fract) %>% 
  pivot_longer(!c(id, timepoint), 
               names_to = "var", 
               values_to = "result") %>% 
  mutate(var = sub(".corr_fract", " pool", var, fixed = TRUE)) %>% # literal match: "M.corr_fract" -> "M pool"
  group_by(timepoint, var) %>% 
  summarise(prop = mean(result, na.rm = TRUE), .groups = "drop") %>% 
  mutate(var = fct_rev(factor(var, levels = c("M pool", "N pool", "S1 pool", "S2 pool")))) # reversed levels set the stacking order
df_tc_fract

# Plot data: stacked bars of the mean pool fractions per timepoint
trend_tc_fract <- df_tc_fract %>% 
  mutate(group = "Specific T Cells") %>% # `group` only supplies the facet strip label
  ggplot(aes(y = prop, x = timepoint, fill = var)) +
  geom_bar(position = "stack", stat = "identity", width = 0.8) + 
  scale_y_continuous(labels = scales::percent) + 
  scale_fill_manual(values = col[c(5:2)]) + 
  facet_grid(. ~ group) +
  graph_style + 
  guides(fill = guide_legend(keywidth = .25, keyheight = .25, reverse = TRUE)) + # legend order reversed back to M, N, S1, S2
  labs(title = "Epitope-specific T Cell Response", y = "Proportion", x = "Timepoint", fill = "")
trend_tc_fract

# Save figure
ggsave("./Fig4B.pdf", 
       plot = trend_tc_fract + labs(title = NULL), 
       width = 5.5, height = 4, device = "pdf", units = "cm")


## Figure 4d (1): AIM+ / CD4+ T cells (subsample)
# Boxplot per timepoint with jittered individual measurements on a log10 axis
trend_aim_cd4_tc <- ggplot(
  data = mutate(filter(source_data_g, subsample == "Yes"), group = "AIM+ / CD4+ T Cells"), # `group` only supplies the facet strip label
  mapping = aes(x = timepoint, y = CD4AIM.delta_cc)
) + 
  geom_boxplot(width = 0.6, color = col[20], outlier.alpha = 0, lwd = line_sz) + # outliers hidden, all points are drawn below
  geom_point(size = point_sz, position = position_jitter(width = 0.1)) + 
  facet_grid(cols = vars(group)) + 
  scale_y_log10(limits = c(1e-5, 1e-1), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "AIM+ per CD4+ T Cells (subsample)", y = "AIM+ / CD4+", x = "Timepoint")
trend_aim_cd4_tc

# Friedman test across timepoints (paired by participant)
# Participants with a missing value at any timepoint are excluded so that blocks are complete
source_data_g %>% 
  filter(subsample == "Yes", 
         !id %in% source_data_g$id[is.na(source_data_g$CD4AIM.delta_cc)]) %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::friedman_test(CD4AIM.delta_cc ~ timepoint | id)

# Kruskal-Wallis test across timepoints (unpaired; uses all available subsample measurements)
source_data_g %>% 
  filter(subsample == "Yes") %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::kruskal_test(CD4AIM.delta_cc ~ timepoint)

# Export panel as PDF (title removed)
ggsave(filename = "./Fig4D1.pdf", 
       plot = trend_aim_cd4_tc + labs(title = NULL), 
       device = "pdf", width = 4.5, height = 4, units = "cm")


## Figure 4d (2): AIM+ / CD8+ T cells (subsample)
# Boxplot per timepoint with jittered individual measurements on a log10 axis
trend_aim_cd8_tc <- ggplot(
  data = mutate(filter(source_data_g, subsample == "Yes"), group = "AIM+ / CD8+ T Cells"), # `group` only supplies the facet strip label
  mapping = aes(x = timepoint, y = CD8AIM.delta_cc)
) + 
  geom_boxplot(width = 0.6, color = col[20], outlier.alpha = 0, lwd = line_sz) + # outliers hidden, all points are drawn below
  geom_point(size = point_sz, position = position_jitter(width = 0.1)) + 
  facet_grid(cols = vars(group)) + 
  scale_y_log10(limits = c(1e-5, 1e-1), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "AIM+ per CD8+ T Cells (subsample)", y = "AIM+ / CD8+", x = "Timepoint")
trend_aim_cd8_tc

# Friedman test across timepoints (paired by participant)
# Participants with a missing value at any timepoint are excluded so that blocks are complete
source_data_g %>% 
  filter(subsample == "Yes", 
         !id %in% source_data_g$id[is.na(source_data_g$CD8AIM.delta_cc)]) %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::friedman_test(CD8AIM.delta_cc ~ timepoint | id)

# Kruskal-Wallis test across timepoints (unpaired; uses all available subsample measurements)
source_data_g %>% 
  filter(subsample == "Yes") %>% 
  mutate(id = factor(id), timepoint = droplevels(timepoint)) %>% 
  rstatix::kruskal_test(CD8AIM.delta_cc ~ timepoint)

# Export panel as PDF (title removed)
ggsave(filename = "./Fig4D2.pdf", 
       plot = trend_aim_cd8_tc + labs(title = NULL), 
       device = "pdf", width = 4.5, height = 4, units = "cm")


## Figure 4e: Stimulated T cell subsets (subsample)
# Prepare data: reshape the FACS columns to long format
# Each column name is split at its LAST dot into `class` (prefix) and `subset` (suffix); rows for the
# "naive" and "sumcount" subsets are dropped
# Only columns whose class reduces to exactly "AIM+ CD4+" / "AIM+ CD8+" after stripping ".calc" are used by the plots below
df_facs <- source_data %>% 
  filter(subsample == "Yes") %>% 
  select(id, timepoint, time_since_diagnosis, CD4AIM.sumcount:CD8AIM.calc.TEMRA) %>% 
  pivot_longer(cols = CD4AIM.sumcount:CD8AIM.calc.TEMRA, 
               names_to = c("class", "subset"), 
               names_pattern = "(.*)[.](.*)", 
               values_to = "value") %>% 
  mutate(type = ifelse(grepl("nr", class), "ns", ifelse(grepl(".calc", class), "calc", NA)), # label column kind before `class` is cleaned
         class = gsub(".calc", "", class),
         class = gsub("CD4AIM", "AIM+ CD4+", class),
         class = gsub("CD8AIM", "AIM+ CD8+", class),
         subset = factor(subset, levels = c("TEMRA", "EM", "CM", "naive", "sumcount")), 
         subset = fct_recode(subset, "TEM" = "EM", "TCM" = "CM")) %>% 
  filter(!subset %in% c("naive", "sumcount"))

# AIM+ CD4+ subsets over time since diagnosis, with one linear trend line per subset
facs_pheno_cd4 <- ggplot(
  data = mutate(filter(df_facs, class == "AIM+ CD4+"), group = "AIM+ CD4+"), # `group` only supplies the facet strip label
  mapping = aes(x = time_since_diagnosis, y = value, color = subset)
) + 
  geom_point(size = point_sz) + 
  geom_smooth(method = "lm", se = FALSE, lwd = 0.5) +
  facet_wrap(vars(group)) +
  scale_x_continuous(limits = c(0, 220)) + 
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) + 
  scale_color_manual(values = col[c(11,19,21)]) +
  graph_style + 
  theme(legend.position = "bottom") + 
  labs(title = "AIM+ CD4+ T Cell Subsets (subsample)", y = "Proportion", x = "Days since diagnosis", color = "")
facs_pheno_cd4

# AIM+ CD8+ subsets over time since diagnosis (df_facs already excludes the naive / sumcount rows)
facs_pheno_cd8 <- ggplot(
  data = mutate(filter(df_facs, class == "AIM+ CD8+"), group = "AIM+ CD8+"), # `group` only supplies the facet strip label
  mapping = aes(x = time_since_diagnosis, y = value, color = subset)
) + 
  geom_point(size = point_sz) + 
  geom_smooth(method = "lm", se = FALSE, lwd = 0.5) +
  facet_wrap(vars(group)) +
  scale_x_continuous(limits = c(0, 220)) + 
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) + 
  scale_color_manual(values = col[c(11,19,21)]) +
  graph_style + 
  theme(legend.position = "bottom", legend.key.size = unit(3, "mm"), legend.margin = margin(-5, 0, 0, 0)) + 
  labs(title = "AIM+ CD8+ T Cell Subsets (subsample)", y = "Proportion", x = "Days since diagnosis", color = "")
facs_pheno_cd8

# Export panels as PDF: CD4 panel without legend, CD8 panel (slightly taller) carries the shared legend
ggsave(filename = "./Fig4E1.pdf", 
       plot = facs_pheno_cd4 + labs(title = NULL) + theme(legend.position = "none"), 
       device = "pdf", width = 6, height = 4, units = "cm")

ggsave(filename = "./Fig4E2.pdf", 
       plot = facs_pheno_cd8 + labs(title = NULL), 
       device = "pdf", width = 6, height = 4.5, units = "cm")


### Decay Estimation ---------------------------------------------------------------------- 

## Presets
# Number of bootstrap replicates used for the 95% confidence intervals of the decay curves
nruns <- 100 # run with 5000 repetitions in article

# Curve / ribbon colors for the antibody, neutralizing antibody and T cell decay figures
col_dcy_ab <- col[9]
col_dcy_nt <- col[14]
col_dcy_tc <- col[3]


## Figure 3a, Supplementary Table 5: Anti-S IgA decay (MFI ratio; overall study population)
# Prepare data (restrict data to maximum and all subsequent timepoints and rescale time axis to start with maximum concentration)
# Only participants with at least one "positive" anti-S IgA result are kept
# A missing value makes cumsum() NA for that and all later rows of the participant, which the filter then drops
data_ic_imm_re <- source_data_g %>% 
  group_by(id) %>% 
  mutate(is_peak = ifelse(logratio_S_IgA == max(logratio_S_IgA, na.rm = TRUE), 1, 0), # label timepoint with maximum anti-S IgA value
         from_peak = cumsum(is_peak), # >= 1 from the (first) maximum onwards
         epos_s_iga = as.numeric("positive" %in% result_S_IgA)) %>% # 1 if ever anti-S IgA positive
  filter(epos_s_iga == 1, from_peak >= 1) %>% # restrict to maximum and subsequent timepoints
  mutate(time_diag_max = time_since_diagnosis[is_peak == 1 & from_peak == 1], # days from diagnosis to (first) maximum
         time_max_test = time_since_diagnosis - time_diag_max) %>% # determine time since maximum concentration
  ungroup() %>% 
  select(id, subsample, age_group, sex, symp_count_init_3l, timepoint, time_max_test, time_diag_max, ratio_S_IgA, logratio_S_IgA)

# Fit univariable model (random intercept per participant)
fit <- lmer(logratio_S_IgA ~ time_max_test + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (percentiles of the population-level predictions)
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs=.025, na.rm=TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs=.975, na.rm=TRUE)))

# Plot decay curve
# Predictions and CI bounds are stored as data columns: aes() is only evaluated when the plot is built,
# and `fit`, `ci.lb` and `ci.ub` are overwritten further down, so referencing them directly would break
# any later re-rendering of this plot object
thalf_plot_s_iga <- data_ic_imm_re %>% 
  mutate(group = "Anti-S IgA Decay", 
         fit_pred = exp(predict(fit, re.form = NA)), 
         fit_lb = exp(ci.lb), 
         fit_ub = exp(ci.ub)) %>% 
  ggplot(aes(y = ratio_S_IgA, x = time_max_test)) + 
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) + 
  geom_hline(yintercept = 6.5, linetype = "dotted", lwd = line_sz) + # dotted reference line at MFI ratio 6.5
  facet_grid(. ~ group) +
  geom_line(aes(y = fit_pred), size = 1, col = col_dcy_ab) + 
  geom_ribbon(aes(ymin = fit_lb, ymax = fit_ub), alpha = 0.25, fill = col_dcy_ab) + 
  scale_y_continuous(trans = "log10", limits = c(1, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  scale_x_continuous(limits = c(0, 200)) + 
  graph_style + 
  labs(title = "Anti-S IgA Antibody Decay", y = "S IgA MFI ratio", x = "Days since maximum antibody concentration")
thalf_plot_s_iga

# Save figure
ggsave("./Fig3A.pdf", 
       plot = thalf_plot_s_iga + labs(title = NULL), 
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_s_iga <- half_life_ab(fit)
thalf_s_iga

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(logratio_S_IgA ~ time_max_test + time_diag_max + age_group + sex + symp_count_init_3l + (1 | id), data = data_ic_imm_re)
thalf_s_iga_adj <- half_life_ab(fit_adj)
thalf_s_iga_adj

# Anti-S IgA decay estimation for subsample; fit univariable model and print half life (unadjusted model)
fit <- lmer(logratio_S_IgA ~ time_max_test + (1 | id), data = filter(data_ic_imm_re, subsample == "Yes"))
thalf_s_iga_tc <- half_life_ab(fit)
thalf_s_iga_tc

# Anti-S IgA decay estimation for subsample; fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(logratio_S_IgA ~ time_max_test + time_diag_max + age_group + sex + symp_count_init_3l + (1 | id), data = filter(data_ic_imm_re, subsample == "Yes"))
thalf_s_iga_adj_tc <- half_life_ab(fit_adj)
thalf_s_iga_adj_tc


## Figure 3b, Supplementary Table 5: Anti-S IgG decay (MFI ratio; overall study population)
# Prepare data (restrict data to maximum and all subsequent timepoints and rescale time axis to start with maximum concentration)
# Only participants with at least one "positive" anti-S IgG result are kept
# A missing value makes cumsum() NA for that and all later rows of the participant, which the filter then drops
data_ic_imm_re <- source_data_g %>% 
  group_by(id) %>% 
  mutate(is_peak = ifelse(logratio_S_IgG == max(logratio_S_IgG, na.rm = TRUE), 1, 0), # label timepoint with maximum anti-S IgG value
         from_peak = cumsum(is_peak), # >= 1 from the (first) maximum onwards
         epos_s_igg = as.numeric("positive" %in% result_S_IgG)) %>% # 1 if ever anti-S IgG positive
  filter(epos_s_igg == 1, from_peak >= 1) %>% # restrict to maximum and subsequent timepoints
  mutate(time_diag_max = time_since_diagnosis[is_peak == 1 & from_peak == 1], # days from diagnosis to (first) maximum
         time_max_test = time_since_diagnosis - time_diag_max) %>% # determine time since maximum concentration
  ungroup() %>% 
  select(id, subsample, age_group, sex, symp_count_init_3l, timepoint, time_max_test, time_diag_max, ratio_S_IgG, logratio_S_IgG)

# Fit univariable model (random intercept per participant)
fit <- lmer(logratio_S_IgG ~ time_max_test + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (percentiles of the population-level predictions)
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs=.025, na.rm=TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs=.975, na.rm=TRUE)))

# Plot decay curve
# Predictions and CI bounds are stored as data columns: aes() is only evaluated when the plot is built,
# and `fit`, `ci.lb` and `ci.ub` are overwritten further down, so referencing them directly would break
# any later re-rendering of this plot object
thalf_plot_s_igg <- data_ic_imm_re %>% 
  mutate(group = "Anti-S IgG Decay", 
         fit_pred = exp(predict(fit, re.form = NA)), 
         fit_lb = exp(ci.lb), 
         fit_ub = exp(ci.ub)) %>% 
  ggplot(aes(y = ratio_S_IgG, x = time_max_test)) + 
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) + 
  geom_hline(yintercept = 6, linetype = "dotted", lwd = line_sz) + # dotted reference line at MFI ratio 6
  facet_grid(. ~ group) +
  geom_line(aes(y = fit_pred), size = 1, col = col_dcy_ab) + 
  geom_ribbon(aes(ymin = fit_lb, ymax = fit_ub), alpha = 0.25, fill = col_dcy_ab) + 
  scale_y_continuous(trans = "log10", limits = c(1, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  scale_x_continuous(limits = c(0, 200)) + 
  graph_style + 
  labs(title = "Anti-S IgG Antibody Decay", y = "S IgG MFI ratio", x = "Days since maximum antibody concentration")
thalf_plot_s_igg

# Save figure
ggsave("./Fig3B.pdf", 
       plot = thalf_plot_s_igg + labs(title = NULL), 
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_s_igg <- half_life_ab(fit)
thalf_s_igg

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(logratio_S_IgG ~ time_max_test + time_diag_max + age_group + sex + symp_count_init_3l + (1 | id), data = data_ic_imm_re)
thalf_s_igg_adj <- half_life_ab(fit_adj)
thalf_s_igg_adj

# Anti-S IgG decay estimation for subsample; fit univariable model and print half life (unadjusted model)
fit <- lmer(logratio_S_IgG ~ time_max_test + (1 | id), data = filter(data_ic_imm_re, subsample == "Yes"))
thalf_s_igg_tc <- half_life_ab(fit)
thalf_s_igg_tc

# Anti-S IgG decay estimation for subsample; fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(logratio_S_IgG ~ time_max_test + time_diag_max + age_group + sex + symp_count_init_3l + (1 | id), data = filter(data_ic_imm_re, subsample == "Yes"))
thalf_s_igg_adj_tc <- half_life_ab(fit_adj)
thalf_s_igg_adj_tc


## Figure 3c, Supplementary Table 5: Anti-N IgG decay (MFI ratio; subsample)
# Prepare data (restrict data to maximum and all subsequent timepoints and rescale time axis to start with maximum concentration)
# Only subsample participants with at least one "positive" anti-N IgG result are kept
# A missing value makes cumsum() NA for that and all later rows of the participant, which the filter then drops
data_ic_imm_re <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  group_by(id) %>% 
  mutate(is_peak = ifelse(logratio_N_IgG == max(logratio_N_IgG, na.rm = TRUE), 1, 0), # label timepoint with maximum anti-N IgG value
         from_peak = cumsum(is_peak), # >= 1 from the (first) maximum onwards
         epos_n_igg = as.numeric("positive" %in% result_N_IgG)) %>% # 1 if ever anti-N IgG positive
  filter(epos_n_igg == 1, from_peak >= 1) %>% # restrict to maximum and subsequent timepoints
  mutate(time_diag_max = time_since_diagnosis[is_peak == 1 & from_peak == 1], # days from diagnosis to (first) maximum
         time_max_test = time_since_diagnosis - time_diag_max) %>% # determine time since maximum concentration
  ungroup() %>% 
  select(id, subsample, age_group, sex, symp_count_init_3l, timepoint, time_max_test, time_diag_max, ratio_N_IgG, logratio_N_IgG)

# Fit univariable model (random intercept per participant)
fit <- lmer(logratio_N_IgG ~ time_max_test + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (percentiles of the population-level predictions)
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs=.025, na.rm=TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs=.975, na.rm=TRUE)))

# Plot decay curve
# Predictions and CI bounds are stored as data columns: aes() is only evaluated when the plot is built,
# and `fit`, `ci.lb` and `ci.ub` are overwritten further down, so referencing them directly would break
# any later re-rendering of this plot object
thalf_plot_n_igg <- data_ic_imm_re %>% 
  mutate(group = "Anti-N IgG Decay", 
         fit_pred = exp(predict(fit, re.form = NA)), 
         fit_lb = exp(ci.lb), 
         fit_ub = exp(ci.ub)) %>% 
  ggplot(aes(y = ratio_N_IgG, x = time_max_test)) + 
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) + 
  geom_hline(yintercept = 6, linetype = "dotted", lwd = line_sz) + # dotted reference line at MFI ratio 6
  facet_grid(. ~ group) +
  geom_line(aes(y = fit_pred), size = 1, col = col_dcy_ab) + 
  geom_ribbon(aes(ymin = fit_lb, ymax = fit_ub), alpha = 0.25, fill = col_dcy_ab) + 
  scale_y_continuous(trans = "log10", limits = c(1, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  scale_x_continuous(limits = c(0, 200)) + 
  graph_style + 
  labs(title = "Anti-N IgG Antibody Decay (subsample)", y = "N IgG MFI ratio", x = "Days since maximum antibody concentration")
thalf_plot_n_igg

# Save figure
ggsave("./Fig3C.pdf", 
       plot = thalf_plot_n_igg + labs(title = NULL), 
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_n_igg <- half_life_ab(fit)
thalf_n_igg

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(logratio_N_IgG ~ time_max_test + time_diag_max + age_group + sex + symp_count_init_3l + (1 | id), data = data_ic_imm_re)
thalf_n_igg_adj <- half_life_ab(fit_adj)
thalf_n_igg_adj


## Supplementary Table 5: Roche Elecsys Anti-S Ig decay (U/ml; subsample)
# Build the decay dataset for subsample participants with at least one "positive" Roche anti-S result:
# per participant, keep the timepoint of the maximum value and everything after it, and express time
# relative to that maximum
data_ic_imm_re <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  group_by(id) %>% 
  mutate(is_peak = ifelse(Roche_S_Ig == max(Roche_S_Ig, na.rm = TRUE), 1, 0), # 1 at the participant's maximum Roche anti-S Ig value
         from_peak = cumsum(is_peak), # >= 1 from the (first) maximum onwards
         epos_roche_s = ifelse(sum("positive" %in% result_Roche_S_Ig, na.rm = TRUE) >= 1, 1, 0)) %>% # 1 if ever positive
  filter(epos_roche_s == 1, from_peak >= 1) %>% 
  mutate(time_diag_max = time_since_diagnosis[is_peak == 1 & from_peak == 1], # days from diagnosis to (first) maximum
         time_max_test = time_since_diagnosis - time_diag_max, # days since maximum
         log_Roche_S_Ig = log(Roche_S_Ig)) %>% # natural-log outcome for the linear mixed model
  ungroup() %>% 
  select(id, subsample, age_group, sex, symp_count_init_3l, timepoint, time_max_test, time_diag_max, Roche_S_Ig, log_Roche_S_Ig)

# Unadjusted model (time only, random intercept per participant) and its half life
fit <- lmer(log_Roche_S_Ig ~ time_max_test + (1 | id), data = data_ic_imm_re)
thalf_s_igg_roche <- half_life_ab(fit)
thalf_s_igg_roche

# Adjusted model (additionally time to maximum, age group, sex and initial symptom count) and its half life
fit_adj <- lmer(log_Roche_S_Ig ~ time_max_test + time_diag_max + age_group + sex + symp_count_init_3l + (1 | id), data = data_ic_imm_re)
thalf_s_igg_roche_adj <- half_life_ab(fit_adj)
thalf_s_igg_roche_adj


## Supplementary Table 5: Roche Elecsys Anti-N Ig decay (COI; subsample)
# Prepare data: per participant, keep the measurement with the maximum anti-N Ig
# value and everything recorded after it, and express time relative to that maximum.
# The running count below follows the row order within `id`
# (presumably chronological in source_data_g - verify).
# NOTE(review): a missing Roche_N_Ig value makes the running count NA for that row
# and all later rows of the participant, so those rows are dropped by the filter.
data_ic_imm_re <- source_data_g %>%
  filter(subsample == "Yes") %>%
  group_by(id) %>%
  mutate(
    is_peak = Roche_N_Ig == max(Roche_N_Ig, na.rm = TRUE), # TRUE at the participant's maximum anti-N Ig value
    inc = cumsum(is_peak), # >= 1 from the first maximum onwards
    ever_pos = "positive" %in% result_Roche_N_Ig # at least one positive anti-N Ig result
  ) %>%
  filter(ever_pos, inc >= 1) %>% # ever-positive participants, maximum and subsequent timepoints only
  mutate(
    time_diag_max = time_since_diagnosis[is_peak & inc == 1], # days from diagnosis to (first) maximum
    time_max_test = time_since_diagnosis - time_diag_max # days since maximum concentration
  ) %>%
  ungroup() %>%
  mutate(log_Roche_N_Ig = log(Roche_N_Ig)) %>%
  select(
    id, subsample, age_group, sex, symp_count_init_3l, timepoint,
    time_max_test, time_diag_max, Roche_N_Ig, log_Roche_N_Ig
  )

# Univariable model (random intercept per participant); print unadjusted half life.
# Results are stored under anti-N names so that the anti-S Roche results
# (thalf_s_igg_roche, thalf_s_igg_roche_adj) are no longer overwritten.
fit <- lmer(log_Roche_N_Ig ~ time_max_test + (1 | id), data = data_ic_imm_re)
thalf_n_ig_roche <- half_life_ab(fit)
thalf_n_ig_roche

# Multivariable model; print adjusted half life
fit_adj <- lmer(
  log_Roche_N_Ig ~ time_max_test + time_diag_max + age_group + sex +
    symp_count_init_3l + (1 | id),
  data = data_ic_imm_re
)
thalf_n_ig_roche_adj <- half_life_ab(fit_adj)
thalf_n_ig_roche_adj


## Figure 3d, Supplementary Table 5: Anti-Wildtype SARS-CoV-2 neutralizing antibody decay (IC50 values; subsample)
# Prepare data: per participant, keep the measurement with the maximum anti-wildtype
# neutralizing activity and everything recorded after it, and express time relative
# to that maximum. The running count below follows the row order within `id`
# (presumably chronological in source_data_g - verify).
# NOTE(review): a missing neutr_wt value makes the running count NA for that row
# and all later rows of the participant, so those rows are dropped by the filter.
data_ic_imm_re <- source_data_g %>%
  filter(subsample == "Yes") %>%
  group_by(id) %>%
  mutate(
    is_peak = neutr_wt == max(neutr_wt, na.rm = TRUE), # TRUE at the participant's maximum anti-wildtype value
    inc = cumsum(is_peak), # >= 1 from the first maximum onwards
    ever_pos = "positive" %in% result_neutr_wt # at least one positive anti-wildtype result
  ) %>%
  filter(ever_pos, inc >= 1) %>% # ever-positive participants, maximum and subsequent timepoints only
  mutate(
    time_diag_max = time_since_diagnosis[is_peak & inc == 1], # days from diagnosis to (first) maximum
    time_max_test = time_since_diagnosis - time_diag_max # days since maximum neutralizing activity
  ) %>%
  ungroup() %>%
  select(
    id, subsample, age_group, sex, symp_count_init_3l, timepoint,
    time_max_test, time_diag_max, neutr_wt_cc, log_neutr_wt
  )

# Fit univariable model (random intercept per participant)
fit <- lmer(log_neutr_wt ~ time_max_test + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (2.5% / 97.5% quantiles per observation)
# NOTE(review): `re.form = NA` appears to be matched by bootMer()'s own `re.form`
# argument rather than forwarded to predict(); if so, the bootstrapped predictions
# include the random intercepts while the plotted curve does not - confirm intent.
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .025, na.rm = TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .975, na.rm = TRUE)))

# Plot decay curve
# The fitted curve and its bounds are written into the plot data instead of being
# referenced from aes(): aesthetics are evaluated when the plot is rendered, and
# `fit`, `ci.lb` and `ci.ub` are reassigned by the following sections, so a later
# re-render of this plot object would otherwise pick up another model.
thalf_plot_na_wt <- data_ic_imm_re %>%
  mutate(
    group = "Anti-Wildtype Neutr. AB Decay",
    decay_fit = unname(exp(predict(fit, re.form = NA))), # population-level curve, back-transformed
    decay_lb = unname(exp(ci.lb)),
    decay_ub = unname(exp(ci.ub))
  ) %>%
  ggplot(aes(y = neutr_wt_cc, x = time_max_test)) +
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) +
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) +
  facet_grid(. ~ group) +
  geom_line(aes(y = decay_fit), size = 1, col = col_dcy_nt) +
  geom_ribbon(aes(ymin = decay_lb, ymax = decay_ub), alpha = 0.25, fill = col_dcy_nt) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 5e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  scale_x_continuous(limits = c(0, 200)) +
  graph_style +
  labs(title = "Anti-Wildtype Neutralizing Antibody Decay (subsample)", y = "Anti-Wildtype IC50", x = "Days since maximum neutralizing activity")
thalf_plot_na_wt

# Save figure (without title)
ggsave("./Fig3D.pdf",
       plot = thalf_plot_na_wt + labs(title = NULL),
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_na_wt <- half_life_ab(fit)
thalf_na_wt

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(
  log_neutr_wt ~ time_max_test + time_diag_max + age_group + sex +
    symp_count_init_3l + (1 | id),
  data = data_ic_imm_re
)
thalf_na_wt_adj <- half_life_ab(fit_adj)
thalf_na_wt_adj


## Figure 3e, Supplementary Table 5: Anti-Delta SARS-CoV-2 neutralizing antibody decay (IC50 values; subsample)
# Prepare data: per participant, keep the measurement with the maximum anti-Delta
# neutralizing activity and everything recorded after it, and express time relative
# to that maximum. The running count below follows the row order within `id`
# (presumably chronological in source_data_g - verify).
# NOTE(review): a missing neutr_delta value makes the running count NA for that row
# and all later rows of the participant, so those rows are dropped by the filter.
data_ic_imm_re <- source_data_g %>%
  filter(subsample == "Yes") %>%
  group_by(id) %>%
  mutate(
    is_peak = neutr_delta == max(neutr_delta, na.rm = TRUE), # TRUE at the participant's maximum anti-Delta value
    inc = cumsum(is_peak), # >= 1 from the first maximum onwards
    ever_pos = "positive" %in% result_neutr_delta # at least one positive anti-Delta result
  ) %>%
  filter(ever_pos, inc >= 1) %>% # ever-positive participants, maximum and subsequent timepoints only
  mutate(
    time_diag_max = time_since_diagnosis[is_peak & inc == 1], # days from diagnosis to (first) maximum
    time_max_test = time_since_diagnosis - time_diag_max # days since maximum neutralizing activity
  ) %>%
  ungroup() %>%
  select(
    id, subsample, age_group, sex, symp_count_init_3l, timepoint,
    time_max_test, time_diag_max, neutr_delta_cc, log_neutr_delta
  )

# Fit univariable model (random intercept per participant)
fit <- lmer(log_neutr_delta ~ time_max_test + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (2.5% / 97.5% quantiles per observation)
# NOTE(review): `re.form = NA` appears to be matched by bootMer()'s own `re.form`
# argument rather than forwarded to predict(); if so, the bootstrapped predictions
# include the random intercepts while the plotted curve does not - confirm intent.
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .025, na.rm = TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .975, na.rm = TRUE)))

# Plot decay curve
# The fitted curve and its bounds are written into the plot data instead of being
# referenced from aes(): aesthetics are evaluated when the plot is rendered, and
# `fit`, `ci.lb` and `ci.ub` are reassigned by the following sections, so a later
# re-render of this plot object would otherwise pick up another model.
thalf_plot_na_delta <- data_ic_imm_re %>%
  mutate(
    group = "Anti-Delta Neutr. AB Decay",
    decay_fit = unname(exp(predict(fit, re.form = NA))), # population-level curve, back-transformed
    decay_lb = unname(exp(ci.lb)),
    decay_ub = unname(exp(ci.ub))
  ) %>%
  ggplot(aes(y = neutr_delta_cc, x = time_max_test)) +
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) +
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) +
  facet_grid(. ~ group) +
  geom_line(aes(y = decay_fit), size = 1, col = col_dcy_nt) +
  geom_ribbon(aes(ymin = decay_lb, ymax = decay_ub), alpha = 0.25, fill = col_dcy_nt) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 5e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  scale_x_continuous(limits = c(0, 200)) +
  graph_style +
  labs(title = "Anti-Delta Neutralizing Antibody Decay (subsample)", y = "Anti-Delta IC50", x = "Days since maximum neutralizing activity")
thalf_plot_na_delta

# Save figure (without title)
ggsave("./Fig3E.pdf",
       plot = thalf_plot_na_delta + labs(title = NULL),
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_na_delta <- half_life_ab(fit)
thalf_na_delta

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(
  log_neutr_delta ~ time_max_test + time_diag_max + age_group + sex +
    symp_count_init_3l + (1 | id),
  data = data_ic_imm_re
)
thalf_na_delta_adj <- half_life_ab(fit_adj)
thalf_na_delta_adj


## Anti-Omicron SARS-CoV-2 neutralizing antibody decay (IC50 values; subsample)
# Note: could not be estimated


## Supplementary Figure 2d, Supplementary Table 5: Overall T cell decay (SFU/1e6 PBMCs; subsample)
# Prepare data (restrict to subsample and available overall T cell measurements)
data_ic_imm_re <- source_data_g %>%
  filter(subsample == "Yes", !is.na(log_TC.corr))

# Fit univariable model (random intercept per participant)
fit <- lmer(log_TC.corr ~ time_since_diagnosis + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (2.5% / 97.5% quantiles per observation)
# NOTE(review): `re.form = NA` appears to be matched by bootMer()'s own `re.form`
# argument rather than forwarded to predict(); if so, the bootstrapped predictions
# include the random intercepts while the plotted curve does not - confirm intent.
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .025, na.rm = TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .975, na.rm = TRUE)))

# Plot decay curve
# The fitted curve and its bounds are written into the plot data instead of being
# referenced from aes(): aesthetics are evaluated when the plot is rendered, and
# `fit`, `ci.lb` and `ci.ub` are reassigned by the following sections, so a later
# re-render of this plot object would otherwise pick up another model.
thalf_plot_tc <- data_ic_imm_re %>%
  mutate(
    group = "Overall T Cell Decay",
    decay_fit = unname(exp(predict(fit, re.form = NA))), # population-level curve, back-transformed
    decay_lb = unname(exp(ci.lb)),
    decay_ub = unname(exp(ci.ub))
  ) %>%
  ggplot(aes(y = TC.corr_cc, x = time_since_diagnosis)) +
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) +
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) +
  facet_grid(. ~ group) +
  geom_line(aes(y = decay_fit), size = 1, col = col_dcy_tc) +
  geom_ribbon(aes(ymin = decay_lb, ymax = decay_ub), alpha = 0.25, fill = col_dcy_tc) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  scale_x_continuous(limits = c(0, 220)) +
  graph_style +
  labs(title = "Overall T Cell Decay (subsample)", y = "Pooled SFU / 1e6 PBMCs", x = "Days since diagnosis")
thalf_plot_tc

# Save figure (without title)
ggsave("./FigS2D.pdf",
       plot = thalf_plot_tc + labs(title = NULL),
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_tc <- half_life_tc(fit)
thalf_tc

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(
  log_TC.corr ~ time_since_diagnosis + age_group + sex +
    symp_count_init_3l + (1 | id),
  data = data_ic_imm_re
)
thalf_tc_adj <- half_life_tc(fit_adj)
thalf_tc_adj


## Supplementary Figure 2e, Supplementary Table 5: M-specific T cell decay (SFU/1e6 PBMCs; subsample)
# Prepare data (restrict to subsample and available M-specific T cell measurements)
data_ic_imm_re <- source_data_g %>%
  filter(subsample == "Yes", !is.na(log_M.corr))

# Fit univariable model (random intercept per participant)
fit <- lmer(log_M.corr ~ time_since_diagnosis + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (2.5% / 97.5% quantiles per observation)
# NOTE(review): `re.form = NA` appears to be matched by bootMer()'s own `re.form`
# argument rather than forwarded to predict(); if so, the bootstrapped predictions
# include the random intercepts while the plotted curve does not - confirm intent.
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .025, na.rm = TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .975, na.rm = TRUE)))

# Plot decay curve
# The fitted curve and its bounds are written into the plot data instead of being
# referenced from aes(): aesthetics are evaluated when the plot is rendered, and
# `fit`, `ci.lb` and `ci.ub` are reassigned by the following sections, so a later
# re-render of this plot object would otherwise pick up another model.
thalf_plot_m_tc <- data_ic_imm_re %>%
  mutate(
    group = "M-specific T Cell Decay",
    decay_fit = unname(exp(predict(fit, re.form = NA))), # population-level curve, back-transformed
    decay_lb = unname(pmax(exp(ci.lb), .5)), # lower bound floored at 0.5, the lower y-axis limit
    decay_ub = unname(exp(ci.ub))
  ) %>%
  ggplot(aes(y = M.corr_cc, x = time_since_diagnosis)) +
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) +
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) +
  facet_grid(. ~ group) +
  geom_line(aes(y = decay_fit), size = 1, col = col_dcy_tc) +
  geom_ribbon(aes(ymin = decay_lb, ymax = decay_ub), alpha = 0.25, fill = col_dcy_tc) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  scale_x_continuous(limits = c(0, 220)) +
  graph_style +
  labs(title = "M-Specific T Cell Decay (subsample)", y = "M SFU / 1e6 PBMCs", x = "Days since diagnosis")
thalf_plot_m_tc

# Save figure (without title)
ggsave("./FigS2E.pdf",
       plot = thalf_plot_m_tc + labs(title = NULL),
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_m_tc <- half_life_tc(fit)
thalf_m_tc

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(
  log_M.corr ~ time_since_diagnosis + age_group + sex +
    symp_count_init_3l + (1 | id),
  data = data_ic_imm_re
)
thalf_m_tc_adj <- half_life_tc(fit_adj)
thalf_m_tc_adj


## Supplementary Figure 2f, Supplementary Table 5: N-specific T cell decay (SFU/1e6 PBMCs; subsample)
# Prepare data (restrict to subsample and available N-specific T cell measurements)
data_ic_imm_re <- source_data_g %>%
  filter(subsample == "Yes", !is.na(log_N.corr))

# Fit univariable model (random intercept per participant)
fit <- lmer(log_N.corr ~ time_since_diagnosis + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (2.5% / 97.5% quantiles per observation)
# NOTE(review): `re.form = NA` appears to be matched by bootMer()'s own `re.form`
# argument rather than forwarded to predict(); if so, the bootstrapped predictions
# include the random intercepts while the plotted curve does not - confirm intent.
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .025, na.rm = TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .975, na.rm = TRUE)))

# Plot decay curve
# The fitted curve and its bounds are written into the plot data instead of being
# referenced from aes(): aesthetics are evaluated when the plot is rendered, and
# `fit`, `ci.lb` and `ci.ub` are reassigned by the following sections, so a later
# re-render of this plot object would otherwise pick up another model.
thalf_plot_n_tc <- data_ic_imm_re %>%
  mutate(
    group = "N-specific T Cell Decay",
    decay_fit = unname(exp(predict(fit, re.form = NA))), # population-level curve, back-transformed
    decay_lb = unname(pmax(exp(ci.lb), .5)), # lower bound floored at 0.5, the lower y-axis limit
    decay_ub = unname(exp(ci.ub))
  ) %>%
  ggplot(aes(y = N.corr_cc, x = time_since_diagnosis)) +
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) +
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) +
  facet_grid(. ~ group) +
  geom_line(aes(y = decay_fit), size = 1, col = col_dcy_tc) +
  geom_ribbon(aes(ymin = decay_lb, ymax = decay_ub), alpha = 0.25, fill = col_dcy_tc) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  scale_x_continuous(limits = c(0, 220)) +
  graph_style +
  labs(title = "N-Specific T Cell Decay (subsample)", y = "N SFU / 1e6 PBMCs", x = "Days since diagnosis")
thalf_plot_n_tc

# Save figure (without title)
ggsave("./FigS2F.pdf",
       plot = thalf_plot_n_tc + labs(title = NULL),
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_n_tc <- half_life_tc(fit)
thalf_n_tc

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(
  log_N.corr ~ time_since_diagnosis + age_group + sex +
    symp_count_init_3l + (1 | id),
  data = data_ic_imm_re
)
thalf_n_tc_adj <- half_life_tc(fit_adj)
thalf_n_tc_adj


## Supplementary Figure 2g, Supplementary Table 5: S1-specific T cell decay (SFU/1e6 PBMCs; subsample)
# Prepare data (restrict to subsample and available S1-specific T cell measurements)
data_ic_imm_re <- source_data_g %>%
  filter(subsample == "Yes", !is.na(log_S1.corr))

# Fit univariable model (random intercept per participant)
fit <- lmer(log_S1.corr ~ time_since_diagnosis + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (2.5% / 97.5% quantiles per observation)
# NOTE(review): `re.form = NA` appears to be matched by bootMer()'s own `re.form`
# argument rather than forwarded to predict(); if so, the bootstrapped predictions
# include the random intercepts while the plotted curve does not - confirm intent.
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .025, na.rm = TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .975, na.rm = TRUE)))

# Plot decay curve
# The fitted curve and its bounds are written into the plot data instead of being
# referenced from aes(): aesthetics are evaluated when the plot is rendered, and
# `fit`, `ci.lb` and `ci.ub` are reassigned by the following sections, so a later
# re-render of this plot object would otherwise pick up another model.
thalf_plot_s1_tc <- data_ic_imm_re %>%
  mutate(
    group = "S1-specific T Cell Decay",
    decay_fit = unname(exp(predict(fit, re.form = NA))), # population-level curve, back-transformed
    decay_lb = unname(pmax(exp(ci.lb), .5)), # lower bound floored at 0.5, the lower y-axis limit
    decay_ub = unname(exp(ci.ub))
  ) %>%
  ggplot(aes(y = S1.corr_cc, x = time_since_diagnosis)) +
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) +
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) +
  facet_grid(. ~ group) +
  geom_line(aes(y = decay_fit), size = 1, col = col_dcy_tc) +
  geom_ribbon(aes(ymin = decay_lb, ymax = decay_ub), alpha = 0.25, fill = col_dcy_tc) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  scale_x_continuous(limits = c(0, 220)) +
  graph_style +
  labs(title = "S1-Specific T Cell Decay (subsample)", y = "S1 SFU / 1e6 PBMCs", x = "Days since diagnosis")
thalf_plot_s1_tc

# Save figure (without title)
ggsave("./FigS2G.pdf",
       plot = thalf_plot_s1_tc + labs(title = NULL),
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_s1_tc <- half_life_tc(fit)
thalf_s1_tc

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(
  log_S1.corr ~ time_since_diagnosis + age_group + sex +
    symp_count_init_3l + (1 | id),
  data = data_ic_imm_re
)
thalf_s1_tc_adj <- half_life_tc(fit_adj)
thalf_s1_tc_adj


## Supplementary Figure 2h, Supplementary Table 5: S2-specific T cell decay (SFU/1e6 PBMCs; subsample)
# Prepare data (restrict to subsample and available S2-specific T cell measurements)
data_ic_imm_re <- source_data_g %>%
  filter(subsample == "Yes", !is.na(log_S2.corr))

# Fit univariable model (random intercept per participant)
fit <- lmer(log_S2.corr ~ time_since_diagnosis + (1 | id), data = data_ic_imm_re)

# Calculate bootstrap 95% confidence interval (2.5% / 97.5% quantiles per observation)
# NOTE(review): `re.form = NA` appears to be matched by bootMer()'s own `re.form`
# argument rather than forwarded to predict(); if so, the bootstrapped predictions
# include the random intercepts while the plotted curve does not - confirm intent.
pred_boot <- bootMer(fit, predict, nsim = nruns, re.form = NA)
ci.lb <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .025, na.rm = TRUE)))
ci.ub <- apply(pred_boot$t, 2, function(x) as.numeric(quantile(x, probs = .975, na.rm = TRUE)))

# Plot decay curve
# The fitted curve and its bounds are written into the plot data instead of being
# referenced from aes(): aesthetics are evaluated when the plot is rendered, and
# `fit`, `ci.lb` and `ci.ub` are reassigned by other sections, so a later
# re-render of this plot object would otherwise pick up another model.
thalf_plot_s2_tc <- data_ic_imm_re %>%
  mutate(
    group = "S2-specific T Cell Decay",
    decay_fit = unname(exp(predict(fit, re.form = NA))), # population-level curve, back-transformed
    decay_lb = unname(pmax(exp(ci.lb), .5)), # lower bound floored at 0.5, the lower y-axis limit
    decay_ub = unname(exp(ci.ub))
  ) %>%
  ggplot(aes(y = S2.corr_cc, x = time_since_diagnosis)) +
  geom_point(size = point_sz) +
  geom_line(aes(group = id), alpha = .1, lwd = line_sz) +
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) +
  facet_grid(. ~ group) +
  geom_line(aes(y = decay_fit), size = 1, col = col_dcy_tc) +
  geom_ribbon(aes(ymin = decay_lb, ymax = decay_ub), alpha = 0.25, fill = col_dcy_tc) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) +
  scale_x_continuous(limits = c(0, 220)) +
  graph_style +
  labs(title = "S2-Specific T Cell Decay (subsample)", y = "S2 SFU / 1e6 PBMCs", x = "Days since diagnosis")
thalf_plot_s2_tc

# Save figure (without title)
ggsave("./FigS2H.pdf",
       plot = thalf_plot_s2_tc + labs(title = NULL),
       width = 5.7, height = 4, device = "pdf", units = "cm")

# Print half life (unadjusted model)
thalf_s2_tc <- half_life_tc(fit)
thalf_s2_tc

# Fit multivariable model and print half life (adjusted model)
fit_adj <- lmer(
  log_S2.corr ~ time_since_diagnosis + age_group + sex +
    symp_count_init_3l + (1 | id),
  data = data_ic_imm_re
)
thalf_s2_tc_adj <- half_life_tc(fit_adj)
thalf_s2_tc_adj


### Assay Positivity ---------------------------------------------------------------------- 

## Supplementary Figure 1a: Anti-S assay positivity with Luminex assay (overall study population)
# Positive proportion per timepoint and anti-S antibody type, with 95% Wilson confidence intervals
trend_ab_prop_d <- source_data_g %>%
  select(
    id, subsample, timepoint,
    result_S_IgA, result_S_IgG, result_N_IgG,
    result_S_IgA_IgG, result_S_N_IgG, result_S_N_IgA_IgG
  ) %>%
  # Collapse compound tokens so that every name splits as result_<antigen>_<antibody>
  rename_all(~ gsub("S_N", "SN", gsub("IgA_IgG", "IgAIgG", .))) %>%
  pivot_longer(
    cols = !c(id, subsample, timepoint),
    names_to = c(".value", "antigen", "antibody"),
    names_pattern = "(.*)_(.*)_(.*)"
  ) %>%
  mutate(
    antibody = recode_factor(
      factor(antibody),
      "IgAIgG" = "Anti-S IgA or IgG",
      "IgA" = "Anti-S IgA",
      "IgG" = "Anti-S IgG"
    )
  ) %>%
  group_by(timepoint, antigen, antibody, result) %>%
  summarise(n = n()) %>% # counts per result; output stays grouped by timepoint, antigen, antibody
  filter(!is.na(result)) %>%
  mutate(n_tot = sum(n)) %>% # denominator: non-missing results within timepoint, antigen, antibody
  ungroup() %>%
  mutate(
    prop = n / n_tot,
    prop.lb = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Lower"],
    prop.ub = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Upper"]
  ) %>%
  filter(antigen == "S" & result == "positive")
trend_ab_prop_d

# Plot proportions (points with error bars, dodged by antibody type)
trend_ab_prop <- trend_ab_prop_d %>%
  ggplot(aes(x = timepoint, y = prop, ymin = prop.lb, ymax = prop.ub, color = antibody, group = antibody)) +
  geom_point(size = point_sz * 2, position = position_dodge(width = 0.45)) +
  geom_errorbar(size = line_sz * 2, width = 0.3, position = position_dodge(width = 0.45)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2), labels = scales::percent) +
  scale_color_manual(values = col[11:9]) +
  graph_style +
  theme(legend.key.size = unit(3, "mm")) +
  labs(
    title = "Antibody Positivity (full sample, Luminex assay)",
    x = "Timepoint",
    y = "Proportion",
    color = ""
  )
trend_ab_prop

# Save figure
ggsave(
  filename = "./FigS1A.pdf",
  plot = trend_ab_prop,
  device = "pdf",
  width = 7.1,
  height = 4,
  units = "cm"
)


## Supplementary Figure 1b: Anti-N and Anti-S assay positivity with Luminex assay (subsample)
# Positive proportion per timepoint and antigen/antibody combination, with 95% Wilson confidence intervals
trend_ab_prop_ds_d <- source_data_g %>%
  select(
    id, subsample, timepoint,
    result_S_IgA, result_S_IgG, result_N_IgG,
    result_S_IgA_IgG, result_S_N_IgG, result_S_N_IgA_IgG
  ) %>%
  filter(subsample == "Yes") %>%
  # Collapse compound tokens so that every name splits as result_<antigen>_<antibody>
  rename_all(~ gsub("S_N", "SN", gsub("IgA_IgG", "IgAIgG", .))) %>%
  pivot_longer(
    cols = !c(id, subsample, timepoint),
    names_to = c(".value", "antigen", "antibody"),
    names_pattern = "(.*)_(.*)_(.*)"
  ) %>%
  mutate(
    antibody = recode_factor(factor(antibody), "IgAIgG" = "IgA or IgG"),
    antigen = recode_factor(factor(antigen), "SN" = "S or N")
  ) %>%
  group_by(timepoint, antigen, antibody, result) %>%
  summarise(n = n()) %>% # counts per result; output stays grouped by timepoint, antigen, antibody
  filter(!is.na(result)) %>%
  mutate(n_tot = sum(n)) %>% # denominator: non-missing results within timepoint, antigen, antibody
  ungroup() %>%
  mutate(
    # Combined label, ordered for the legend, then spelled out for the composite outcomes
    ab_ag = factor(
      paste0("Anti-", antigen, " ", antibody),
      levels = c(
        "Anti-S or N IgA or IgG", "Anti-S IgA or IgG", "Anti-S or N IgG",
        "Anti-S IgA", "Anti-S IgG", "Anti-N IgG"
      )
    ),
    ab_ag = fct_recode(
      ab_ag,
      "Anti-S IgA or IgG or anti-N IgG" = "Anti-S or N IgA or IgG",
      "Anti-S IgG or anti-N IgG" = "Anti-S or N IgG"
    ),
    prop = n / n_tot,
    prop.lb = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Lower"],
    prop.ub = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Upper"]
  ) %>%
  filter(
    result == "positive",
    ab_ag %in% c("Anti-S IgA or IgG or anti-N IgG", "Anti-S IgA", "Anti-S IgG", "Anti-N IgG")
  )
trend_ab_prop_ds_d

# Plot proportions (points with error bars, dodged by antigen/antibody combination)
trend_ab_prop_ds <- trend_ab_prop_ds_d %>%
  ggplot(aes(x = timepoint, y = prop, ymin = prop.lb, ymax = prop.ub, color = ab_ag, group = ab_ag)) +
  geom_point(size = point_sz * 2, position = position_dodge(width = 0.6)) +
  geom_errorbar(size = line_sz * 2, width = 0.3, position = position_dodge(width = 0.6)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2), labels = scales::percent) +
  scale_color_manual(values = col[11:8]) +
  graph_style +
  theme(legend.key.size = unit(3, "mm")) +
  labs(
    title = "Antibody Positivity (subsample, Luminex assay)",
    x = "Timepoint",
    y = "Proportion",
    color = ""
  )
trend_ab_prop_ds

# Save figure
ggsave(
  filename = "./FigS1B.pdf",
  plot = trend_ab_prop_ds,
  device = "pdf",
  width = 8.4,
  height = 4,
  units = "cm"
)


## Supplementary Figure 1d: Anti-N and Anti-S assay positivity with Roche Elecsys assay (subsample)
# Positive proportion per timepoint and antigen, with 95% Wilson confidence intervals
trend_ab_prop_ds_d_roche <- source_data_g %>%
  select(id, subsample, timepoint, result_Roche_S_Ig, result_Roche_N_Ig, result_Roche_S_N_Ig) %>%
  filter(subsample == "Yes") %>%
  # Collapse "S_N" so that every name splits as result_<test>_<antigen>_<antibody>
  rename_all(~ gsub("S_N", "SN", .)) %>%
  pivot_longer(
    cols = !c(id, subsample, timepoint),
    names_to = c(".value", "test", "antigen", "antibody"),
    names_pattern = "(.*)_(.*)_(.*)_(.*)"
  ) %>%
  mutate(
    antibody = recode_factor(factor(antibody), "IgG" = "Ig"),
    antigen = recode_factor(factor(antigen), "SN" = "S or N")
  ) %>%
  group_by(timepoint, antigen, antibody, result) %>%
  summarise(n = n()) %>% # counts per result; output stays grouped by timepoint, antigen, antibody
  filter(!is.na(result)) %>%
  mutate(n_tot = sum(n)) %>% # denominator: non-missing results within timepoint, antigen, antibody
  ungroup() %>%
  mutate(
    # Combined label, ordered for the legend, then spelled out for the composite outcome
    ab_ag = factor(
      paste0("Anti-", antigen, " ", antibody),
      levels = c("Anti-S or N Ig", "Anti-S Ig", "Anti-N Ig")
    ),
    ab_ag = fct_recode(ab_ag, "Anti-S or anti-N Ig" = "Anti-S or N Ig"),
    prop = n / n_tot,
    prop.lb = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Lower"],
    prop.ub = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Upper"]
  ) %>%
  filter(
    result == "positive",
    ab_ag %in% c("Anti-S or anti-N Ig", "Anti-S Ig", "Anti-N Ig")
  )
trend_ab_prop_ds_d_roche

# Plot proportions (points with error bars, dodged by antigen)
trend_ab_prop_ds_roche <- trend_ab_prop_ds_d_roche %>%
  ggplot(aes(x = timepoint, y = prop, ymin = prop.lb, ymax = prop.ub, color = ab_ag, group = ab_ag)) +
  geom_point(size = point_sz * 2, position = position_dodge(width = 0.45)) +
  geom_errorbar(size = line_sz * 2, width = 0.3, position = position_dodge(width = 0.45)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2), labels = scales::percent) +
  scale_color_manual(values = col[c(11, 9:8)]) +
  graph_style +
  theme(legend.key.size = unit(3, "mm")) +
  labs(
    title = "Antibody Positivity (subsample, Roche assay)",
    x = "Timepoint",
    y = "Proportion",
    color = ""
  )
trend_ab_prop_ds_roche

# Save figure
ggsave(
  filename = "./FigS1D.pdf",
  plot = trend_ab_prop_ds_roche,
  device = "pdf",
  width = 7.2,
  height = 4,
  units = "cm"
)


## Supplementary Figure 2a: Neutralizing antibody assay positivity among all participants (Luminex assay; subsample)
# Positive proportion per timepoint and variant, with 95% Wilson confidence intervals
trend_na_ab_prop_ds_d <- source_data_g %>%
  select(id, subsample, timepoint, starts_with("result_neutr")) %>%
  filter(subsample == "Yes") %>%
  # result_neutr_<variant> -> result_<variant>, with "wt" spelled out as "wildtype"
  rename_all(~ gsub("result_neutr", "result", gsub("wt", "wildtype", .))) %>%
  pivot_longer(
    cols = !c(id, subsample, timepoint),
    names_to = c(".value", "variant"),
    names_pattern = "(.*)_(.*)"
  ) %>%
  mutate(variant = factor(variant, levels = c("wildtype", "delta", "omicron"))) %>%
  group_by(timepoint, variant, result) %>%
  summarise(n = n()) %>% # counts per result; output stays grouped by timepoint, variant
  # artificially add zero count row for omicron testing
  bind_rows(
    data.frame(
      timepoint = factor("M6"),
      variant = factor("omicron"),
      result = factor("positive"),
      n = 0
    )
  ) %>%
  filter(!is.na(result)) %>%
  mutate(
    n_tot = sum(n), # denominator: non-missing results within the current groups
    variant = fct_recode(
      variant,
      "Anti-Wildtype" = "wildtype",
      "Anti-Delta" = "delta",
      "Anti-Omicron" = "omicron"
    )
  ) %>%
  ungroup() %>%
  mutate(
    prop = n / n_tot,
    prop.lb = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Lower"],
    prop.ub = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Upper"]
  ) %>%
  filter(result == "positive")
trend_na_ab_prop_ds_d

# Plot proportions (points with error bars, dodged by variant)
trend_na_ab_prop_ds <- trend_na_ab_prop_ds_d %>%
  ggplot(aes(x = timepoint, y = prop, ymin = prop.lb, ymax = prop.ub, color = variant, group = variant)) +
  geom_point(size = point_sz * 2, position = position_dodge(width = 0.45)) +
  geom_errorbar(size = line_sz * 2, width = 0.3, position = position_dodge(width = 0.45)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2), labels = scales::percent) +
  scale_color_manual(values = col[15:13]) +
  graph_style +
  theme(legend.key.size = unit(3, "mm")) +
  labs(
    title = "Neutralizing Antibody Positivity (subsample)",
    x = "Timepoint",
    y = "Proportion",
    color = ""
  )
trend_na_ab_prop_ds

# Save figure
ggsave(
  filename = "./FigS2A.pdf",
  plot = trend_na_ab_prop_ds,
  device = "pdf",
  width = 6.7,
  height = 4,
  units = "cm"
)


## Supplementary Figure 2b: Neutralizing antibody assay positivity among those testing seropositive (in anti-S IgA, anti-S IgG or anti-N IgG; Luminex assay; subsample)
# Positive proportion per timepoint and variant, with 95% Wilson confidence intervals,
# restricted to subsample measurements with a positive combined Luminex result
trend_na_ab_prop_ds_d_pos <- source_data_g %>%
  filter(result_S_N_IgA_IgG == "positive", subsample == "Yes") %>%
  select(id, subsample, timepoint, starts_with("result_neutr")) %>%
  # result_neutr_<variant> -> result_<variant>, with "wt" spelled out as "wildtype"
  rename_all(~ gsub("result_neutr", "result", gsub("wt", "wildtype", .))) %>%
  pivot_longer(
    cols = !c(id, subsample, timepoint),
    names_to = c(".value", "variant"),
    names_pattern = "(.*)_(.*)"
  ) %>%
  mutate(variant = factor(variant, levels = c("wildtype", "delta", "omicron"))) %>%
  group_by(timepoint, variant, result) %>%
  summarise(n = n()) %>% # counts per result; output stays grouped by timepoint, variant
  # artificially add zero count row for omicron testing
  bind_rows(
    data.frame(
      timepoint = factor("M6"),
      variant = factor("omicron"),
      result = factor("positive"),
      n = 0
    )
  ) %>%
  filter(!is.na(result)) %>%
  mutate(
    n_tot = sum(n), # denominator: non-missing results within the current groups
    variant = fct_recode(
      variant,
      "Anti-Wildtype" = "wildtype",
      "Anti-Delta" = "delta",
      "Anti-Omicron" = "omicron"
    )
  ) %>%
  ungroup() %>%
  mutate(
    prop = n / n_tot,
    prop.lb = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Lower"],
    prop.ub = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Upper"]
  ) %>%
  filter(result == "positive")
trend_na_ab_prop_ds_d_pos

# Plot proportions (points with error bars, dodged by variant)
trend_na_ab_prop_ds_pos <- trend_na_ab_prop_ds_d_pos %>%
  ggplot(aes(x = timepoint, y = prop, ymin = prop.lb, ymax = prop.ub, color = variant, group = variant)) +
  geom_point(size = point_sz * 2, position = position_dodge(width = 0.45)) +
  geom_errorbar(size = line_sz * 2, width = 0.3, position = position_dodge(width = 0.45)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2), labels = scales::percent) +
  scale_color_manual(values = col[15:13]) +
  graph_style +
  theme(legend.key.size = unit(3, "mm")) +
  labs(
    title = "Neutralizing Antibody Positivity (seropositives)",
    x = "Timepoint",
    y = "Proportion",
    color = ""
  )
trend_na_ab_prop_ds_pos

# Save figure
ggsave(
  filename = "./FigS2B.pdf",
  plot = trend_na_ab_prop_ds_pos,
  device = "pdf",
  width = 6.7,
  height = 4,
  units = "cm"
)


## Supplementary Figure 2c: T cell assay positivity (ELISpot; subsample)
# Positive proportion per timepoint and peptide pool, with 95% Wilson confidence intervals
trend_tc_prop_ds_d <- source_data_g %>%
  select(id, subsample, timepoint, ends_with("_Pos")) %>%
  filter(subsample == "Yes") %>%
  # <antigen>_Pos columns -> one row per antigen with the status in column `Pos`
  pivot_longer(
    cols = !c(id, subsample, timepoint),
    names_to = c("antigen", ".value"),
    names_pattern = "(.*)_(.*)"
  ) %>%
  mutate(result = ifelse(Pos == "positive", "positive", "negative")) %>%
  group_by(timepoint, antigen, result) %>%
  summarise(n = n()) %>% # counts per result; output stays grouped by timepoint, antigen
  filter(!is.na(result)) %>%
  mutate(n_tot = sum(n)) %>% # denominator: non-missing results within timepoint, antigen
  ungroup() %>%
  mutate(
    # Legend labels: "<antigen> pool", with "Any" spelled out as the list of pools
    antigen = ifelse(antigen == "S", "S2", antigen),
    antigen = ifelse(antigen == "Any", "M, N, S1 or S2", antigen),
    antigen = paste0(antigen, " pool"),
    antigen = factor(
      antigen,
      levels = c("M, N, S1 or S2 pool", "M pool", "N pool", "S1 pool", "S2 pool")
    ),
    prop = n / n_tot,
    prop.lb = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Lower"],
    prop.ub = Hmisc::binconf(n, n_tot, alpha = 0.05, method = "wilson")[, "Upper"]
  ) %>%
  filter(result == "positive")
trend_tc_prop_ds_d

# Plot proportions (points with error bars, dodged by peptide pool)
trend_tc_prop_ds <- trend_tc_prop_ds_d %>%
  ggplot(aes(x = timepoint, y = prop, ymin = prop.lb, ymax = prop.ub, color = antigen, group = antigen)) +
  geom_point(size = point_sz * 2, position = position_dodge(width = 0.75)) +
  geom_errorbar(size = line_sz * 2, width = 0.3, position = position_dodge(width = 0.75)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2), labels = scales::percent) +
  scale_color_manual(values = col[1:5]) +
  graph_style +
  theme(legend.key.size = unit(3, "mm")) +
  labs(
    title = "T Cell Positivity (subsample)",
    x = "Timepoint",
    y = "Proportion",
    color = ""
  )
trend_tc_prop_ds

# Save figure
ggsave(
  filename = "./FigS2C.pdf",
  plot = trend_tc_prop_ds,
  device = "pdf",
  width = 7.2,
  height = 4,
  units = "cm"
)


### Validation Luminex assay vs. Roche Elecsys assay ----------------------------------------------------------------------

## Supplementary Figure 1c: Percent agreement and Cohen's Kappa for Luminex vs. Roche Elecsys assay within subsample
# Create restricted dataset for subsample
source_data_sub <- source_data %>% 
  filter(subsample == "Yes")

# Helper: agreement between paired Luminex and Roche results.
#   label   - text stored in the `value` column of the returned row
#   luminex - Luminex assay results (one element per measurement)
#   roche   - Roche Elecsys results, paired element-wise with `luminex`
# Returns a one-row data frame with the proportion of concordant pairs
# (`perc_agree`, diagonal of the cross-table over its total) and Cohen's kappa
# (`cohen_agree`). Pairs with a missing result in either assay are not counted,
# as table() drops NA by default.
agreement_luminex_roche <- function(label, luminex, roche) {
  xtab <- table(luminex, roche)
  perc_agree <- Hmisc::binconf(sum(diag(xtab)), sum(xtab), alpha = 0.05, method = "wilson")
  perc_cohen <- psych::cohen.kappa(xtab)
  data.frame("value" = label, "perc_agree" = perc_agree[1], "cohen_agree" = perc_cohen$kappa)
}

# Calculate agreement for anti-S Ig (overall across all timepoints)
agree_s_ig <- agreement_luminex_roche(
  "Anti-S Ig (overall)",
  source_data_sub$result_S_IgG, source_data_sub$result_Roche_S_Ig
)
agree_s_ig

# Calculate agreement for anti-N Ig (overall across all timepoints)
agree_n_ig <- agreement_luminex_roche(
  "Anti-N Ig (overall)",
  source_data_sub$result_N_IgG, source_data_sub$result_Roche_N_Ig
)
agree_n_ig

# Calculate agreement for anti-S Ig or anti-N Ig (overall across all timepoints)
agree_s_n_ig <- agreement_luminex_roche(
  "Anti-S or anti-N Ig (overall)",
  source_data_sub$result_S_N_IgG, source_data_sub$result_Roche_S_N_Ig
)
agree_s_n_ig

# Calculate agreement for anti-S Ig or anti-N Ig (at 2 weeks after diagnosis)
agree_s_n_ig_w2 <- with(
  filter(source_data_sub, timepoint == "W2"),
  agreement_luminex_roche("Anti-S or anti-N Ig (W2)", result_S_N_IgG, result_Roche_S_N_Ig)
)
agree_s_n_ig_w2

# Calculate agreement for anti-S Ig or anti-N Ig (at 1 month after diagnosis)
agree_s_n_ig_m1 <- with(
  filter(source_data_sub, timepoint == "M1"),
  agreement_luminex_roche("Anti-S or anti-N Ig (M1)", result_S_N_IgG, result_Roche_S_N_Ig)
)
agree_s_n_ig_m1

# Calculate agreement for anti-S Ig or anti-N Ig (at 3 months after diagnosis)
agree_s_n_ig_m3 <- with(
  filter(source_data_sub, timepoint == "M3"),
  agreement_luminex_roche("Anti-S or anti-N Ig (M3)", result_S_N_IgG, result_Roche_S_N_Ig)
)
agree_s_n_ig_m3

# Calculate agreement for anti-S Ig or anti-N Ig (at 6 months after diagnosis)
agree_s_n_ig_m6 <- with(
  filter(source_data_sub, timepoint == "M6"),
  agreement_luminex_roche("Anti-S or anti-N Ig (M6)", result_S_N_IgG, result_Roche_S_N_Ig)
)
agree_s_n_ig_m6

# Combine results
df_agree_roche_luminex <- bind_rows(agree_s_ig, agree_n_ig, agree_s_n_ig, 
                                    agree_s_n_ig_w2, agree_s_n_ig_m1, agree_s_n_ig_m3, agree_s_n_ig_m6) %>% 
  mutate(value = fct_rev(factor(value, levels = unique(value))))

# Tile plot of percent agreement: one tile per comparison, value printed on the tile
agree_roche_luminex_perc <- df_agree_roche_luminex %>% 
  mutate(perc_label = paste0(formatC(perc_agree * 100, format = "f", digits = 1), "%")) %>% 
  ggplot(aes(y = value, x = NA, fill = perc_agree)) + 
  geom_tile() +
  geom_text(aes(label = perc_label), 
            size = 2.2, fontface = "bold", color = "#000000") +
  scale_fill_gradient2(low = col[3], mid = "yellow", high = col[20], 
                       midpoint = 0.5, limits = c(0, 1)) + 
  scale_x_discrete(position = "top") + 
  graph_style + 
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 6), 
        axis.title.x = element_text(face = "bold", size = 7), 
        axis.ticks = element_blank(), 
        panel.grid.major.y = element_blank(), 
        legend.text = element_text(size = 5), 
        legend.key.width = unit(3, "mm"), 
        legend.key.height = unit(3.8, "mm"), 
        legend.margin = margin(20,0,10,5), 
        legend.box.margin = margin(20,0,10,5)) +   
  labs(title = "", y = "", x = "Percent\nAgreement", fill = "")  
agree_roche_luminex_perc

# Tile plot of Cohen's Kappa: one tile per comparison, value printed on the tile
agree_roche_luminex_cohen <- df_agree_roche_luminex %>% 
  mutate(kappa_label = formatC(cohen_agree, format = "f", digits = 2)) %>% 
  ggplot(aes(y = value, x = NA, fill = cohen_agree)) + 
  geom_tile() +
  geom_text(aes(label = kappa_label), 
            size = 2.2, fontface = "bold", color = "#000000") +
  scale_fill_gradient2(low = col[3], mid = "yellow", high = col[20], 
                       midpoint = 0.5, limits = c(0, 1)) + 
  scale_x_discrete(position = "top") + 
  graph_style + 
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 6), 
        axis.title.x = element_text(face = "bold", size = 7), 
        axis.ticks = element_blank(), 
        panel.grid.major.y = element_blank()) + 
  labs(title = "", y = "", x = "Cohen's\nKappa", fill = "")
agree_roche_luminex_cohen

# Combine plots: percent agreement on the left (keeps the y axis labels),
# Cohen's Kappa on the right (labels removed, pulled left via a negative margin)
agree_panel_perc <- agree_roche_luminex_perc + 
  theme(plot.margin = unit(c(0,0,0,0), "lines"))
agree_panel_cohen <- agree_roche_luminex_cohen + 
  theme(axis.text.y = element_blank(), plot.margin = unit(c(0,0,0,-.75), "lines"))
agree_roche_luminex <- ggarrange(agree_panel_perc, agree_panel_cohen, 
                                 widths = c(.73,.27), 
                                 common.legend = TRUE, legend = "right")
agree_roche_luminex

# Save figure
ggsave(filename = "./FigS1C.pdf", 
       plot = agree_roche_luminex, 
       device = "pdf", 
       width = 7.8, height = 4, units = "cm")


## Supplementary Figure 1e (1): Correlation plot for anti-S Ig for Luminex vs. Roche Elecsys assay within subsample
# Scatter plot of Roche Elecsys anti-S Ig against Luminex anti-S IgG MFI ratio
# (all timepoints pooled, both axes on log10 scale, dotted lines at x = 6 and y = 0.4)
comp_chuv_roche_s_ig <- source_data_sub %>% 
  filter(subsample == "Yes") %>%
  mutate(group = "Correlation Anti-S Ig") %>% 
  ggplot(aes(x = ratio_S_IgG, y = Roche_S_Ig)) + 
  geom_point(size = point_sz) + 
  geom_smooth(method = "lm", se = FALSE, color = col[9], lwd = line_sz * 2) + 
  geom_vline(xintercept = 6, linetype = "dotted", lwd = line_sz) + 
  geom_hline(yintercept = 0.4, linetype = "dotted", lwd = line_sz) + 
  facet_grid(cols = vars(group)) +
  scale_x_log10(labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  scale_y_log10(labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  theme(panel.grid.major.x = element_line()) + 
  labs(title = "Correlation Luminex vs. Roche Elecsys", 
       x = "Luminex (MFI Ratio)", 
       y = "Roche Elecsys (U/ml)")
comp_chuv_roche_s_ig

# Spearman rank correlation between the two assays
cor.test(source_data_sub$Roche_S_Ig, source_data_sub$ratio_S_IgG, method = "spearman")

# Export without title
ggsave(filename = "./FigS1E1.pdf", 
       plot = comp_chuv_roche_s_ig + labs(title = NULL), 
       device = "pdf", 
       width = 6.6, height = 4, units = "cm")


## Supplementary Figure 1e (2): Correlation plot for anti-N Ig for Luminex vs. Roche Elecsys assay within subsample
# Scatter plot of Roche Elecsys anti-N Ig against Luminex anti-N IgG MFI ratio
# (all timepoints pooled, both axes on log10 scale, dotted lines at x = 6 and y = 1;
# the x axis is limited to 1-100)
comp_chuv_roche_n_ig <- source_data_sub %>% 
  filter(subsample == "Yes") %>%
  mutate(group = "Correlation Anti-N Ig") %>% 
  ggplot(aes(x = ratio_N_IgG, y = Roche_N_Ig)) + 
  geom_point(size = point_sz) + 
  geom_smooth(method = "lm", se = FALSE, color = col[9], lwd = line_sz * 2) + 
  geom_vline(xintercept = 6, linetype = "dotted", lwd = line_sz) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + 
  facet_grid(cols = vars(group)) +
  scale_x_log10(limits = c(1, 100), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  scale_y_log10(labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  theme(panel.grid.major.x = element_line()) + 
  labs(title = "Correlation Luminex vs. Roche Elecsys", 
       x = "Luminex (MFI Ratio)", 
       y = "Roche Elecsys (COI)")
comp_chuv_roche_n_ig

# Spearman rank correlation between the two assays
cor.test(source_data_sub$Roche_N_Ig, source_data_sub$ratio_N_IgG, method = "spearman")

# Export without title
ggsave(filename = "./FigS1E2.pdf", 
       plot = comp_chuv_roche_n_ig + labs(title = NULL), 
       device = "pdf", 
       width = 6.6, height = 4, units = "cm")


## Supplementary Figure 1f: Cross-validation Anti-S Ig Roche Elecsys vs. Luminex MFI Ratios
# Import validation data (sheet "data_crossvalid" of the source data workbook)
source_data_valid <- read.xlsx("./Source Data.xlsx", sheet = "data_crossvalid")

# Plot results: Roche Elecsys anti-S Ig against Luminex anti-S IgG MFI ratio, both axes log10
luminex_valid <- source_data_valid %>% 
  mutate(group = "Validation Anti-S Ig Roche Elecsys vs. Luminex") %>% 
  ggplot(aes(y = Roche_S_Ig, x = ratio_S_IgG)) + 
  geom_point(size = 2*point_sz, col = "#add8e6") + 
  # Line width is set via `lwd` (as in the other correlation plots) rather than `size`,
  # which is deprecated for lines in ggplot2 >= 3.4.0
  geom_smooth(method = "lm", se = FALSE, col = "#f02a2c", lwd = 2*line_sz) + 
  facet_grid(. ~ group) +
  scale_x_continuous(trans = "log10", breaks = c(3.2, 10, 32, 1e2), labels = c("3.2", "10", "32", expression("10"^2))) + 
  scale_y_continuous(trans = "log10", breaks = c(1e0, 1e1, 1e2, 1e3, 1e4), labels = c("1", "10", expression("10"^2), expression("10"^3), expression("10"^4))) + 
  graph_style + theme(panel.grid.major.y = element_blank()) + 
  labs(title = "Correlation Luminex vs. Roche Elecsys (External Dataset, Lausanne University Hospital)", y = "Roche Elecsys Ig (U/ml)", x = "Luminex IgG (MFI Ratio)")
luminex_valid

# Calculate correlation (Spearman)
cor.test(source_data_valid$Roche_S_Ig, source_data_valid$ratio_S_IgG, method = "spearman")

# Save figure (without title)
ggsave("./FigS1F.pdf", 
       plot = luminex_valid + labs(title = NULL), 
       width = 6.8, height = 6.4, device = "pdf", units = "cm")


### Concordance and Correlation Antibody vs. T Cell Assays ---------------------------------------------------------------------- 

## Figure 5a: Heatmap for Spearman correlation between antibody and T cell assay results (subsample)
# Prepare data: subsample only, log-transformed antibody and T cell measures under short names
# (the S2 T cell column is named S2_TC directly, consistent with the Cohen's Kappa analysis)
source_data_g_ts <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  select(id, timepoint, 
         logratio_S_IgA, logratio_S_IgG, logratio_N_IgG, 
         log_M.corr, log_N.corr, log_S2.corr, log_S1.corr,
         log_TC.corr) %>%
  rename(S_IgA = logratio_S_IgA, S_IgG = logratio_S_IgG, N_IgG = logratio_N_IgG, 
         M_TC = log_M.corr, N_TC = log_N.corr, S2_TC = log_S2.corr, S1_TC = log_S1.corr,
         Any_TC = log_TC.corr)

# All timepoints present in the data and all ordered pairs of measures (including self-pairs)
timepoints <- unique(source_data_g_ts$timepoint)
measures <- c("S_IgA", "S_IgG", "N_IgG", "Any_TC", "M_TC", "N_TC", "S2_TC", "S1_TC")
combinations <- expand.grid(measure1 = measures, measure2 = measures, stringsAsFactors = FALSE)

# Calculate Spearman correlation coefficients for all comparisons (antibody subclasses and epitope-specific T cells)
# One table per timepoint: the data are filtered once per timepoint and the coefficient stays
# numeric throughout (no round trip through character).
rlist <- lapply(setNames(nm = as.character(timepoints)), function(tp) {
  data_tp <- source_data_g_ts %>% 
    filter(timepoint == tp)
  coefs <- map2_dbl(combinations$measure1, combinations$measure2, function(m1, m2) {
    unname(cor.test(data_tp[[m1]], data_tp[[m2]], method = "spearman")$estimate)
  })
  tibble(timepoint = tp, 
         measure1 = combinations$measure1, 
         measure2 = combinations$measure2, 
         coef = coefs)
})

# Reformat output: readable measure labels and fixed factor orders for plotting
rlist_corr_ab_tc <- bind_rows(rlist) %>% 
  mutate(timepoint = factor(timepoint, levels = c("W2", "M1", "M3", "M6")), 
         measure1 = gsub("_", " ", measure1), 
         measure1 = gsub("Any TC", "Pooled TC", measure1), 
         measure2 = gsub("_", " ", measure2), 
         measure2 = gsub("Any TC", "Pooled TC", measure2), 
         measure1 = factor(measure1, levels = c("S IgA", "S IgG", "N IgG", "Pooled TC", "M TC", "N TC", "S1 TC", "S2 TC")), 
         measure2 = factor(measure2, levels = c("S IgA", "S IgG", "N IgG", "Pooled TC", "M TC", "N TC", "S1 TC", "S2 TC")), 
         coef = as.numeric(coef))
rlist_corr_ab_tc

# Plot results: one heatmap panel per timepoint, coefficient printed on each tile.
# Label text and text color are derived as columns and mapped through aes() so that they stay
# attached to their rows instead of being matched by position to the faceted layer data.
corr_ab_tc <- rlist_corr_ab_tc %>% 
  mutate(measure1 = fct_rev(measure1), 
         coef_label = formatC(coef, format = "f", digits = 2), 
         # white text below 0.55, black text otherwise
         coef_text_col = ifelse(coef < 0.55, "#FFFFFF", "#000000")) %>% 
  ggplot(aes(y = measure1, x = measure2, fill = coef)) +
  geom_tile(color = "#000000") + 
  geom_text(aes(label = coef_label, color = coef_text_col), size = 1.6, fontface = "bold") + 
  scale_color_identity() + # use the hex codes in coef_text_col as-is (no legend)
  facet_wrap(. ~ timepoint, ncol = 4) + 
  scale_fill_continuous(type = "viridis") +  
  graph_style + 
  theme(panel.grid.major.y = element_blank(), 
        legend.text = element_text(size = 5), 
        legend.margin = margin(20,0,10,5), 
        legend.box.margin = margin(20,0,10,-5), 
        legend.key.width = unit(3, "mm"), 
        legend.key.height = unit(3.8, "mm"), 
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + 
  labs(fill = "Spearman \nCorrelation \nCoefficient \n ", y = "", x = "")
corr_ab_tc

# Save figure
ggsave("./Fig5A.pdf", 
       plot = corr_ab_tc + labs(title = NULL, x = NULL, y = NULL), 
       width = 17.8, height = 5.5, device = "pdf", units = "cm")


## Figure 5b: Percent concordance for testing positive or negative between antibody and T cell assay results (main results; subsample)
# Count, per timepoint and antibody, how often each combination of antibody result and
# pooled T cell result occurs (subsample only; combinations with a missing result are dropped)
conc_counts_main <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  select(id, timepoint, 
         S_IgA = result_S_IgA, S_IgG = result_S_IgG, N_IgG = result_N_IgG, 
         Any_TC = Any_Pos) %>%
  pivot_longer(c(S_IgA, S_IgG, N_IgG), names_to = "antibody", values_to = "Res_AB") %>% 
  group_by(timepoint, antibody, Any_TC, Res_AB) %>% 
  summarise(n = n(), .groups = "drop") %>% 
  filter(!(is.na(Any_TC) | is.na(Res_AB))) %>% 
  group_by(timepoint, antibody) %>% 
  mutate(n_tot = sum(n)) %>% 
  ungroup()

# Wilson 95% confidence limits for each proportion (column 2 = lower, column 3 = upper)
conc_ci_main <- Hmisc::binconf(conc_counts_main$n, conc_counts_main$n_tot, 
                               alpha = 0.05, method = "wilson")

source_data_g_ts <- conc_counts_main %>% 
  mutate(prop = n / n_tot, 
         prop.lb = conc_ci_main[, 2], 
         prop.ub = conc_ci_main[, 3])
source_data_g_ts

# Helper: concordance trend plot for one antibody against the pooled T cell result.
#   data          concordance table with columns antibody, Res_AB, Any_TC, timepoint,
#                 prop, prop.lb and prop.ub
#   antibody_key  value of `antibody` to plot ("S_IgA", "S_IgG" or "N_IgG")
#   group_label   text shown in the facet strip
#   plot_title    plot title
# Returns a ggplot object with one colored line per result combination
# (AB+TC+, AB-TC-, AB+TC-, AB-TC+), points with error bars from prop.lb to prop.ub.
plot_conc_ab_tc <- function(data, antibody_key, group_label, plot_title) {
  data %>% 
    filter(antibody == antibody_key) %>% 
    mutate(group = group_label, 
           Res_AB = ifelse(Res_AB == "positive", "AB+", "AB-"), 
           Any_TC = ifelse(Any_TC == "positive", "TC+", "TC-"), 
           result = factor(paste0(Res_AB, Any_TC), levels = c("AB+TC+", "AB-TC-", "AB+TC-", "AB-TC+"))) %>% 
    ggplot(aes(y = prop, ymin = prop.lb, ymax = prop.ub, x = timepoint, color = result)) + 
    geom_point(size = point_sz * 4, position = position_dodge(width = 0.25)) + 
    geom_errorbar(width = line_sz *2, position = position_dodge(width = 0.25)) + 
    geom_line(aes(group = result), position = position_dodge(width = 0.25)) + 
    facet_grid(. ~ group) + 
    scale_color_manual(values = col[c(10,9,3,4)]) + 
    scale_y_continuous(labels = scales::percent, limits = c(0, 1), breaks = seq(0, 1, .2)) + 
    graph_style + 
    labs(title = plot_title, y = "Proportion", x = "Timepoint", linetype = "", shape = "")
}

# (1) Plot results for concordance for anti-S IgA
conc_s_iga_tc <- plot_conc_ab_tc(source_data_g_ts, "S_IgA", 
                                 "Anti-S IgA vs Overall T Cells", 
                                 "Percent Concordance Anti-S IgA with Overall T Cells (subsample)")
conc_s_iga_tc

# (2) Plot results for concordance for anti-S IgG
conc_s_igg_tc <- plot_conc_ab_tc(source_data_g_ts, "S_IgG", 
                                 "Anti-S IgG vs Overall T Cells", 
                                 "Percent Concordance Anti-S IgG with Overall T Cells (subsample)")
conc_s_igg_tc

# (3) Plot results for concordance for anti-N IgG
conc_n_igg_tc <- plot_conc_ab_tc(source_data_g_ts, "N_IgG", 
                                 "Anti-N IgG vs Overall T Cells", 
                                 "Percent Concordance Anti-N IgG with Overall T Cells (subsample)")
conc_n_igg_tc

# Export the three concordance panels; only the third panel keeps its legend
ggsave(filename = "./Fig5B1.pdf", 
       plot = conc_s_iga_tc + 
         labs(title = NULL) + 
         theme(legend.position = "none"), 
       device = "pdf", 
       width = 5.4, height = 4, units = "cm")

ggsave(filename = "./Fig5B2.pdf", 
       plot = conc_s_igg_tc + 
         labs(title = NULL, y = NULL) + 
         theme(legend.position = "none"), 
       device = "pdf", 
       width = 5.1, height = 4, units = "cm")

ggsave(filename = "./Fig5B3.pdf", 
       plot = conc_n_igg_tc + 
         labs(title = NULL, y = NULL, color = "") + 
         theme(legend.key.size = unit(3, "mm")), 
       device = "pdf", 
       width = 6.5, height = 4, units = "cm")


## Supplementary Figure 3a: Percent concordance for testing positive or negative between antibody and T cell assay results (full results; subsample)
# Count, per timepoint, antibody and T cell readout, how often each combination of antibody
# result and T cell result occurs (subsample only; combinations with a missing result are dropped)
conc_counts_full <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  select(id, timepoint, 
         S_IgA = result_S_IgA, S_IgG = result_S_IgG, N_IgG = result_N_IgG, 
         Any_TC = Any_Pos, M_TC = M_Pos, N_TC = N_Pos, S1_TC = S1_Pos, S2_TC = S2_Pos) %>%
  pivot_longer(c(S_IgA, S_IgG, N_IgG), names_to = "antibody", values_to = "Res_AB") %>% 
  pivot_longer(c(Any_TC, M_TC, N_TC, S1_TC, S2_TC), names_to = "tcell", values_to = "Res_TC") %>% 
  group_by(timepoint, antibody, tcell, Res_AB, Res_TC) %>% 
  summarise(n = n(), .groups = "drop") %>% 
  filter(!(is.na(Res_AB) | is.na(Res_TC))) %>% 
  group_by(timepoint, antibody, tcell) %>% 
  mutate(n_tot = sum(n)) %>% 
  ungroup()

# Wilson 95% confidence limits for each proportion (column 2 = lower, column 3 = upper)
conc_ci_full <- Hmisc::binconf(conc_counts_full$n, conc_counts_full$n_tot, 
                               alpha = 0.05, method = "wilson")

source_data_g_ts <- conc_counts_full %>% 
  mutate(prop = n / n_tot, 
         prop.lb = conc_ci_full[, 2], 
         prop.ub = conc_ci_full[, 3])

# Heatmap of concordance proportions: rows = timepoint (reversed), columns = result
# combination, facets = T cell readout (rows) by antibody (columns)
conc_all <- source_data_g_ts %>% 
  mutate(antibody = gsub("_", " ", antibody), 
         ab_sign = ifelse(Res_AB == "positive", "+", "-"), 
         tc_sign = ifelse(Res_TC == "positive", "+", "-"), 
         result = fct_rev(factor(paste0(antibody, ab_sign, " TC", tc_sign))), 
         timepoint = fct_rev(timepoint), 
         antibody = factor(antibody, levels = c("S IgA", "S IgG", "N IgG")), 
         tcell = factor(tcell, 
                        levels = c("Any_TC", "M_TC", "N_TC", "S1_TC", "S2_TC"), 
                        labels = c("Pooled", "M", "N", "S1", "S2"))) %>% 
  ggplot(aes(x = result, y = timepoint, fill = prop)) + 
  geom_tile() +
  facet_grid(rows = vars(tcell), cols = vars(antibody), 
             scales = "free", space = "free", switch = "y") + 
  scale_fill_continuous(type = "viridis", limits = c(0, .6), 
                        labels = scales::percent_format(trim = FALSE, accuracy = 1.0)) + 
  scale_y_discrete(position = "right") + 
  graph_style + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), 
        legend.key.width = unit(3, "mm"), 
        legend.key.height = unit(3.8, "mm"), 
        legend.margin = margin(20,0,10,5), 
        legend.box.margin = margin(20,0,10,-5)) + 
  labs(title = "", y = "", x = "", fill = "Fraction")
conc_all

# Export without title and axis titles
ggsave(filename = "./FigS3A.pdf", 
       plot = conc_all + labs(title = NULL, y = NULL, x = NULL), 
       device = "pdf", 
       width = 8, height = 7.2, units = "cm")


## Supplementary Figure 3b: Heatmap for Cohen's Kappa agreement between antibody and T cell assay results (subsample)
# Prepare data: subsample only, positive/negative results of antibody and T cell assays under short names
source_data_g_ts <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  select(id, timepoint, 
         result_S_IgA, result_S_IgG, result_N_IgG, 
         M_Pos, N_Pos, S2_Pos, S1_Pos,
         Any_Pos) %>%
  rename(S_IgA = result_S_IgA, S_IgG = result_S_IgG, N_IgG = result_N_IgG, 
         M_TC = M_Pos, N_TC = N_Pos, S2_TC = S2_Pos, S1_TC = S1_Pos,
         Any_TC = Any_Pos)

# All timepoints present in the data and all ordered pairs of measures (including self-pairs)
timepoints <- unique(source_data_g_ts$timepoint)
measures <- c("S_IgA", "S_IgG", "N_IgG", "Any_TC", "M_TC", "N_TC", "S1_TC", "S2_TC")
combinations <- expand.grid(measure1 = measures, measure2 = measures, stringsAsFactors = FALSE)

# Calculate Cohen's Kappa for all comparisons (antibody subclasses and epitope-specific T cells)
# One table per timepoint: the data are filtered once per timepoint and kappa stays numeric
# throughout (no round trip through character).
rlist <- lapply(setNames(nm = as.character(timepoints)), function(tp) {
  data_tp <- source_data_g_ts %>% 
    filter(timepoint == tp)
  kappas <- map2_dbl(combinations$measure1, combinations$measure2, function(m1, m2) {
    xtab <- table(data_tp[[m1]], data_tp[[m2]])
    as.numeric(psych::cohen.kappa(xtab)$kappa)
  })
  tibble(timepoint = tp, 
         measure1 = combinations$measure1, 
         measure2 = combinations$measure2, 
         kappa = kappas)
})

# Reformat output: readable measure labels and fixed factor orders for plotting
rlist_cohen_ab_tc <- bind_rows(rlist) %>% 
  mutate(timepoint = factor(timepoint, levels = c("W2", "M1", "M3", "M6")), 
         measure1 = gsub("_", " ", measure1), 
         measure1 = gsub("Any TC", "Pooled TC", measure1), 
         measure2 = gsub("_", " ", measure2), 
         measure2 = gsub("Any TC", "Pooled TC", measure2), 
         measure1 = factor(measure1, levels = c("S IgA", "S IgG", "N IgG", "Pooled TC", "M TC", "N TC", "S1 TC", "S2 TC")), 
         measure2 = factor(measure2, levels = c("S IgA", "S IgG", "N IgG", "Pooled TC", "M TC", "N TC", "S1 TC", "S2 TC")), 
         kappa = as.numeric(kappa))
rlist_cohen_ab_tc

# Plot results: one heatmap panel per timepoint, kappa printed on each tile.
# Label text and text color are derived as columns and mapped through aes() so that they stay
# attached to their rows instead of being matched by position to the faceted layer data.
cohen_ab_tc <- rlist_cohen_ab_tc %>% 
  mutate(measure1 = fct_rev(measure1), 
         kappa_label = formatC(kappa, format = "f", digits = 2), 
         # white text below 0.4, black text otherwise
         kappa_text_col = ifelse(kappa < 0.4, "#FFFFFF", "#000000")) %>% 
  ggplot(aes(y = measure1, x = measure2, fill = kappa)) +
  geom_tile(color = "#000000") + 
  geom_text(aes(label = kappa_label, color = kappa_text_col), size = 1.6, fontface = "bold") + 
  scale_color_identity() + # use the hex codes in kappa_text_col as-is (no legend)
  facet_wrap(. ~ timepoint, ncol = 4) + 
  scale_fill_continuous(type = "viridis") +  
  graph_style + theme(panel.grid.major.y = element_blank(), 
                      legend.margin = margin(20,0,10,5), 
                      legend.box.margin = margin(20,0,10,-5), 
                      legend.key.width = unit(3, "mm"), 
                      legend.key.height = unit(3.8, "mm"), 
                      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + 
  labs(fill = "Cohen's Kappa \n ", y = "", x = "")
cohen_ab_tc

# Save figure
ggsave("./FigS3B.pdf", 
       plot = cohen_ab_tc + labs(title = NULL, y = NULL, x = NULL), 
       width = 17.8, height = 5.5, device = "pdf", units = "cm")


### Clustering Algorithm ---------------------------------------------------------------------- 

## Note: Clusters derived based on algorithm below are already incorporated in dataset, code below is for reference
# https://doi.org/10.18637/jss.v065.i04
# https://doi.org/10.1016/j.cmpb.2012.08.016

# ## Prepare analysis (based on kml3d algorithm)
# # Library
# library(kml3d)
# 
# # Prepare dataset
# df_cont <- source_data_w %>%
#   filter(subsample == "Yes") %>%
#   select(id,
#          logratio_S_IgA_W2, logratio_S_IgA_M1, logratio_S_IgA_M3, logratio_S_IgA_M6,
#          logratio_S_IgG_W2, logratio_S_IgG_M1, logratio_S_IgG_M3, logratio_S_IgG_M6,
#          logratio_N_IgG_W2, logratio_N_IgG_M1, logratio_N_IgG_M3, logratio_N_IgG_M6,
#          log_M.corr_W2, log_M.corr_M1, log_M.corr_M3, log_M.corr_M6,
#          log_N.corr_W2, log_N.corr_M1, log_N.corr_M3, log_N.corr_M6,
#          log_S1.corr_W2, log_S1.corr_M1, log_S1.corr_M3, log_S1.corr_M6,
#          log_S2.corr_W2, log_S2.corr_M1, log_S2.corr_M3, log_S2.corr_M6)
# 
# # Set parameters for kml3d algorithm
# param <- parKml3d(saveFreq = 100, maxIt = 200,
#                   imputationMethod = "copyMean",
#                   distanceName = "euclidean3d", distance = function() {
#                   }, centerMethod = meanNA, startingCond = "nearlyAll",
#                   nbCriterion =100, scale=TRUE)
# 
# # Set seed for reproducibility
# set.seed(20211207)
# 
# # Convert data into an object of class ClusterLongData3d
# cldIR <- cld3d(df_cont,
#                timeInData = list(s_iga = 2:5, # specify relevant columns for each measure and corresponding timepoint
#                                  s_igg = 6:9,
#                                  n_igg = 10:13,
#                                  m_corr = 14:17,
#                                  n_corr = 18:21,
#                                  s1_corr = 22:25,
#                                  s2_corr = 26:29),
#                idAll = df_cont$id) # specify identifier for each individual
# 
# ## Run kml3d algorithm
# # Partition the data
# kml3d(cldIR, nbClusters = 4:6, # specify number of clusters (4 to 6 clusters selected)
#       nbRedrawing = 100, # specify number of times k-means must be run for each number of the clusters (100 each)
#       parAlgo = param,
#       toPlot = "both")
# 
# # Examine and export results
# X11(type = "Xlib")
# try(choice(cldIR)) # 5-cluster model chosen based on Calinski and Harabasz quality criterion and expected patterns in the data


### Cluster Analyses ---------------------------------------------------------------------- 

## Figure 6a: Plot joint antibody and T cell trajectories (rescaled data; subsample)
# Standardize each log-transformed antibody and T cell measure across the subsample;
# the results are stored in new columns carrying the suffix "_scaled"
df_traj <- source_data %>% 
  filter(subsample == "Yes") %>% 
  mutate(across(c(logratio_S_IgA, logratio_S_IgG, logratio_N_IgG, 
                  log_M.corr, log_N.corr, log_S1.corr, log_S2.corr), 
                ~ as.numeric(scale(.x)), 
                .names = "{.col}_scaled"))

# Create trajectory plot: mean of each scaled measure per cluster and timepoint, one panel per cluster.
# The all-NA `dummy` variable gets an empty label and white color; it sits between the antibody
# and T cell entries in the legend order.
cluster_traj <- df_traj %>% 
  mutate(dummy = NA) %>% 
  select(id, cluster_5l, timepoint, 
         logratio_S_IgA_scaled, logratio_S_IgG_scaled, logratio_N_IgG_scaled,
         dummy, 
         log_M.corr_scaled, log_N.corr_scaled, log_S1.corr_scaled, log_S2.corr_scaled) %>% 
  pivot_longer(!c(id, cluster_5l, timepoint), 
               names_to = "var", 
               values_to = "result") %>% 
  group_by(cluster_5l, timepoint, var) %>% 
  summarise(mean = mean(result, na.rm = TRUE)) %>% # TRUE spelled out (T is reassignable)
  ungroup() %>% 
  mutate(var = recode_factor(var, 
                             "logratio_S_IgA_scaled" = "Anti-S IgA", 
                             "logratio_S_IgG_scaled" = "Anti-S IgG", 
                             "logratio_N_IgG_scaled" = "Anti-N IgG", 
                             "dummy" = "", 
                             "log_M.corr_scaled" = "M TC", 
                             "log_N.corr_scaled" = "N TC", 
                             "log_S1.corr_scaled" = "S1 TC", 
                             "log_S2.corr_scaled" = "S2 TC")) %>% 
  ggplot(aes(y = mean, x = timepoint, group = var)) +
  # Line width is set via `lwd` (as in the other plots of this script) rather than `size`,
  # which is deprecated for lines in ggplot2 >= 3.4.0
  geom_line(aes(color = var), lwd = line_sz * 2) + 
  facet_wrap(~ cluster_5l, ncol = 5) + 
  scale_color_manual(values = c(col[c(10:8)], "#FFFFFF", col[c(2:5)])) + 
  graph_style + 
  theme(legend.position = "bottom", legend.key.size = unit(3, "mm"), legend.margin = margin(-5, 0, 0, 0)) + 
  labs(title = "Antibody and T Cell Trajectories by Cluster", y = "", x = "Timepoint", color = "")
cluster_traj

# Save figure
ggsave("./Fig6A.pdf", 
       plot = cluster_traj + labs(title = NULL, y = NULL), 
       width = 8.8, height = 5.8, device = "pdf", units = "cm")


## Figure 6b: Heatmap with detailed antibody and T cell results by cluster (data not rescaled; subsample)
# Prepare data: wide-format subsample with antibody columns (12) followed by T cell columns (16)
df_heat <- source_data_w %>% 
  filter(subsample == "Yes") %>% 
  select(id, cluster_5l, 
         logratio_S_IgA_W2, logratio_S_IgG_W2, logratio_N_IgG_W2,
         logratio_S_IgA_M1, logratio_S_IgG_M1, logratio_N_IgG_M1,
         logratio_S_IgA_M3, logratio_S_IgG_M3, logratio_N_IgG_M3, 
         logratio_S_IgA_M6, logratio_S_IgG_M6, logratio_N_IgG_M6, 
         log_M.corr_W2, log_N.corr_W2, log_S1.corr_W2, log_S2.corr_W2, 
         log_M.corr_M1, log_N.corr_M1, log_S1.corr_M1, log_S2.corr_M1, 
         log_M.corr_M3, log_N.corr_M3, log_S1.corr_M3, log_S2.corr_M3, 
         log_M.corr_M6, log_N.corr_M6, log_S1.corr_M6, log_S2.corr_M6)

# Initialize dataframes (name rows with participant IDs and then remove ID from data)
df_heat <- as.data.frame(df_heat) 
rownames(df_heat) <- paste0(df_heat$id)
df_map <- df_heat %>% 
  arrange(cluster_5l) %>% 
  select(-c(id, cluster_5l))

# Prepare annotations and colors (rows sorted by cluster, same order as df_map)
row_anno <- df_heat %>% 
  select(cluster_5l) %>% 
  arrange(cluster_5l)

col_anno <- data.frame(
  "Type" = factor(rep(c("Antibodies", "T cells"), c(12, 16))),
  "Timepoint" = factor(c(rep("W2", 3), rep("M1", 3), rep("M3", 3), rep("M6", 3), rep("W2", 4), rep("M1", 4), rep("M3", 4), rep("M6", 4))))
rownames(col_anno) <- colnames(df_map)  

ann_colors <- list( # create a list of colours for each level of each variable added to the heatmap
  Type = c( "Antibodies" = col[15], "T cells" = col[14]), 
  Timepoint = c("W2" = "#fef0d9", "M1" = "#fdcc8a", "M3" = "#fc8d59", "M6"= "#d7301f"),
  cluster_5l = c("Cluster 1" = col[8], "Cluster 2" = col[7], "Cluster 3" = col[6], "Cluster 4" = col[5], "Cluster 5" = col[4]))

# Create heatmap (no clustering; rows and columns are shown in the order of df_map)
# NOTE(review): gaps_row is hard-coded; presumably these are the cumulative cluster sizes in
# the subsample - confirm if the data change.
cluster_heat <- pheatmap(df_map, 
                         # annotation_row = row_anno, annotation_col = col_anno, # includes annotations if activated
                         annotation_row = NA, annotation_col = NA, # excludes annotations if activated
                         cluster_rows = FALSE, cluster_cols = FALSE, show_rownames = FALSE, legend = TRUE,
                         annotation_colors = ann_colors, 
                         color = colorRampPalette(brewer.pal(n = 9, name = "YlGnBu")[2:9])(100),
                         scale = "none", border_color = "transparent", 
                         gaps_row = c(9,17,29,39), gaps_col = 12, # gaps_col separates antibody from T cell columns
                         fontsize = 10, 
                         angle_col = 90, 
                         labels_col = c(rep(c("S IgA", "S IgG", "N IgG"), 4), rep(c("M TC", "N TC","S1 TC", "S2 TC"), 4)))
cluster_heat

# Save figure
ggsave("./Fig6B.pdf", # printed in double size due to legend format
       plot = cluster_heat, 
       width = 17, height = 10.6, device = "pdf", units = "cm")


## Supplementary Figure 4a (1): Anti-S IgA immune trajectory in individual clusters (subsample)
# Boxplots with jittered individual values of the anti-S IgA MFI ratio per timepoint,
# one panel per cluster, log10 y axis, dotted reference line at 6.5
cluster_s_iga <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(x = timepoint, y = ratio_S_IgA)) + 
  geom_boxplot(width = 0.5, color = col[9], outlier.alpha = 0, lwd = line_sz) + 
  geom_point(position = position_jitter(width = 0.1), size = point_sz) + 
  geom_hline(yintercept = 6.5, linetype = "dotted", lwd = line_sz) + 
  facet_grid(cols = vars(cluster_5l)) +
  scale_y_log10(limits = c(1, 4e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Anti-S IgA Antibody Levels by Cluster (subsample)", 
       x = "Timepoint", 
       y = "S IgA MFI ratio")
cluster_s_iga

# Export without title, x axis title and x axis tick labels
ggsave(filename = "./FigS4A1.pdf", 
       plot = cluster_s_iga + 
         labs(title = NULL, x = NULL) + 
         theme(axis.text.x = element_blank()), 
       device = "pdf", 
       width = 8.6, height = 2.8, units = "cm")


## Supplementary Figure 4a (2): Anti-S IgG immune trajectory in individual clusters (subsample)
# Boxplots with jittered individual values of the anti-S IgG MFI ratio per timepoint,
# one panel per cluster, log10 y axis, dotted reference line at 6
cluster_s_igg <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(x = timepoint, y = ratio_S_IgG)) + 
  geom_boxplot(width = 0.5, color = col[9], outlier.alpha = 0, lwd = line_sz) + 
  geom_point(position = position_jitter(width = 0.1), size = point_sz) + 
  geom_hline(yintercept = 6, linetype = "dotted", lwd = line_sz) + 
  facet_grid(cols = vars(cluster_5l)) +
  scale_y_log10(limits = c(1, 4e3), 
                labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Anti-S IgG Antibody Levels by Cluster (subsample)", 
       x = "Timepoint", 
       y = "S IgG MFI ratio")
cluster_s_igg

# Export without title, x axis title, x axis tick labels and facet strips
ggsave(filename = "./FigS4A2.pdf", 
       plot = cluster_s_igg + 
         labs(title = NULL, x = NULL) + 
         theme(axis.text.x = element_blank(), strip.text = element_blank()), 
       device = "pdf", 
       width = 8.6, height = 2.3, units = "cm")


## Supplementary Figure 4a (3): Anti-N IgG immune trajectory in individual clusters (subsample)
# Boxplots of the anti-N IgG MFI ratio at each timepoint, one panel per cluster
# (cluster_5l), restricted to rows with subsample == "Yes". Individual
# measurements are overlaid as points; the dotted line marks y = 6
# (presumably the assay positivity cutoff -- TODO confirm).
# Plot results
cluster_n_igg <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = ratio_N_IgG, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[9], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  geom_hline(yintercept = 6, linetype = "dotted", lwd = line_sz) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(1, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Anti-N IgG Antibody Levels by Cluster (subsample)", y = "N IgG MFI ratio", x = "Timepoint")
cluster_n_igg

# Save figure (exported without title, x-axis title, x-axis tick labels and facet strip labels)
ggsave("./FigS4A3.pdf", 
       plot = cluster_n_igg + labs(title = NULL, x = NULL) + theme(axis.text.x = element_blank(), strip.text = element_blank()), 
       width = 8.6, height = 2.3, device = "pdf", units = "cm")


## Figure 6c (1): Anti-Wildtype SARS-CoV-2 immune trajectory in individual clusters (subsample)
# Boxplots of neutr_wt_cc (axis label: anti-wildtype IC50) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; the dotted line marks y = 50
# (presumably the positivity cutoff -- TODO confirm).
# Plot results
cluster_na_wt <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = neutr_wt_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[14], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 5e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Neutralizing Antibody Levels against Wildtype SARS-CoV-2 by Cluster (subsample)", y = "Anti-Wildtype IC50", x = "Timepoint")
cluster_na_wt

# Save figure (exported without title, x-axis title and x-axis tick labels)
ggsave("./Fig6C1.pdf", 
       plot = cluster_na_wt + labs(title = NULL, x = NULL) + theme(axis.text.x = element_blank()), 
       width = 8.6, height = 2.8, device = "pdf", units = "cm")


## Figure 6c (2): Anti-Delta SARS-CoV-2 immune trajectory in individual clusters (subsample)
# Boxplots of neutr_delta_cc (axis label: anti-Delta IC50) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; the dotted line marks y = 50
# (presumably the positivity cutoff -- TODO confirm).
# Plot results
cluster_na_delta <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = neutr_delta_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[14], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 5e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Neutralizing Antibody Levels against Delta SARS-CoV-2 by Cluster (subsample)", y = "Anti-Delta IC50", x = "Timepoint")
cluster_na_delta

# Save figure (exported without title, x-axis title, x-axis tick labels and facet strip labels)
ggsave("./Fig6C2.pdf", 
       plot = cluster_na_delta + labs(title = NULL, x = NULL) + theme(axis.text.x = element_blank(), strip.text = element_blank()),
       width = 8.6, height = 2.3, device = "pdf", units = "cm")


## Supplementary Figure 4a (4): Anti-Omicron SARS-CoV-2 immune trajectory in individual clusters (subsample)
# Boxplots of neutr_omicron_cc (axis label: anti-Omicron IC50) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; the dotted line marks y = 50
# (presumably the positivity cutoff -- TODO confirm).
# Plot results
cluster_na_omicron <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = neutr_omicron_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[14], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  geom_hline(yintercept = 50, linetype = "dotted", lwd = line_sz) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 5e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Neutralizing Antibody Levels against Omicron SARS-CoV-2 by Cluster (subsample)", y = "Anti-Omicron IC50", x = "Timepoint")
cluster_na_omicron

# Save figure (exported without title, x-axis title, x-axis tick labels and facet strip labels)
ggsave("./FigS4A4.pdf", 
       plot = cluster_na_omicron + labs(title = NULL, x = NULL) + theme(axis.text.x = element_blank(), strip.text = element_blank()), 
       width = 8.6, height = 2.3, device = "pdf", units = "cm")


## Supplementary Figure 4a (5): Total PBMCs/ml blood immune trajectory in individual clusters (subsample)
# Boxplots of `PBMCs/mLBlood` at each timepoint, one panel per cluster
# (cluster_5l), restricted to rows with subsample == "Yes". Individual
# measurements are overlaid as points; no reference line is drawn for this measure.
# Plot results
cluster_pbmc <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = `PBMCs/mLBlood`, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[20], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(1e3, 1e7), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Total PBMCs / ml Blood by Cluster (subsample)", y = "Total PBMCs / ml blood", x = "Timepoint")
cluster_pbmc

# Save figure (exported without title and facet strip labels; x-axis is kept)
ggsave("./FigS4A5.pdf", 
       plot = cluster_pbmc + labs(title = NULL) + theme(strip.text = element_blank()), 
       width = 8.6, height = 2.9, device = "pdf", units = "cm")


## Supplementary Figure 4a (6): Total T cell trajectory in individual clusters (subsample)
# Boxplots of TC.corr_cc (axis label: pooled SFU/1e6 PBMCs) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; the dotted line marks y = 1.
# Plot results
cluster_tc <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = TC.corr_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[3], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "Overall T Cell Count per 1e6 PBMCs by Cluster (subsample)", y = "Pooled SFU/1e6 PBMCs", x = "Timepoint")
cluster_tc

# Save figure (exported without title, x-axis title and x-axis tick labels)
ggsave("./FigS4A6.pdf", 
       plot = cluster_tc + labs(title = NULL, x = NULL) + theme(axis.text.x = element_blank()), 
       width = 8.6, height = 2.8, device = "pdf", units = "cm")


## Supplementary Figure 4a (7): M-specific T cell trajectory in individual clusters (subsample)
# Boxplots of M.corr_cc (axis label: M SFU/1e6 PBMCs) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; the dotted line marks y = 1.
# Plot results
cluster_m_tc <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = M.corr_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[3], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "M-Specific T Cell Count per 1e6 PBMCs by Cluster (subsample)", y = "M SFU/1e6 PBMCs", x = "Timepoint")
cluster_m_tc

# Save figure (exported without title, x-axis title, x-axis tick labels and facet strip labels)
ggsave("./FigS4A7.pdf", 
       plot = cluster_m_tc + labs(title = NULL, x = NULL) + theme(axis.text.x = element_blank(), strip.text = element_blank()), 
       width = 8.6, height = 2.3, device = "pdf", units = "cm")


## Supplementary Figure 4a (8): N-specific T cell trajectory in individual clusters (subsample)
# Boxplots of N.corr_cc (axis label: N SFU/1e6 PBMCs) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; the dotted line marks y = 1.
# Plot results
cluster_n_tc <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = N.corr_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[3], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "N-Specific T Cell Count per 1e6 PBMCs by Cluster (subsample)", y = "N SFU/1e6 PBMCs", x = "Timepoint")
cluster_n_tc

# Save figure (exported without title, x-axis title, x-axis tick labels and facet strip labels)
ggsave("./FigS4A8.pdf", 
       plot = cluster_n_tc + labs(title = NULL, x = NULL) + theme(axis.text.x = element_blank(), strip.text = element_blank()), 
       width = 8.6, height = 2.3, device = "pdf", units = "cm")


## Supplementary Figure 4a (9): S1-specific T cell trajectory in individual clusters (subsample)
# Boxplots of S1.corr_cc (axis label: S1 SFU/1e6 PBMCs) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; the dotted line marks y = 1.
# Plot results
cluster_s1_tc <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = S1.corr_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[3], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "S1-Specific T Cell Count per 1e6 PBMCs by Cluster (subsample)", y = "S1 SFU/1e6 PBMCs", x = "Timepoint")
cluster_s1_tc

# Save figure (exported without title, x-axis title, x-axis tick labels and facet strip labels)
ggsave("./FigS4A9.pdf", 
       plot = cluster_s1_tc + labs(title = NULL, x = NULL) + theme(axis.text.x = element_blank(), strip.text = element_blank()), 
       width = 8.6, height = 2.3, device = "pdf", units = "cm")


## Supplementary Figure 4a (10): S2-specific T cell trajectory in individual clusters (subsample)
# Boxplots of S2.corr_cc (axis label: S2 SFU/1e6 PBMCs) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; the dotted line marks y = 1.
# Plot results
cluster_s2_tc <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = S2.corr_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[3], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  geom_hline(yintercept = 1, linetype = "dotted", lwd = line_sz) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(0.5, 4e3), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  labs(title = "S2-Specific T Cell Count per 1e6 PBMCs by Cluster (subsample)", y = "S2 SFU/1e6 PBMCs", x = "Timepoint")
cluster_s2_tc

# Save figure (exported without title and facet strip labels; x-axis is kept)
ggsave("./FigS4A10.pdf", 
       plot = cluster_s2_tc + labs(title = NULL) + theme(strip.text = element_blank()), 
       width = 8.6, height = 2.9, device = "pdf", units = "cm")


## Figure 6c (3): AIM+/CD4+ immune trajectory in individual clusters (subsample)
# Boxplots of CD4AIM.delta_cc (axis label: AIM+ / CD4+) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; no reference line is drawn.
# Plot results
cluster_aim_cd4_tc <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = CD4AIM.delta_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[20], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(1e-5, 1e-1), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  # Overrides the right margin of the y-axis title set by graph_style
  theme(axis.title.y = element_text(margin = margin(r = 1))) + 
  labs(title = "AIM+ per CD4+ T Cells by Cluster (subsample)", y = "AIM+ / CD4+", x = "Timepoint")
cluster_aim_cd4_tc

# Save figure (exported without title, x-axis title, x-axis tick labels and facet strip labels)
ggsave("./Fig6C3.pdf", 
       plot = cluster_aim_cd4_tc + labs(title = NULL, x = NULL) + theme(axis.text.x = element_blank(), strip.text = element_blank()),
       width = 8.6, height = 2.3, device = "pdf", units = "cm")


## Figure 6c (4): AIM+/CD8+ immune trajectory in individual clusters (subsample)
# Boxplots of CD8AIM.delta_cc (axis label: AIM+ / CD8+) at each timepoint,
# one panel per cluster (cluster_5l), restricted to rows with subsample == "Yes".
# Individual measurements are overlaid as points; no reference line is drawn.
# Plot results
cluster_aim_cd8_tc <- source_data_g %>% 
  filter(subsample == "Yes") %>% 
  ggplot(aes(y = CD8AIM.delta_cc, x = timepoint)) + 
  # Boxplot outliers are made invisible because every observation is drawn by geom_point()
  geom_boxplot(width = 0.5, color = col[20], outlier.alpha = 0, lwd = line_sz) + 
  # height = 0 restricts the jitter to the x direction; when height is omitted
  # position_jitter() also perturbs y, so points would not sit exactly at their measured values
  geom_point(size = point_sz, position = position_jitter(width = 0.1, height = 0)) + 
  facet_grid(. ~ cluster_5l) +
  scale_y_continuous(trans = "log10", limits = c(1e-5, 1e-1), labels = scales::trans_format("log10", scales::math_format(10^.x))) + 
  graph_style + 
  # Overrides the right margin of the y-axis title set by graph_style
  theme(axis.title.y = element_text(margin = margin(r = 1))) + 
  labs(title = "AIM+ per CD8+ T Cells by Cluster (subsample)", y = "AIM+ / CD8+", x = "Timepoint")
cluster_aim_cd8_tc

# Save figure (exported without title and facet strip labels; x-axis is kept)
ggsave("./Fig6C4.pdf", 
       plot = cluster_aim_cd8_tc + labs(title = NULL) + theme(strip.text = element_blank()), 
       width = 8.6, height = 2.9, device = "pdf", units = "cm")


## Supplementary Figure 4b: Cellular subsets in individual clusters (count per 1e6 PBMCs; subsample)
# Prepare data
# Mean value of each cell-subset measure per timepoint and cluster (subsample rows only).
# The two derived *.count columns are the product of a subset column and its parent count column.
df_cluster_sub <- source_data_g %>%
  filter(subsample == "Yes") %>%
  select(
    id, timepoint, cluster_5l,
    `CD4+.T.cell.count`, `CD8+.T.cell.count`, B.cell.count, NK.cell.count,
    `CD56+CD16+.NK`, total.memory.B.cell
  ) %>%
  mutate(
    `CD56+CD16+.NK.count` = `CD56+CD16+.NK` * NK.cell.count,
    total.memory.B.cell.count = total.memory.B.cell * B.cell.count
  ) %>%
  # Long format: one row per participant, timepoint and count measure
  pivot_longer(
    cols = c(
      `CD4+.T.cell.count`, `CD8+.T.cell.count`, B.cell.count, NK.cell.count,
      `CD56+CD16+.NK.count`, total.memory.B.cell.count
    ),
    names_to = "measure",
    values_to = "value"
  ) %>%
  group_by(timepoint, cluster_5l, measure) %>%
  summarise(mean = mean(value, na.rm = TRUE)) %>%
  ungroup() %>%
  # Display labels in the listed order ...
  mutate(measure = recode_factor(
    measure,
    "CD4+.T.cell.count" = "Total CD4+ cells",
    "CD8+.T.cell.count" = "Total CD8+ cells",
    "B.cell.count" = "Total B cells",
    "NK.cell.count" = "Total NK cells",
    "CD56+CD16+.NK.count" = "CD56+CD16+ \nmature NK cells",
    "total.memory.B.cell.count" = "CD19+CD27+ \nmemory B cells"
  )) %>%
  # ... then reversed, so the first label ends up at the top of a y axis
  mutate(measure = fct_rev(measure))
df_cluster_sub

# Plot results
# Tile heatmap of the mean values in df_cluster_sub: clusters on x, measures on y,
# one panel per timepoint, fill on a log10 viridis scale
cluster_subset <- ggplot(
  data = df_cluster_sub,
  mapping = aes(x = cluster_5l, y = measure, fill = mean)
) +
  geom_tile() +
  facet_grid(cols = vars(timepoint)) +
  scale_fill_continuous(
    type = "viridis",
    trans = "log10",
    limits = c(1e3, 1e5),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  graph_style +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    panel.grid.major.y = element_blank(),
    legend.text = element_text(size = 5),
    legend.key.width = unit(3, "mm"),
    legend.key.height = unit(3, "mm"),
    legend.margin = margin(20, 0, 10, 5),
    legend.box.margin = margin(20, 0, 10, -5)
  ) +
  labs(
    title = "T Cell Subsets by Cluster (subsample)",
    x = "",
    y = "",
    fill = "Cells per \n1e6 PBMCs\n "
  )
cluster_subset

# Save figure (exported without title and axis titles)
ggsave(
  filename = "./FigS4B.pdf",
  plot = cluster_subset + labs(title = NULL, x = NULL, y = NULL),
  device = "pdf",
  width = 13.8,
  height = 4.6,
  units = "cm"
)


## Figure 6d: Clinical characteristics of participants in individual clusters (subsample)
# Prepare data
# For every cluster and clinical variable, count participants per category (n) and
# convert the counts to within-variable proportions (freq). Missing values are excluded
# before counting, so proportions are relative to participants with a recorded value.
df_clin <- source_data_w %>% 
  filter(subsample == "Yes") %>% 
  select(id, cluster_5l, age_group, sex, smoking, symp_count_init_3l, hosp_2wks) %>% 
  pivot_longer(!c(id, cluster_5l), names_to = "var", values_to = "value") %>% 
  filter(!is.na(value)) %>% 
  group_by(cluster_5l, var, value) %>% 
  # Keep the (cluster_5l, var) grouping explicitly: the sum(n) below must be taken
  # within each cluster and variable for freq to add up to 1 per bar
  summarise(n = n(), .groups = "drop_last") %>% 
  mutate(freq = n / sum(n), 
         # Fixes the order in which the variables are laid out
         var = factor(var, levels = c("age_group", "sex", "smoking", "symp_count_init_3l", "hosp_2wks")), 
         # Short display labels; level order is reversed relative to the listing below
         value = fct_rev(recode_factor(value, 
                                       "18-39 years" = "18-39 years", "40-64 years" = "40-64 years", "65+ years" = "65+ years",
                                       "female" = "Female", "male" = "Male", 
                                       "Non-smoker" = "Non-smoker", "Ex-smoker" = "Ex-smoker", "Smoker" = "Smoker", 
                                       "Asymptomatic" = "Asympt.", "1-5 symptoms" = "1-5 sympt.", "≥6 symptoms" = "6+ sympt.", 
                                       "Yes" = "Hosp.", "No" = "Non-hosp."))) %>% 
  ungroup()
df_clin

# Plot results
# Stacked proportion bars per clinical variable, one facet row per cluster,
# with each segment labelled by its category
cluster_clin <- ggplot(
  data = df_clin,
  mapping = aes(x = var, y = freq, fill = value)
) +
  geom_col(width = .8) +
  facet_wrap(vars(cluster_5l), nrow = 5, strip.position = "left") +
  ggfittext::geom_fit_text(
    mapping = aes(label = paste0(value)),
    position = position_stack(),
    reflow = FALSE,
    fontface = "bold",
    size = 5,
    min.size = 5
  ) +
  scale_y_continuous(labels = scales::percent, position = "right") +
  scale_x_discrete(
    labels = c( "Age group", "Sex", "Smoking", "Symptom \n count", "Hospitalized"),
    position = "top"
  ) +
  scale_fill_manual(
    values = alpha(colorRampPalette(brewer.pal(11, "RdYlBu"))(17), 0.7)
  ) +
  graph_style +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(size = 7, face = "bold", color = "#000000")
  ) +
  labs(
    title = "Clinical Characteristics of Cluster Participants",
    x = "",
    y = "",
    color = ""
  )
cluster_clin

# Save figure (exported without title and y-axis title)
ggsave(
  filename = "./Fig6D.pdf",
  plot = cluster_clin + labs(title = NULL, y = NULL),
  device = "pdf",
  width = 9,
  height = 10.8,
  units = "cm"
)


### Association Analyses ----------------------------------------------------------------------

## Presets
# Colour used for points and error bars in the coefficient plots
col_reg <- col[10]
# Order in which model terms are passed as factor levels in the coefficient plots
outcome_order <- factor(c(
  "time_since_diagnosis",
  "age_group40-64 years",
  "age_group65+ years",
  "sexmale",
  "symp_count_init_3l1-5 symptoms",
  "symp_count_init_3l≥6 symptoms",
  "hosp_2wksYes",
  "smokingEx-smoker",
  "smokingSmoker",
  "bmi",
  "comorbidityYes",
  "immune_suppYes"
))

## Figure 7a, Supplementary Table 6: Association of relevant predictor variables with anti-S IgG antibodies (MFI ratio; overall study population)
# Analysis dataset: rows with complete data for the outcome and every model variable
data_mod <- drop_na(
  source_data,
  logratio_S_IgG, time_since_diagnosis, age_group, sex, symp_count_init_3l,
  hosp_2wks, smoking, bmi, comorbidity, immune_supp
)

# Model specification: outcome (o), adjustment variables (v.adj), and further tested covariables (v.cov)
o <- "logratio_S_IgG"
v.adj <- c("time_since_diagnosis", "age_group", "sex", "symp_count_init_3l")
v.cov <- c("hosp_2wks", "smoking", "bmi", "comorbidity", "immune_supp")

# Univariable and multivariable mixed linear regression models (random intercept for ID)
output_uv <- gen_output_lmer_uv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)
output_mv <- gen_output_lmer_mv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)

# Coefficient plot of the multivariable results (multiple manual steps involved for creation of final figure)
output_mv_s_igg <- ggplot(
  data = mutate(output_mv, outcome = factor(outcome, levels = outcome_order)),
  mapping = aes(y = fct_rev(outcome), x = coef, xmin = CI.lb, xmax = CI.ub)
) +
  geom_point(col = col_reg) +
  geom_errorbar(col = col_reg, width = 0.2) +
  geom_vline(xintercept = 0, lwd = line_sz, linetype = "dotted") +
  scale_x_continuous(limits = c(-2.0, 2.5)) +
  graph_style +
  labs(title = "Anti-S IgG", x = "Coefficient (95% CI)", y = "")
output_mv_s_igg

# Multivariable results as a table
select(output_mv, outcome, coef_CI, pval)


## Figure 7b, Supplementary Table 7: Association of relevant predictor variables with anti-S IgA antibodies (MFI ratio; overall study population)
# Analysis dataset: rows with complete data for the outcome and every model variable
data_mod <- drop_na(
  source_data,
  logratio_S_IgA, time_since_diagnosis, age_group, sex, symp_count_init_3l,
  hosp_2wks, smoking, bmi, comorbidity, immune_supp
)

# Model specification: outcome (o), adjustment variables (v.adj), and further tested covariables (v.cov)
o <- "logratio_S_IgA"
v.adj <- c("time_since_diagnosis", "age_group", "sex", "symp_count_init_3l")
v.cov <- c("hosp_2wks", "smoking", "bmi", "comorbidity", "immune_supp")

# Univariable and multivariable mixed linear regression models (random intercept for ID)
output_uv <- gen_output_lmer_uv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)
output_mv <- gen_output_lmer_mv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)

# Coefficient plot of the multivariable results (multiple manual steps involved for creation of final figure)
output_mv_s_iga <- ggplot(
  data = mutate(output_mv, outcome = factor(outcome, levels = outcome_order)),
  mapping = aes(y = fct_rev(outcome), x = coef, xmin = CI.lb, xmax = CI.ub)
) +
  geom_point(col = col_reg) +
  geom_errorbar(col = col_reg, width = 0.2) +
  geom_vline(xintercept = 0, lwd = line_sz, linetype = "dotted") +
  scale_x_continuous(limits = c(-2.0, 2.5)) +
  graph_style +
  labs(title = "Anti-S IgA", x = "Coefficient (95% CI)", y = "")
output_mv_s_iga

# Multivariable results as a table
select(output_mv, outcome, coef_CI, pval)


## Figure 7c: Association of relevant predictor variables with anti-N IgG antibodies (MFI ratio; subsample)
# Analysis dataset: subsample rows with complete data for the outcome and every model variable
data_mod <- drop_na(
  filter(source_data, subsample == "Yes"),
  logratio_N_IgG, time_since_diagnosis, age_group, sex, symp_count_init_3l,
  hosp_2wks, smoking, bmi, comorbidity, immune_supp
)

# Model specification: outcome (o), adjustment variables (v.adj), and further tested covariables (v.cov)
o <- "logratio_N_IgG"
v.adj <- c("time_since_diagnosis", "age_group", "sex", "symp_count_init_3l")
v.cov <- c("hosp_2wks", "smoking", "bmi", "comorbidity", "immune_supp")

# Univariable and multivariable mixed linear regression models (random intercept for ID)
output_uv <- gen_output_lmer_uv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)
output_mv <- gen_output_lmer_mv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)

# Coefficient plot of the multivariable results (multiple manual steps involved for creation of final figure)
output_mv_n_igg <- ggplot(
  data = mutate(output_mv, outcome = factor(outcome, levels = outcome_order)),
  mapping = aes(y = fct_rev(outcome), x = coef, xmin = CI.lb, xmax = CI.ub)
) +
  geom_point(col = col_reg) +
  geom_errorbar(col = col_reg, width = 0.2) +
  geom_vline(xintercept = 0, lwd = line_sz, linetype = "dotted") +
  scale_x_continuous(limits = c(-4, 3)) +
  graph_style +
  labs(title = "Anti-N IgG", x = "Coefficient (95% CI)", y = "")
output_mv_n_igg

# Multivariable results as a table
select(output_mv, outcome, coef_CI, pval)


## Figure 7d, Supplementary Table 8: Association of relevant predictor variables with overall T cell count (SFU/1e6 PBMCs; subsample)
# Analysis dataset: subsample rows with complete data for the outcome and every model variable
data_mod <- drop_na(
  filter(source_data, subsample == "Yes"),
  log_TC.corr, time_since_diagnosis, age_group, sex, symp_count_init_3l,
  hosp_2wks, smoking, bmi, comorbidity, immune_supp
)

# Model specification: outcome (o), adjustment variables (v.adj), and further tested covariables (v.cov)
o <- "log_TC.corr"
v.adj <- c("time_since_diagnosis", "age_group", "sex", "symp_count_init_3l")
v.cov <- c("hosp_2wks", "smoking", "bmi", "comorbidity", "immune_supp")

# Univariable and multivariable mixed linear regression models (random intercept for ID)
output_uv <- gen_output_lmer_uv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)
output_mv <- gen_output_lmer_mv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)

# Coefficient plot of the multivariable results (multiple manual steps involved for creation of final figure)
output_mv_tc <- ggplot(
  data = mutate(output_mv, outcome = factor(outcome, levels = outcome_order)),
  mapping = aes(y = fct_rev(outcome), x = coef, xmin = CI.lb, xmax = CI.ub)
) +
  geom_point(col = col_reg) +
  geom_errorbar(col = col_reg, width = 0.2) +
  geom_vline(xintercept = 0, lwd = line_sz, linetype = "dotted") +
  scale_x_continuous(limits = c(-4, 3)) +
  graph_style +
  labs(title = "Pooled T Cells", x = "Coefficient (95% CI)", y = "")
output_mv_tc

# Multivariable results as a table
select(output_mv, outcome, coef_CI, pval)


## Supplementary Table 9 (1): Association of relevant predictor variables with anti-S IgG positivity at 2 weeks (overall study population)
# Analysis dataset: W2 rows with complete data for the outcome and every model variable
data_mod <- drop_na(
  filter(source_data, timepoint == "W2"),
  result_S_IgG, time_since_diagnosis, age_group, sex, symp_count_init_3l,
  hosp_2wks, smoking, bmi, comorbidity, immune_supp
)

# Model specification: outcome (o), adjustment variables (v.adj), and further tested covariables (v.cov)
o <- "result_S_IgG"
v.adj <- c("time_since_diagnosis", "age_group", "sex", "symp_count_init_3l")
v.cov <- c("hosp_2wks", "smoking", "bmi", "comorbidity", "immune_supp")

# Univariable and multivariable logistic regression models
output_uv <- gen_output_glm_uv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)
output_mv <- gen_output_glm_mv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)

# Multivariable results as a table (coef_CI shown under the name OR_CI)
select(output_mv, outcome, OR_CI = coef_CI, pval)


## Supplementary Table 9 (2): Association of relevant predictor variables with anti-S IgG positivity at 6 months (overall study population)
# Analysis dataset: M6 rows with complete data for the outcome and every model variable
data_mod <- drop_na(
  filter(source_data, timepoint == "M6"),
  result_S_IgG, time_since_diagnosis, age_group, sex, symp_count_init_3l,
  hosp_2wks, smoking, bmi, comorbidity, immune_supp
)

# Model specification: outcome (o), adjustment variables (v.adj), and further tested covariables (v.cov)
o <- "result_S_IgG"
v.adj <- c("time_since_diagnosis", "age_group", "sex", "symp_count_init_3l")
v.cov <- c("hosp_2wks", "smoking", "bmi", "comorbidity", "immune_supp")

# Univariable and multivariable logistic regression models
output_uv <- gen_output_glm_uv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)
output_mv <- gen_output_glm_mv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)

# Multivariable results as a table (coef_CI shown under the name OR_CI)
select(output_mv, outcome, OR_CI = coef_CI, pval)


## Supplementary Table 10 (1): Association of relevant predictor variables with overall T cell positivity at 2 weeks (subsample)
# Restrict dataset and remove rows with missing values
# The subsample == "Yes" restriction makes the dataset match the "(subsample)" scope stated
# above and the filter used for the 6-month model of the same table; previously only the
# timepoint was filtered, so any non-subsample row with a non-missing Any_Pos entered the model.
data_mod <- source_data %>% 
  filter(timepoint == "W2", subsample == "Yes") %>% 
  drop_na(Any_Pos, time_since_diagnosis, age_group, sex, symp_count_init_3l, hosp_2wks, smoking, bmi, comorbidity, immune_supp)

# Define outcome (o), adjustment variables (v.adj), and further tested covariables (v.cov)
o <- "Any_Pos"
v.adj <- c("time_since_diagnosis", "age_group", "sex", "symp_count_init_3l")
v.cov <- c("hosp_2wks", "smoking", "bmi", "comorbidity", "immune_supp")

# Run univariable and multivariable logistic regression models
output_uv <- gen_output_glm_uv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)
output_mv <- gen_output_glm_mv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)

# Print multivariable results (coef_CI shown under the name OR_CI)
output_mv %>% select(outcome, OR_CI = coef_CI, pval)


## Supplementary Table 10 (2): Association of relevant predictor variables with overall T cell positivity at 6 months (subsample)
# Analysis dataset: M6 subsample rows with complete data for the outcome and every model variable
data_mod <- drop_na(
  filter(source_data, timepoint == "M6", subsample == "Yes"),
  Any_Pos, time_since_diagnosis, age_group, sex, symp_count_init_3l,
  hosp_2wks, smoking, bmi, comorbidity, immune_supp
)

# Model specification: outcome (o), adjustment variables (v.adj), and further tested covariables (v.cov)
o <- "Any_Pos"
v.adj <- c("time_since_diagnosis", "age_group", "sex", "symp_count_init_3l")
v.cov <- c("hosp_2wks", "smoking", "bmi", "comorbidity", "immune_supp")

# Univariable and multivariable logistic regression models
output_uv <- gen_output_glm_uv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)
output_mv <- gen_output_glm_mv(outc = o, vars.adj = v.adj, vars.cov = v.cov, data = data_mod)

# Multivariable results as a table (coef_CI shown under the name OR_CI)
select(output_mv, outcome, OR_CI = coef_CI, pval)


### End ----------------------------------------------------------------------