####################################################### # # R Code for "Association Between Seasonal # Influenza and Absolute Humidity: Time-Series # Analysis with Daily Surveillance Data in Japan" # # Author: Keita Shimmei # Date: March, 1st, 2020 # ####################################################### ####################################### # # Step 1: Data Preparation # ####################################### # Import library library(tidyverse) # for data processing library(dlnm) # for modeling distributed lag non-linear model library(lubridate) # for manipulating time-series data # Import dataset R_Reprocess_dir <- "~/OneDrive/Research/Epidemiology/Kawasaki/R/Reprocessing" setwd(R_Reprocess_dir) load("DATA_A.rdata") ############################## # Create dataset for analysis # make variable: day of season data.frame(date = seq(as.Date("2015-11-01"), as.Date("2016-04-30"), by = "days")) %>% mutate(day = day(date), month = month(date), month_day = paste(month, day, sep = "_"), day_of_season = rep(1:nrow(.))) %>% dplyr::select(month_day, day_of_season) -> Day_of_season # merge "day of season" into dataset DATA_A %>% filter(season %in% c("season2", "season3", "season4")) %>% mutate(day = day(date), month = month(date), month_day = paste(month, day, sep = "_")) %>% left_join(Day_of_season, by = "month_day") %>% as.data.frame(.) -> DATA DATA %>% group_by(date, season, dow, holiday, day, month, day_of_season) %>% summarise(AB_HUM = mean(AB_HUM), total_case = sum(region_case), clinic = mean(clinic), HUM = mean(HUM), TEMP = mean(TEMP), PM2.5 = mean(PM2.5)) %>% mutate(season_year = ifelse(season == "season2", 2014, ifelse(season == "season3", 2015, 2016))) %>% group_by(season_year) %>% mutate(log_case_yesterday = log(lag(total_case)), log_case_yesterday = ifelse(is.infinite(log_case_yesterday), 0, log_case_yesterday)) -> data ############################## # Set hyper-parameters ############################## MAXLAG <- 27 MIN <- min(data$AB_HUM) MAX <- max(data$AB_HUM) CEN <- quantile(data$AB_HUM, probs=c(0.95)) print(paste("Max Lag:", MAXLAG)) print(paste("Reference Value:", signif(CEN, digits = 4))) ######################################## # Create lists of AH and its percentile ######################################## AH_percentile <- quantile(data$AB_HUM, probs = seq(0, 1, 0.0001)) AH_percentile <- data.frame(percentile = as.numeric(format(as.numeric(as.character(str_sub(names(AH_percentile), end = -2))), nsmall = 2)), AB_HUM = AH_percentile) data$percentile <- NA for(i in 1:length(data$AB_HUM)){ AH_percentile %>% mutate(ah = data$AB_HUM[i], gap = abs(AB_HUM - ah)) %>% filter(gap == min(gap)) -> nearest data$percentile[i] <- nearest$percentile } ####################################### # # Step 2: DLNM Estimation # ####################################### result_list <- list() model_name <- c("Model 1", "Model 2", "Model 3", "Model 4") ################################################## # Model 1: f(•) = ns(date), g(•) = log(Y_{t-1}) ################################################## DF_argvar <- 3 DF_arglag <- 3 crossbasis <- crossbasis(data$AB_HUM, # main exposure; absolute humidity lag = MAXLAG, # set maximum lag argvar = list(fun = "ns", df = DF_argvar), # set function for exposure effect arglag = list(fun = "ns", df = DF_arglag), # set function for lag effect group = data$season) # take a lag by season model <- glm(total_case ~ crossbasis + factor(dow) # controlling day of week + holiday # contrlling holiday + ns(date, df = 4*3) # controlling seasonality + log_case_yesterday # controlling autocorrelation # + ns(season_year, df = 2) #+ ns(day_of_season, df = 2) , offset = log(clinic) # offset term , family = quasipoisson, data = data) # use Quasi-Poisson model result_list[['Model 1']]$model <- model result_list[['Model 1']]$crossbasis <- crossbasis ################################################## # Model 2: f(•) = ns(date), g(•) = 0 ################################################## DF_argvar <- 3 DF_arglag <- 3 crossbasis <- crossbasis(data$AB_HUM, lag = MAXLAG, argvar = list(fun = "ns", df = DF_argvar), arglag = list(fun = "ns", df = DF_arglag), group = data$season) model <- glm(total_case ~ crossbasis + factor(dow) + holiday + ns(date, df = 4*3) # + log_case_yesterday # + ns(season_year, df = 2) # + ns(day_of_season, df = 2) , offset = log(clinic) , family = quasipoisson, data = data) result_list[['Model 2']]$model <- model result_list[['Model 2']]$crossbasis <- crossbasis ############################################################################# # Model 3: f(•) = ns(season year) + ns(day of season), g(•) = log(Y_{t-1}) ############################################################################# DF_argvar <- 2 DF_arglag <- 3 crossbasis <- crossbasis(data$AB_HUM, lag = MAXLAG, argvar = list(fun = "ns", df = DF_argvar), arglag = list(fun = "ns", df = DF_arglag), group = data$season) model <- glm(total_case ~ crossbasis + factor(dow) + holiday #+ ns(date, df = 4*3) + log_case_yesterday + ns(day_of_season, df = 2) + ns(season_year, df = 2) , offset = log(clinic) , family = quasipoisson, data = data) result_list[['Model 3']]$model <- model result_list[['Model 3']]$crossbasis <- crossbasis ############################################################################# # Model 4: f(•) = ns(season year) + ns(day of season), g(•) = 0 ############################################################################# DF_argvar <- 2 DF_arglag <- 3 crossbasis <- crossbasis(data$AB_HUM, lag = MAXLAG, argvar = list(fun = "ns", df = DF_argvar), arglag = list(fun = "ns", df = DF_arglag), group = data$season) model <- glm(total_case ~ crossbasis + factor(dow) + holiday # + ns(date, df = 4*3) # + log_case_yesterday + ns(day_of_season, df = 2) + ns(season_year, df = 2) , offset = log(clinic) , family = quasipoisson, data = data) result_list[['Model 4']]$model <- model result_list[['Model 4']]$crossbasis <- crossbasis ####################################### # # Step 3: Calculate RR by Model # ####################################### ######################################## # Overall effect ######################################## PERCENTILE <- c(95, 75, 50, 25, 1) AH_percentile %>% filter(percentile %in% PERCENTILE) %>% mutate(AB_HUM = as.numeric(format(AB_HUM, digits = 3))) -> lag_percentile scale_to_value <- function(x){ return(x) } predict_overall <- NULL for(MODEL in model_name){ print(MODEL) model <- result_list[[MODEL]]$model crossbasis <- result_list[[MODEL]]$crossbasis crall <- crossreduce(crossbasis, model, from = MIN, to = MAX, by = 0.1, cen = CEN) data.frame(model = MODEL, AH = as.numeric(names(crall$RRfit)), fit = crall$RRfit, low = crall$RRlow, high = crall$RRhigh) %>% bind_rows(predict_overall) %>% arrange(model) -> predict_overall } print(predict_overall) ############################## # lag-response relationship ############################## Pred_table <- NULL for(MODEL in model_name){ print(MODEL) model <- result_list[[MODEL]]$model crossbasis <- result_list[[MODEL]]$crossbasis for(i in 1:nrow(lag_percentile)){ VALUE <- lag_percentile$AB_HUM[i] PER <- lag_percentile$percentile[i] crvar <- crossreduce(crossbasis, model, type = "var", value = VALUE, from=MIN, to=MAX, by = 0.1, cen = CEN) data.frame(model = MODEL, value = paste0("Absolute Humidity: ", PER, "% ", "(", VALUE, " g/)"), Lag = names(crvar$RRfit), fit = crvar$RRfit, low = crvar$RRlow, high = crvar$RRhigh) %>% bind_rows(Pred_table) -> Pred_table } } print(Pred_table) ############################################ # Variable-response relationship at lag=12 ############################################ VALUE_list <- c(12) VALUE_level <- paste0("Lag: ", VALUE_list) Pred_table <- NULL for(MODEL in model_name){ print(MODEL) model <- result_list[[MODEL]]$model crossbasis <- result_list[[MODEL]]$crossbasis for(VALUE in VALUE_list){ crlag <- crossreduce(crossbasis, model, type = "lag", value = VALUE, from=MIN, to=MAX, by = 0.1, cen = CEN) data.frame(model = MODEL, value = paste0("Lag: ", VALUE), AH = as.numeric(names(crlag$RRfit)), fit = crlag$RRfit, low = crlag$RRlow, high = crlag$RRhigh) %>% bind_rows(Pred_table) -> Pred_table } } print(Pred_table)