### Additional file 1: Mock Analysis code ####

#### Mock Analysis 1: ####
## Predicting the spatial distribution of Plasmodium vivax using
## malariaAtlas-derived response and covariate data

## Loading packages
# Data access, management and visualisation
library(malariaAtlas)
library(boot)
library(ggplot2)
# Spatial utility and modelling
library(INLA)
library(raster)
library(sp)
library(rgeos)
library(rgdal)
library(seegSDM) # available via devtools::install_github("SEEG-Oxford/seegSDM")
library(seegMBG) # available via devtools::install_github("SEEG-Oxford/seegMBG")

## Download & Visualise Response data
# Define spatial extent and download points within this area
extent <- matrix(
  c(-65.2, -11.8, -52, 1),
  2, 2,
  dimnames = list(c("x", "y"), c("min", "max"))
)
pvpr_raw <- getPR(extent = extent, species = "pv")
# Subset to data points for which all data is publicly available
pvpr_raw <- pvpr_raw[!is.na(pvpr_raw$pr), ]

# Visualise these points
# create a shapefile of arbitrary study extent
shp <- as(raster::extent(extent), "SpatialPolygons")
# convert to a data.frame for ggplot-mapping
shp_df <- ggplot2::fortify(shp)
p <- autoplot(
  pvpr_raw,
  shp_df = shp_df,
  printed = FALSE,
  map_title = "PvPR Surveys in Example Study Area",
  fill_legend_title = "Raw PvPR"
)
print(p)

# Use convertPrevalence to standardise pvpr to Pv parasite rate in age
# range 1 - 99
pvpr_raw$pv_pr_1to99 <- convertPrevalence(
  prevalence = pvpr_raw$pr,
  age_min_in = pvpr_raw$lower_age,
  age_max_in = pvpr_raw$upper_age,
  age_min_out = rep(1, length(pvpr_raw$pr)),
  age_max_out = rep(99, length(pvpr_raw$pr)),
  parameters = "Pv_Gething2012"
)

# Replace points layer in plot 'p' with age-standardized points & visualise
p$layers[[2]] <- geom_point(
  data = pvpr_raw,
  aes(x = longitude, y = latitude, fill = pv_pr_1to99, size = examined),
  shape = 21
)
p <- p + scale_fill_distiller(name = "PvPR1-99", palette = "RdYlBu")
print(p)

## Prepare Covariates
# load in environmental covariates pre-cropped to study region
covariates <- raster::stack(
  # NASA LP DAAC. Land surface temperature and emissivity 8-day L3 global
  # 1km. version 005. 2015. https://lpdaac.usgs.gov. Accessed Feb 2017.
  raster::raster("LST_night.tif"),
  # NASA LP DAAC. SRTMGL3S: NASA Shuttle Radar Topography Mission Global
  # 3 arc second sub-sampled. Version 003. 2013. https://lpdaac.usgs.gov.
  # Accessed Mar 2016.
  raster::raster("elevation.tif"),
  # Hijmans RJ, Cameron SE, Parra JL, Jones PG, Jarvis A. Very high
  # resolution interpolated climate surfaces for global land areas.
  # International Journal of Climatology. 2005; 25:1965-1978.
  raster::raster("rainfall.tif")
)

# download raster of travel time to cities (Weiss et al 2018) for study
# area & visualise this
access <- malariaAtlas::getRaster(
  surface = "A global map of travel time to cities to assess inequalities in accessibility in 2015",
  extent = extent
)
autoplot_MAPraster(access)

# stack all covariate rasters, rename these and visualise
covariates <- raster::stack(covariates, access)
names(covariates) <- c("NightTimeTemp", "Elevation", "Rainfall", "TravelToCities")
covariate_plot <- autoplot_MAPraster(covariates)

# Convert age-standardised PR value to age-standardised number of positive
# diagnoses to enable weighting by sample size while using binomial
# likelihood in inla model
pvpr_raw$pv_pos_1to99 <- pvpr_raw$pv_pr_1to99 * pvpr_raw$examined

# apply transformations to covariates and cap extreme values (far beyond
# those in regions where we have response data)
covariates_trans <- covariates

# log-transform Elevation and Travel time to cities
# NOTE(review): the additive offsets (22.001 and 0.1) presumably keep the
# log argument strictly positive over the study area -- confirm.
covariates_trans$Elevation <- log(covariates$Elevation + 22.001)
covariates_trans$TravelToCities <- log(covariates$TravelToCities + 0.1)

# cap elevation and travel time to cities to range where we have response
# data
# NOTE(review): `pvpr_cov` is not defined anywhere in the visible script.
# Presumably it holds covariate values at the survey locations, on the same
# (transformed) scale as `covariates_trans` -- confirm and define it before
# this point.
# NOTE(review): part of this capping code was lost to text extraction. The
# lower-bound assignments and the `access_cap` definition below are
# reconstructed from the surviving fragments and the upper-bound lines --
# verify against the original file.
elev_cap <- c(min(pvpr_cov$Elevation), max(pvpr_cov$Elevation))
values(covariates_trans$Elevation)[
  values(covariates_trans$Elevation) > elev_cap[2]
] <- elev_cap[2]
values(covariates_trans$Elevation)[
  values(covariates_trans$Elevation) < elev_cap[1]
] <- elev_cap[1]

access_cap <- c(min(pvpr_cov$TravelToCities), max(pvpr_cov$TravelToCities))
values(covariates_trans$TravelToCities)[
  values(covariates_trans$TravelToCities) > access_cap[2]
] <- access_cap[2]
values(covariates_trans$TravelToCities)[
  values(covariates_trans$TravelToCities) < access_cap[1]
] <- access_cap[1]

# ---------------------------------------------------------------------------
# NOTE(review): CONTENT MISSING HERE. The text this script was recovered from
# drops everything between the capping code above and the fold summaries
# below. The missing section must create `w` and `w2` (both are read below
# via `$model.output` and `$report`) and presumably also loads the package
# providing `%>%`, `group_by()`, `count()` and `distinct()` (dplyr?) --
# restore it from the original file before running the remainder.
# ---------------------------------------------------------------------------

# count rows per fold and value
# NOTE(review): the head of the first statement (`w$model.output[[1]]$data`)
# was lost with the section above and is restored by symmetry with the `w2`
# line -- verify.
w$model.output[[1]]$data %>%
  group_by(fold, value) %>%
  count()
w2$model.output[[1]]$data %>%
  group_by(fold, value) %>%
  count()

# count individuals
w$model.output[[1]]$data %>%
  group_by(fold) %>%
  count()
w2$model.output[[1]]$data %>%
  group_by(fold) %>%
  count()

# count locations
w$model.output[[1]]$data %>%
  distinct(latitude, longitude, .keep_all = TRUE) %>%
  group_by(fold) %>%
  count()
w2$model.output[[1]]$data %>%
  distinct(latitude, longitude, .keep_all = TRUE) %>%
  group_by(fold) %>%
  count()

# retrieve AUC values from the model(s)
w$report[[1]][[3]]$auc
w2$report[[1]][[3]]$auc

## Convert raster outputs to MAPraster format for visualisation
w_df <- as.MAPraster(w$report[[1]][[2]])
w2_df <- as.MAPraster(w2$report[[1]][[2]])

# Build the styled map for one MAPraster object. Both model outputs use
# exactly the same styling, so it lives in one place: first panel of the
# autoplot output, "Blues" fill scale fixed to [0, 1] with Low/High labels,
# no axis padding, a tall colour bar and enlarged legend/strip text.
style_pfpr_map <- function(map_raster) {
  base_map <- autoplot(
    map_raster,
    plot_titles = "",
    legend_title = "PfPR",
    printed = FALSE
  )[[1]]

  base_map +
    scale_fill_distiller(
      name = "PfPR2-10\n",
      palette = "Blues",
      limits = c(0, 1),
      direction = 1,
      labels = c("Low", "", "", "", "High")
    ) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    guides(fill = guide_colorbar(barwidth = 1, barheight = 15)) +
    theme(
      legend.text = element_text(size = 16),
      legend.title = element_text(face = "bold", size = 17),
      strip.text = element_text(face = "bold", size = 20)
    )
}

w_plot <- style_pfpr_map(w_df)
w2_plot <- style_pfpr_map(w2_df)