######################################################## #### Spatial analysis method testing in simulations #### ######################################################## # load libraries library(rjags) library(runjags) library(animation) # set working directory setwd("./Ebola") generate.population <- function( population.number = 20, population.size.meanlog = 10, population.size.sdlog = 1) { max.size <- 10*population.number population.xcoord <- runif(population.number, max = max.size) population.ycoord <- runif(population.number, max = max.size) population.size <- round(rlnorm(population.number, population.size.meanlog, population.size.sdlog)) population <- data.frame( x = population.xcoord, y = population.ycoord, size = population.size) return(population) } simulate.epidemic <- function( population = generate.population(), weights = c(0.3700, 0.3569, 0.1517, 0.0708, 0.0340, 0.0166), R.before = 2, R.after = 0.5, breakpoint.meanlog = 5, breakpoint.sdlog = 0.2, alpha = 1.3, frac = 0.06, delta = 2.5) { if(R.after > 1) stop("R.after > 1, will lead to never-ending epidemic") population.distances <- sqrt(sapply(population$x, function(x) (x - population$x)^2) + sapply(population$y, function(y) (y - population$y)^2)) diag(population.distances) <- 1 spatial.matrix <- population.distances^(-delta)%*%diag(population$size^alpha) spatial.matrix <- (1-diag(nrow(population)))*frac*spatial.matrix/(rowSums(spatial.matrix) - diag(spatial.matrix)) breakpoint <- round(rlnorm(nrow(population), meanlog = breakpoint.meanlog, sdlog = breakpoint.sdlog)) # initialisation incidence <- matrix(rep(0, nrow(population)), nrow = nrow(population)) week <- 1 distance.2.centre <- (mean(population$x) - population$x)^2 + (mean(population$y) - population$y)^2 incidence[which(distance.2.centre == min(distance.2.centre)), week] <- 1 breakpoint.reached <- rep(0, nrow(population)) origin <- rep(0, nrow(population)) while(sum(incidence[, max(1, week+1-length(weights)):week]) != 0) { R <- ifelse(rowSums(incidence) < breakpoint, R.before, R.after) breakpoint.reached[rowSums(incidence) < breakpoint] <- week wij <- rev(weights[1:min(length(weights), week)]) weightedinc <- sapply(1:nrow(population), function(n) sum(incidence[n, max(1, week+1-length(weights)):week]*wij)) Lambda <- R*weightedinc expected.incidence <- (1-frac)*Lambda + t(spatial.matrix)%*%Lambda #incidence <- cbind(incidence, rpois(nrow(population), lambda = expected.incidence)) new.incidence <- rpois(nrow(population), lambda = expected.incidence) new.infection <- which(rowSums(incidence) == 0 & new.incidence > 0) for(i in new.infection) origin[i] <- sample(1:nrow(population), 1, replace = TRUE, prob = (t(spatial.matrix)*Lambda)[,i]) incidence <- cbind(incidence, new.incidence) week <- week + 1 } res <- list( population = cbind(population, breakpoint, breakpoint.reached = breakpoint.reached+1, origin), incidence = incidence[, 1:(week-length(weights)+1)], parameters = list(frac = frac, alpha = alpha, delta = delta, R.before = R.before, R.after = R.after, weights = weights) ) return(res) } ############# some plot and animation functions ############## plot.incidence <- function(epidemic) { par(mar = c(4, 4, 1, 1), las = 1, xaxs = "i", yaxs = "i") with(epidemic, { plot(x = c(1, ncol(incidence)), y = c(1, max(incidence)), type = "l", col = "white", xlab = "time", ylab = "incidence") for(i in 1:nrow(population)) lines(incidence[i,], col = rainbow(nrow(population))[i]) }) } plot.cumincidence <- function(epidemic) { par(mar = c(4, 4, 1, 1), las = 1, xaxs = "i", yaxs = "i") with(epidemic, { plot(x = c(1, ncol(incidence)), y = c(1, max(rowSums(incidence))), type = "l", col = "white", xlab = "time", ylab = "cumulative incidence") for(i in 1:nrow(population)) lines(cumsum(incidence[i,]), col = rainbow(nrow(population))[i]) }) } animate.epidemic <- function(epidemic) { ani.options(interval = 0.5) par(mar = c(1, 1, 1, 1)) with(epidemic, { max.size <- 10*nrow(epidemic$population) for(week in 2:ncol(incidence)) { plot(x = population$x, y = population$y, pch = 19, cex = 0.2, xlab = NA, ylab = NA, axes = FALSE, yaxt = "n", asp = 1, xlim = c(-0.02*max.size, 1.02*max.size), ylim = c(-0.02*max.size, 1.02*max.size)) rect(xleft = 0, ybottom = 0, xright = max.size, ytop = max.size) points(x = population$x, y = population$y, cex = 0.2*sqrt(rowSums(incidence[, 1:week])), col = rainbow(nrow(population))) points(x = population$x, y = population$y, pch = 19, cex = 0.2*sqrt(incidence[, week]), col = rainbow(nrow(population))) ani.pause() } }) par(mar = c(4.5, 4.5, 1.5, 1.5)) } ############# test single simulation & generate 200 epidemics ############## epidemic <- simulate.epidemic() plot.incidence(epidemic) plot.cumincidence(epidemic) animate.epidemic(epidemic) n.sim <- 200 alpha <- 1.3 frac <- 0.06 delta <- 2.5 R.before <- 2 R.after <- 0.5 simulations <- list() for(n in 1:n.sim) { print(paste("simulation", n)) ok <- 0 while(ok == 0) { epidemic <- simulate.epidemic(population = generate.population(population.number = 20), alpha = alpha, frac = frac, delta = delta, R.before = R.before, R.after = R.after) if(sum(epidemic$incidence) > 200) ok <- 1 } simulations[[length(simulations)+1]] <- epidemic } saveRDS(simulations, file = "./simulations/simulated_epidemics.rds") ############# analysis of multiple simulations ############## # prepare datalist for analysis in JAGS prepare.jagsanalysis <- function(epidemic) { population.distances <- sqrt(sapply(epidemic$population$x, function(x) (x - epidemic$population$x)^2) + sapply(epidemic$population$y, function(y) (y - epidemic$population$y)^2)) diag(population.distances) <- 1 wmax <- length(epidemic$parameters$weights) weightedinc <- array(0, dim = dim(epidemic$incidence)) for(i in 1:nrow(epidemic$incidence)) { for(j in 2:ncol(epidemic$incidence)) { weightedinc[i, j] <- sum(epidemic$parameters$weights[1:min(wmax, j-1)]*epidemic$incidence[i, (j-1):max(1,j-wmax)]) } } return(list( n.weeks = ncol(epidemic$incidence), n.districts = nrow(epidemic$incidence), dist.matrix = population.distances, weightedinc = weightedinc, population = epidemic$population$size, inc.matrix = epidemic$incidence)) } # posterior R based on all locations and times with observed incidence, # with a distinction between before and after breakpoint postR <- function(epidemic) { incidence.indices <- which(epidemic$incidence != 0, arr.ind = TRUE) col.R <- paste0("R2[",incidence.indices[,"row"],",",incidence.indices[,"col"],"]") col.Rbefore <- unlist(sapply(1:nrow(epidemic$population), function(j) paste0("R2[",j,",",1:(epidemic$population$breakpoint.reached[j]-1),"]"))) col.Rafter <- unlist(sapply(1:nrow(epidemic$population), function(j) paste0("R2[",j,",",epidemic$population$breakpoint.reached[j]:ncol(epidemic$incidence),"]"))) tmp <- epidemic$posterior[,intersect(col.R, col.Rbefore)] Rbefore <- quantile(as.vector(tmp), probs = c(0.025,0.50,0.975)) tmp <- epidemic$posterior[,intersect(col.R, col.Rafter)] Rafter <- quantile(as.vector(tmp), probs = c(0.025,0.50,0.975)) return(list(before = Rbefore, after = Rafter)) } # determine most likely origin (highest posterior probability), # and compare with actual origin in simulation correct.origin <- function(epidemic) { population.distances <- sqrt(sapply(epidemic$population$x, function(x) (x - epidemic$population$x)^2) + sapply(epidemic$population$y, function(y) (y - epidemic$population$y)^2)) diag(population.distances) <- 1 wmax <- length(epidemic$parameters$weights) weightedinc <- array(0, dim = dim(epidemic$incidence)) for(i in 1:nrow(epidemic$incidence)) { for(j in 2:ncol(epidemic$incidence)) { weightedinc[i, j] <- sum(epidemic$parameters$weights[1:min(wmax, j-1)]*epidemic$incidence[i, (j-1):max(1,j-wmax)]) } } n.districts <- nrow(epidemic$incidence) n.weeks <- ncol(epidemic$incidenc) n.iter <- nrow(epidemic$posterior) R2 <- sapply(1:n.weeks, function(j) epidemic$posterior[, paste0("R2[", 1:n.districts, ",", j, "]")]) R2 <- array(as.vector(R2), dim = c(n.iter, n.districts, n.weeks)) R2 <- aperm(R2, perm = c(2, 3, 1)) local.pressure <- array(as.vector(weightedinc)*as.vector(R2), dim = c(n.districts, n.weeks, n.iter)) alpha <- epidemic$posterior[, "alpha"] frac <- epidemic$posterior[, "f1"] delta <- epidemic$posterior[, "d11"] delta.hat <- median(delta) alpha.hat <- median(alpha) frac.hat <- median(frac) local.pressure.hat <- apply(local.pressure, MARGIN = c(1,2), median) gravity <- population.distances^(-delta.hat)%*%diag(epidemic$population$size^alpha.hat) gravity <- (1-diag(n.districts))*frac.hat*gravity/(rowSums(gravity) - diag(gravity)) origin <- rep(0, n.districts) infected <- which(rowSums(epidemic$incidence) != 0) for(i in infected) { infection.week <- min(which(epidemic$incidence[i,] != 0)) origin[i] <- which.max((t(gravity)*local.pressure.hat[, infection.week])[,i]) } origin[which.max(epidemic$incidence[,1])] <- 0 correct <- (sum(origin == epidemic$population$origin)-sum(origin == 0)) infected <- length(infected) -1 return(c(correct = correct, infected = infected)) } parameters=c("alpha", "d11", "f1", "R2") init.list <- list( list(alpha = 1.0, f1 = 0.05, d11 = 2), list(alpha = 1.2, f1 = 0.05, d11 = 3), list(alpha = 1.4, f1 = 0.10, d11 = 2), list(alpha = 1.6, f1 = 0.10, d11 = 3) ) simulations <- readRDS(file = "./simulations/simulated_epidemics.rds") epidemics <- list() for(n in 1:n.sim) { print(paste("analysing simulation", n)) epidemic <- simulations[[n]] data.list <- prepare.jagsanalysis(epidemic) # analysis with (slightly) informative priors on R2 and frac res <- run.jags( model = "model { for(i in 1:n.districts) { for(j in 1:n.districts) { gravity[i, j] <- (population[j]^alpha)*(dist.matrix[i, j]^(-delta)) } } for(i in 1:n.districts) { tot[i] <- sum(gravity[i, 1:n.districts]) - (population[i]^alpha)*(dist.matrix[i, i]^(-delta)) for(j in 1:n.districts) { normgravity[i, j] <- frac*gravity[i, j]/tot[i] } } for(i in 1:n.districts) { for(j in 1:n.weeks) { R2[i, j] ~ dgamma(2, 2/1) lambda[i, j] <- weightedinc[i, j]*R2[i, j] } } for(i in 1:n.districts) { infpres[i, 1] <- 0 inc.matrix[i, 1] ~ dpois(1) } for(i in 1:n.districts) { for(j in 2:n.weeks) { infpres[i, j] <- (1-frac)*lambda[i, j] + sum(normgravity[1:n.districts, i]*lambda[1:n.districts, j]) - normgravity[i, i]*lambda[i, j] inc.matrix[i, j] ~ dpois(infpres[i, j]) } } delta[1, 1] <- d11 frac[1] <- f1 alpha ~ dunif(-1, 6) f1 ~ dbeta(1, 9) d11 ~ dunif(-1, 6) }", monitor = parameters, data = data.list, n.chains = 4, inits = init.list, burnin = 100, sample = 500, adapt = 100, thin = 5, summarise = FALSE, method = "rjparallel") epidemic <- list( population = epidemic$population, incidence = epidemic$incidence, parameters = epidemic$parameters, posterior = as.matrix(as.mcmc.list(res))) posterior <- list( a = quantile(epidemic$posterior[, "alpha"], probs = c(0.025, 0.50, 0.975)), f1 = quantile(epidemic$posterior[, "f1"], probs = c(0.025, 0.50, 0.975)), d11 = quantile(epidemic$posterior[, "d11"], probs = c(0.025, 0.50, 0.975)), R = postR(epidemic), correct = correct.origin(epidemic)) epidemic$posterior <- posterior epidemics[[length(epidemics)+1]] <- epidemic # overwrite results file after analysis of each simulation saveRDS(epidemics, file = "./simulations/analysed_epidemics.rds") } ######### plot ranked posterior distributions ######### plot.rankedposterior <- function(posterior, truevalue, ...) { par(mar = c(4, 4, 1, 1), las = 1, xaxs = "i", yaxs = "i") tmp <- posterior[, order(posterior[2, ])] plot(x = c(0, (ncol(tmp)+1)), y = c(0.9*min(tmp), 1.1*max(tmp)), type = "n", ...) for(n in 1:ncol(tmp)) { lines(x = c(n, n), y = tmp[c(1,3), n], col = "gray", lwd = 3) } lines(x = c(0, (ncol(tmp)+1)), y = c(truevalue, truevalue), lty = 2) points(x = 1:ncol(tmp), y = tmp[2, ], pch = 19, col = "black", cex = 0.6) } epidemics <- readRDS(file = "./simulations/analysed_epidemics.rds") posteriorf1 <- sapply(1:length(epidemics), function(n) epidemics[[n]]$posterior$f1) posteriord11 <- sapply(1:length(epidemics), function(n) epidemics[[n]]$posterior$d11) posteriora <- sapply(1:length(epidemics), function(n) epidemics[[n]]$posterior$a) posteriorR.before <- sapply(1:length(epidemics), function(n) epidemics[[n]]$posterior$R$before) posteriorR.after <- sapply(1:length(epidemics), function(n) epidemics[[n]]$posterior$R$after) plot.rankedposterior(posteriora, truevalue = alpha, ylim = c(0, 2.9), ylab = expression(alpha), xlab = "simulations") plot.rankedposterior(posteriord11, truevalue = delta, ylim = c(0, 6.5), ylab = expression(delta), xlab = "simulations") plot.rankedposterior(posteriorf1, truevalue = frac, ylim = c(0, 0.17), ylab = "f", xlab = "simulations") plot.rankedposterior(posteriorR.before, truevalue = R.before, ylim = c(0, 8), ylab = "R.before", xlab = "simulations") plot.rankedposterior(posteriorR.after, truevalue = R.after, ylim = c(0, 8), ylab = "R.after", xlab = "simulations") ######### summary statistics ######### # origins correctly identified origins <- sapply(1:length(epidemics), function(n) epidemics[[n]]$posterior$correct) quantile(origins["correct",]/origins["infected",], probs = c(0.025, 0.50, 0.975)) quantile(origins["infected",], probs = c(0.025, 0.50, 0.975)) infected.persons <- sapply(1:length(epidemics), function(n) sum(epidemics[[n]]$incidence)) quantile(infected.persons, probs = c(0.025, 0.50, 0.975)) # posterior median and 95% CI apply(posteriorf1, MARGIN = 1, mean) apply(posteriord11, MARGIN = 1, mean) apply(posteriora, MARGIN = 1, mean) apply(posteriorR.before, MARGIN = 1, mean) apply(posteriorR.after, MARGIN = 1, mean) # MSE sum((posteriorf1[2,]-frac)^2)/n.sim sum((posteriord11[2,]-delta)^2)/n.sim sum((posteriora[2,]-alpha)^2)/n.sim sum((posteriorR.before[2,]-R.before)^2)/n.sim sum((posteriorR.after[2,]-R.after)^2)/n.sim # coverage sum(posteriorf1[1,] < frac & posteriorf1[3,] > frac)/n.sim sum(posteriord11[1,] < delta & posteriord11[3,] > delta)/n.sim sum(posteriora[1,] < alpha & posteriora[3,] > alpha)/n.sim sum(posteriorR.before[1,] < R.before & posteriorR.before[3,] > R.before)/n.sim sum(posteriorR.after[1,] < R.after & posteriorR.after[3,] > R.after)/n.sim