##################################################################################
# S3 Data. R code to reproduce statistical analysis in:
# Tanentzap et al. Microplastics and anthropogenic fibre concentrations in lakes
# reflect surrounding land use. PLOS Biology.
# No guarantees whatsoever are given with this code, but please cite if re-used.
# File prepared by Andrew Tanentzap (ajt65@cam.ac.uk), July 2021
# File contains the following sections:
# 1) Prepare workspace
# 2) Model associated with FIGURE 1
# 3) Model associated with FIGURE 2
# 4) Simulation study associated with S1 FIGURE
# 5) Model associated with S2 FIGURE

######################################################################################
## 1) Prepare workspace
######################################################################################
library(rstan)
library(brms) # ensure >= v.2.15.0
library(faux)

# All three data files are tab-delimited with 10 leading lines that are skipped.
all_trawls <- read.table("TanentzapS1Data.txt", header = TRUE, skip = 10,
                         sep = "\t", fill = TRUE, quote = "", as.is = TRUE)
EUlakedata <- read.table("TanentzapS2Data.txt", header = TRUE, skip = 10,
                         sep = "\t")
EUwastegdp <- read.table("TanentzapS4Data.txt", header = TRUE, skip = 10,
                         sep = "\t")

######################################################################################
## 2) Model associated with FIGURE 1
######################################################################################
# Merge the MARINE habitat into the combined ESTUARINE/MARINE category.
all_trawls[all_trawls$Habitat == "MARINE", "Habitat"] <- "ESTUARINE/MARINE"

# because of all the zeros, fit a censored model in line with discussion @
# https://discourse.datamethods.org/t/modelling-outcome-data-with-a-limit-of-detection-is-a-hurdle-model-appropriate/1427/8
# code modified from https://onlinelibrary.wiley.com/doi/full/10.1002/sim.5515
# https://setac.onlinelibrary.wiley.com/doi/full/10.1002/etc.4046
#
# Suffix conventions in the Stan program: `_us` = rows with Habitat 'THIS_STUDY',
# `_db` = all other rows; `_cen`/`_cens` = rows with zero concentration, which
# enter the likelihood only through a CDF term evaluated at the limits L_us/L_db.
#
# NOTE(review): the observed data use a lognormal likelihood, but the censored
# contribution uses normal_lcdf() on the untransformed limits L_us/L_db; a
# lognormal_lcdf() (or log-transformed limits) looks like the matching form.
# Left unchanged here so the script keeps reproducing the published fit - confirm.
# NOTE(review): `sigma` is declared without <lower=0> although sigma[2] is used
# as a scale parameter - confirm this is intended.
cens_mod <- "
data {
  int N_obs;
  int N_cen;
  int N_obs_us;
  int N_obs_db;
  int N_cens_us;
  int N_cens_db;
  int N_studies;
  real y_obs_us[N_obs_us];
  real y_obs_db[N_obs_db];
  real log_mesh_size[N_obs];
  real log_mesh_size_cen[N_cen];
  matrix[N_obs,3] habitat;
  matrix[N_cen,3] habitat_cen;
  int habitat_id[N_obs];
  int habitat_id_cen[N_cen];
  int study_id[N_obs];
  int study_id_cen[N_cen];
  vector[N_cens_us] L_us;
  vector[N_cens_db] L_db;
}
parameters {
  vector[1] beta;
  vector[3] alpha;
  vector[3] sigma;
  vector[N_studies] z;
  vector[3] z2;
}
transformed parameters{
  vector[N_obs_us] mu_us;
  vector[N_obs_db] mu_db;
  vector[N_cens_db] mu_cen_db;
  vector[N_cens_us] mu_cen_us;
  for (i in 1:N_obs_db)
    mu_db[i] = habitat[i]*alpha + (beta[1]+sigma[3]*z2[habitat_id[i]])*log_mesh_size[i] + sigma[1]*z[study_id[i]];
  for (i in (N_obs_db+1):N_obs)
    mu_us[(i-N_obs_db)] = alpha[1] + alpha[2] + (beta[1]+sigma[3]*z2[2])*log_mesh_size[i] + sigma[1]*z[study_id[i]];
  for (i in 1:N_cens_db)
    mu_cen_db[i] = habitat_cen[i]*alpha + (beta[1]+sigma[3]*z2[habitat_id_cen[i]])*log_mesh_size_cen[i] + sigma[1]*z[study_id_cen[i]];
  for (i in (N_cens_db+1):N_cen)
    mu_cen_us[(i-N_cens_db)] = alpha[1] + alpha[2] + (beta[1]+sigma[3]*z2[2])*log_mesh_size_cen[i] + sigma[1]*z[study_id_cen[i]];
}
model {
  z ~ std_normal();
  z2 ~ std_normal();
  alpha ~ normal(0,5);
  beta ~ std_normal();
  sigma ~ std_normal();
  y_obs_us ~ lognormal(mu_us, sigma[2]);
  y_obs_db ~ lognormal(mu_db, sigma[2]);
  target += normal_lcdf(L_us | mu_cen_us, sigma[2]) + normal_lcdf(L_db | mu_cen_db, sigma[2]);
}
generated quantities {
  vector[N_obs_us+N_obs_db+N_cens_db+N_cens_us] concat = append_row(append_row(append_row(mu_db,mu_us),mu_cen_db),mu_cen_us);
  real mean_concat = mean(concat);
  real r2_gel = variance(concat) / (variance(concat) + pow(sigma[2],2));
}"

groups_cen <- as.factor(all_trawls$Habitat)

# Row indices of observed (non-zero) and censored (zero) concentrations, and of
# their THIS_STUDY / database subsets. which() drops NA comparisons.
obs_idx <- which(all_trawls$concentration_L != 0)
cen_idx <- which(all_trawls$concentration_L == 0)
is_us <- all_trawls$Habitat == "THIS_STUDY"
obs_us_idx <- which(all_trawls$concentration_L != 0 & is_us)
obs_db_idx <- which(all_trawls$concentration_L != 0 & !is_us)
cen_us_idx <- which(all_trawls$concentration_L == 0 & is_us)
cen_db_idx <- which(all_trawls$concentration_L == 0 & !is_us)

# The Stan program pairs y_obs_db with elements 1:N_obs_db of the per-row
# predictors and y_obs_us with elements (N_obs_db+1):N_obs (likewise for the
# censored rows), so database rows must precede THIS_STUDY rows in both subsets.
stopifnot(
  "database rows must precede THIS_STUDY rows among observed trawls" =
    identical(obs_idx, c(obs_db_idx, obs_us_idx)),
  "database rows must precede THIS_STUDY rows among censored trawls" =
    identical(cen_idx, c(cen_db_idx, cen_us_idx))
)

# Predictors computed once on the full data set, then split by index.
log_mesh_scaled <- as.numeric(scale(log(all_trawls$Filter_microns)))
study_index <- as.numeric(as.factor(all_trawls$DOI))

cens_mod_dat <- list(
  N_obs = length(obs_idx),
  N_cen = length(cen_idx),
  N_obs_us = length(obs_us_idx),
  N_obs_db = length(obs_db_idx),
  N_cens_us = length(cen_us_idx),
  N_cens_db = length(cen_db_idx),
  N_studies = length(unique(all_trawls$DOI)),
  y_obs_db = all_trawls$concentration_L[obs_db_idx],
  y_obs_us = all_trawls$concentration_L[obs_us_idx],
  log_mesh_size = log_mesh_scaled[obs_idx],
  log_mesh_size_cen = log_mesh_scaled[cen_idx],
  # Design matrix reshaped to 4 columns; only the first 3 (intercept plus two
  # habitat contrasts) are passed to Stan.
  habitat = matrix(model.matrix(~groups_cen[obs_idx]), ncol = 4)[, 1:3],
  habitat_cen = matrix(model.matrix(~groups_cen[cen_idx]), ncol = 4)[, 1:3],
  habitat_id = as.numeric(groups_cen[obs_idx]),
  habitat_id_cen = as.numeric(groups_cen)[cen_idx],
  study_id = study_index[obs_idx],
  study_id_cen = study_index[cen_idx]
)

# Censoring limits: half of a fixed constant for THIS_STUDY rows, and half of
# the smallest non-zero concentration (rounded to 7 d.p.) for database rows.
# NOTE(review): the constant presumably encodes the sampled trawl volume - confirm.
cens_mod_dat$L_us <- rep(1 / (pi * 0.15 * 0.15 * 100 * 1000) * .5,
                         cens_mod_dat$N_cens_us)
cens_mod_dat$L_db <- rep(
  round(min(all_trawls$concentration_L[obs_idx]), 7) * .5,
  cens_mod_dat$N_cens_db
)

# Single-iteration run used only to obtain a fitted object to reuse below.
fit.stan <- stan(model_code = cens_mod, data = cens_mod_dat, iter = 1,
                 chains = 1, verbose = FALSE)

# sample model
cens_mod_m1 <- stan(fit = fit.stan, data = cens_mod_dat, iter = 6750,
                    warmup = 3000, chains = 4, refresh = 100, thin = 15,
                    cores = 4, open_progress = TRUE)

######################################################################################
## 3) Model associated with FIGURE 2
######################################################################################
# fit linear model to count data accounting for overdispersion
m3 <- brm(bf(totalMP ~ scale(mwi^(1/3)) +
               scale(forest_cov) +
               scale(total_resp^(1/3)) +
               scale(human_cov) +
               scale(log(slope_short)) +
               scale(generated_load^(1/3)),
             hu ~ 1),
          #fit = m3,
          family = hurdle_poisson(),
          data = EUlakedata,
          chains = 4, cores = 4, iter = 5000, warmup = 2000, thin = 12,
          # save_all_pars is deprecated; same form as used in section 4
          save_pars = save_pars(all = TRUE))

######################################################################################
## 4) Simulation study associated with S1 FIGURE
######################################################################################
######### simulation study testing for variation in sampling volumes
# Outer list: sd of the simulated sample volume as a fraction (0.2, 0.4) of its
# mean of 7100. Middle list: correlation r (0 to 0.8) between simulated volume
# and totalMP. Inner list: posterior draws from 100 refits per combination.
simresults <- lapply(seq(.2, .4, by = .2), function(z) {
  lapply(seq(0, .8, by = .1), function(p) {
    # samplevol must exist in the data before the template model is built
    EUlakedata$samplevol <- rnorm_pre(EUlakedata$totalMP, mu = 7100,
                                      sd = 7100 * z, r = p)
    # Minimal fit (2 iterations) used as a template for update() below.
    # Fixed: the original passed the undefined object `alldata` as data.
    m3_c <- brm(bf(totalMP ~ scale(mwi^(1/3)) +
                     scale(forest_cov) +
                     scale(total_resp^(1/3)) +
                     scale(human_cov) +
                     scale(log(slope_short)) +
                     scale(generated_load^(1/3)) +
                     scale(samplevol),
                   hu ~ 1),
                family = hurdle_poisson(),
                data = EUlakedata,
                chains = 4, iter = 2, warmup = 1, thin = 1)

    vol_sims <- lapply(1:100, function(y) {
      # add random error in proportion to concentrations, with sd set to the
      # fraction z of the mean
      EUlakedata$samplevol <- rnorm_pre(EUlakedata$totalMP, mu = 7100,
                                        sd = 7100 * z, r = p)
      m3_s1 <- update(m3_c, newdata = EUlakedata, iter = 5000, warmup = 2000,
                      thin = 12, save_pars = save_pars(all = TRUE),
                      refresh = 0)
      return(posterior_samples(m3_s1))
    })
    return(vol_sims)
  })
})

# Proportion of replicate fits whose 95% credible interval for `coef_name`
# satisfies `test(lower, upper)`.
#   sims      - nested list shaped like `simresults`
#   coef_name - column name in each posterior-draws data frame
#   test      - function(lower, upper) returning a single TRUE/FALSE
# Returns a matrix with one row per correlation level and one column per sd
# level. Errors if the coefficient is absent instead of silently returning NA.
prop_ci <- function(sims, coef_name, test) {
  n_corr <- length(sims[[1L]])
  vapply(sims, function(by_sd) {
    vapply(by_sd, function(reps) {
      hits <- vapply(reps, function(draws) {
        x <- draws[[coef_name]]
        if (is.null(x)) {
          stop("coefficient '", coef_name, "' not found in posterior draws",
               call. = FALSE)
        }
        ci <- quantile(x, c(.025, .975), names = FALSE)
        test(ci[1L], ci[2L])
      }, logical(1))
      sum(hits) / length(hits)
    }, numeric(1))
  }, numeric(n_corr))
}

ci_above_zero <- function(lower, upper) lower > 0 & upper > 0
ci_spans_zero <- function(lower, upper) lower < 0 & upper > 0

# probability of finding a significant effect of sample vol and that effect of
# MPW is no longer significant
prop_pos_sig_sampvol <- prop_ci(simresults, "b_scalesamplevol", ci_above_zero)
# Coefficient names follow the formula terms above: scale(mwi^(1/3)) becomes
# b_scalemwiE1D3 and scale(forest_cov) becomes b_scaleforest_cov. The original
# names (b_scalemwi_outE1D3, b_scaleallforest_cov) do not occur in this model.
prop_pos_sig_mwiout <- prop_ci(simresults, "b_scalemwiE1D3", ci_spans_zero)
prop_pos_sig_for <- prop_ci(simresults, "b_scaleforest_cov", ci_spans_zero)
prop_pos_sig_WWtW <- prop_ci(simresults, "b_scalegenerated_loadE1D3",
                             ci_spans_zero)
prop_pos_sig_resp <- prop_ci(simresults, "b_scaletotal_respE1D3",
                             ci_spans_zero)

######################################################################################
## 5) Model associated with S2 FIGURE
######################################################################################
# Gaussian mixed model of log per-capita waste on log regional per-capita GDP
# with a random intercept for country.
lmm1 <- brm(formula = log(percapita_waste) ~ log(regional_GDP_percapita) +
              (1 | country),
            data = EUwastegdp,
            family = gaussian,
            chains = 4, cores = 4, iter = 2000, warmup = 1000)
#end of file