# R code 3
# Kajanus, M.H., Forsman, J.T., Vollstädt, M.G.R., Devictor, V., Elo, M., Lehikoinen, A., Mönkkönen, M.,
# Thorson, J.T., and Kivelä, S.M. 2021. Titmice are a better indicator of bird density in Northern European than in Western European forests.
# Ecology and Evolution.
# R code for creating randomly drawn species' data sets and the VAST analysis of the association between the randomly drawn species
# groups and forest bird density in France
# Required data sets are available upon request from the corresponding author.

### FRANCE ###############################################################################################################
###############################################################################################################

### Prepare saving of results and prepare titmouse data for the VAST analysis
# Save the output to a file, which you will use later on in the evaluation of the biological
# control groups

# Load libraries
library(TMB)
library(VAST)
library(RandomFields)
library(raster)
library(RANN)
library(INLA)

# Create a dataframe which will store the values of the estimates, the SEs and the convergence check
Parameter <- c("Control_biom", "Envir_var", "Spatial", "Spatio_temp")
estimate_TM <- data.frame(Parameter)
SE_TM <- data.frame(Parameter)

# Set the correct working directory (VAST_FRA), where the titmouse estimates and VAST.dll are saved,
# and extract the values for the parameters from the titmouse analysis
setwd()
load("Obj")
dyn.load(dynlib("VAST_v5_3_0"))
SD <- summary(sdreport(Obj))
values <- as.data.frame(SD)

# install.packages("dplyr")
library(dplyr)
values <- values %>% filter(row.names(values) %in% c("gamma2_ctp", "gamma2_ctp.1", "L_omega2_z", "L_epsilon2_z"))
row.names(values) <- c("Control_biom", "Envir_var", "Spatial", "Spatio_temp") # Called "Control_biom" (control biomass) to be able to match with the other tables; here it contains the biomass of titmice
fix(values)

# Add the estimates to the dataframe
estimate_TM$estimate <- values[, 1]
# Continue with the SEs
SE_TM$SE <- values[, 2]

# Save the files
write.csv2(file = "estimate_TM.csv", estimate_TM)
write.csv2(file = "SE_TM.csv", SE_TM)

###############################################################################################################
###############################################################################################################

### Clear the environment and set the correct working directory
rm(list = ls())
# Set your working directory to upload the data
setwd()
Back <- getwd()

####################################################################################################
#################################################################################################

### Start from here with raw data
### Set the correct working directory and read in the bird and trait data
fra <- read.csv2("fra.csv", sep = ";", dec = ",", header = TRUE) # Not available
str(fra)
traits <- read.csv2("traits.csv", sep = ";", dec = ",", header = TRUE) # Not available
str(traits)

# Filter for the species we are using in the analysis
species <- unique(fra$ESPECE)
length(species) # 63
traits <- traits[traits$ESPECE %in% species, ]
dim(traits) # 63 4
fix(traits)

#########################################################
### Start with creating new species sets
# Function to randomly choose 6 species from the species set to use as a "biological control",
# testing whether this random set would predict biomass as successfully as Parus species
sampleWOsurprise <- function(x) {
  if (length(x) <= 1) {
    return(x)
  } else {
    return(sample(x, 6))
  }
}
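# A quick, purely illustrative check of sampleWOsurprise() with made-up species codes
# ("SPEC01" etc. are hypothetical and not part of the data set). It is left commented out
# so that it does not advance the random-number stream used for the actual draws below:
# sampleWOsurprise(c("SPEC01", "SPEC02", "SPEC03", "SPEC04", "SPEC05", "SPEC06", "SPEC07"))
# # returns a random draw of 6 of the 7 codes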
control_spec <- unique(fra$ESPECE)

# Make sure to throw out the Parus species, as you do not want them in your sample
control_spec <- control_spec[! control_spec %in% c("PARATE", "PARCAE", "PARCIN", "PARCRI", "PARMAJ", "PARMON", "PARPAL")]

# Choose the sets of species we will refer to as "control species"
# Decide how many sets you want: i in 1:X
# To save computational time, divide into sets of 100 and run them separately at the same time
control <- list()
for (i in 1:100) {
  control[[i]] <- as.vector(sampleWOsurprise(control_spec))
}
names(control) <- c(1:100)

# Have a look at the species sets for each sample to see how well the selection process worked
control

# Make a table that you will use later to display which random species sets perform well (if any)
control_table <- t(as.data.frame(control))
names <- c("Species_1", "Species_2", "Species_3", "Species_4", "Species_5", "Species_6")
colnames(control_table) <- names
row.names(control_table) <- rep(1:length(control))
fix(control_table)

# Set the correct working directory where to save these
setwd()

# Store the data for later
Store_Control <- paste0(getwd(), '/Overview_Control_Species')
dir.create(Store_Control)
setwd(Store_Control)

# Save the table
write.csv2(file = "Table_control_groups.csv", control_table)

# Save the list of species per control sample for later evaluation
# Set the location where the files will be saved
# Store the original WD first to change back easily once the data are stored
# Make sure you are in the right directory to begin with
getwd()
for (i in seq_along(control)) {
  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  dir.create(Folder)
  cwd <- getwd() # Current dir
  setwd(Folder)
  write.table(control[[i]], paste(names(control)[i], "_Biol_Control.txt", sep = ""))
  setwd(cwd)
}

# Collect the mean bodyweight of the "control species" for each sample
control_bw <- list()
for (i in seq_along(control)) {
  control_bw[[i]] <- traits$mass[match(control[[i]], traits$ESPECE)]
}

# Save the bodyweights of the species per control sample for later evaluation
for (i in seq_along(control)) {
  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  dir.create(Folder)
  cwd <- getwd() # Current WD
  setwd(Folder)
  write.table(control_bw[[i]], paste(names(control)[i], "_Biol_Control_BW.txt", sep = ""))
  setwd(cwd)
}

##########################################################################
# Collect the identities of all species
Species <- unique(fra$ESPECE)
length(Species) # 63

# Collect the mean bodyweight of all species
bw <- traits$mass
length(bw) # 63

# Vector of the length of sampling sites
Nsample <- length(unique(fra$ID))
IDs <- unique(fra$ID)
length(IDs) # 16656

##########################################################################
# Create a dataframe containing all columns from the original dataset,
# but reduced to one row per species of the overall species list
data <- fra[fra$ID == IDs[1], ]

n_control <- list()
for (i in seq_along(control)) {
  n_control[[i]] <- data$N[data$ESPECE %in% control[[i]]] # number of individuals of each control species per site
}

dens_control <- list()
for (i in seq_along(control)) {
  dens_control[[i]] <- sum(n_control[[i]]) # density of the control species (i.e. abundance)
}
spec_control <- list()
for (i in seq_along(control)) {
  spec_control[[i]] <- data$ESPECE[data$ESPECE %in% control[[i]]] # species names
}

biom_control <- list()
for (i in seq_along(control)) {
  biom_control[[i]] <- sum(n_control[[i]][match(control[[i]][control[[i]] %in% spec_control[[i]]], spec_control[[i]])] *
                             control_bw[[i]][control[[i]] %in% control[[i]]]) # biomass of all individuals of all control species at a given site
}

rich_control <- list()
for (i in seq_along(control)) {
  rich_control[[i]] <- length(spec_control[[i]][n_control[[i]] > 0]) # species richness of the control species per site
}

rich_total <- length(data$N[data$N > 0]) # species richness of all birds
species <- data$ESPECE[data$N > 0] # species names
dens <- data$N[data$N > 0] # density of each single species (i.e. abundance)
dens_total <- sum(dens) # total density per site (i.e. number of all individuals of all species put together)
biom_total <- sum(dens * bw[match(species, Species)], na.rm = TRUE) # total biomass of all species per site (all individuals)

dens_wo_control <- list()
for (i in seq_along(control)) {
  dens_wo_control[[i]] <- dens_total - dens_control[[i]] # density of all species excluding the control species
}

biom_wo_control <- list()
for (i in seq_along(control)) {
  biom_wo_control[[i]] <- biom_total - biom_control[[i]] # biomass of all species excluding the control species
}

rich_wo_control <- list()
for (i in seq_along(control)) {
  rich_wo_control[[i]] <- rich_total - rich_control[[i]] # species richness of all species excluding the control species
}

year <- data$ANNEE[1]

# Data frame containing the information we are collecting for the later analysis with VAST,
# i.e. the details on density etc. calculated in the steps above
newdata <- list()
for (i in seq_along(control)) {
  newdata[[i]] <- data.frame(ID = rep(IDs[1], 9),
                             year = rep(year, 9),
                             lat = rep(data$latitude_wgs84[1], 9),
                             lon = rep(data$longitude_wgs84[1], 9),
                             habitat = rep(data$habitat2[1], 9),
                             div = c("control_density", "control_biomass", "control_richness",
                                     "total_density", "total_biomass", "total_richness",
                                     "total_density_wo_control", "total_biomass_wo_control", "total_richness_wo_control"),
                             tot = c(dens_control[[i]], biom_control[[i]], rich_control[[i]],
                                     dens_total, biom_total, rich_total,
                                     dens_wo_control[[i]], biom_wo_control[[i]], rich_wo_control[[i]]))
}

##################################################################################
# Run a loop to calculate all of these for each single site,
# i.e. for each point count in each route
for (j in 2:Nsample) {

  data <- fra[fra$ID == IDs[j], ]

  n_control <- list()
  for (i in seq_along(control)) {
    n_control[[i]] <- data$N[data$ESPECE %in% control[[i]]] # number of individuals of each control species per site
  }

  dens_control <- list()
  for (i in seq_along(control)) {
    dens_control[[i]] <- sum(n_control[[i]]) # density of the control species (i.e. abundance)
  }
  spec_control <- list()
  for (i in seq_along(control)) {
    spec_control[[i]] <- data$ESPECE[data$ESPECE %in% control[[i]]] # species names
  }

  biom_control <- list()
  for (i in seq_along(control)) {
    biom_control[[i]] <- sum(n_control[[i]][match(control[[i]][control[[i]] %in% spec_control[[i]]], spec_control[[i]])] *
                               control_bw[[i]][control[[i]] %in% control[[i]]]) # biomass of all individuals of all control species at a given site
  }

  rich_control <- list()
  for (i in seq_along(control)) {
    rich_control[[i]] <- length(spec_control[[i]][n_control[[i]] > 0]) # species richness of the control species per site
  }

  rich_total <- length(data$N[data$N > 0]) # species richness of all birds
  species <- data$ESPECE[data$N > 0] # species names
  dens <- data$N[data$N > 0] # density of each single species (i.e. abundance)
  dens_total <- sum(dens) # total density per site (i.e. number of all individuals of all species put together)
  biom_total <- sum(dens * bw[match(species, Species)], na.rm = TRUE) # total biomass of all species per site (all individuals)

  dens_wo_control <- list()
  for (i in seq_along(control)) {
    dens_wo_control[[i]] <- dens_total - dens_control[[i]] # density of all species excluding the control species
  }

  biom_wo_control <- list()
  for (i in seq_along(control)) {
    biom_wo_control[[i]] <- biom_total - biom_control[[i]] # biomass of all species excluding the control species
  }

  rich_wo_control <- list()
  for (i in seq_along(control)) {
    rich_wo_control[[i]] <- rich_total - rich_control[[i]] # species richness of all species excluding the control species
  }

  year <- data$ANNEE[1]

  # Data frame containing the information we are collecting for the later analysis with VAST,
  # i.e. the details on density etc. calculated in the steps above
  newdata2 <- list()
  for (i in seq_along(control)) {
    newdata2[[i]] <- data.frame(ID = rep(IDs[[j]], 9),
                                year = rep(year, 9),
                                lat = rep(data$latitude_wgs84[1], 9),
                                lon = rep(data$longitude_wgs84[1], 9),
                                habitat = rep(data$habitat2[1], 9),
                                div = c("control_density", "control_biomass", "control_richness",
                                        "total_density", "total_biomass", "total_richness",
                                        "total_density_wo_control", "total_biomass_wo_control", "total_richness_wo_control"),
                                tot = c(dens_control[[i]], biom_control[[i]], rich_control[[i]],
                                        dens_total, biom_total, rich_total,
                                        dens_wo_control[[i]], biom_wo_control[[i]], rich_wo_control[[i]]))
  }

  for (i in seq_along(newdata)) {
    newdata[[i]] <- rbind(newdata[[i]], newdata2[[i]])
  }
}

# Save the newdata file just in case, before adding the PCA values for the climate data!
save(newdata, file = "newdata")

#############################################################################################
# The above loop seems to work, but may result in the first "plot" being duplicated, so get rid of it again
# Check the first two plots to see if they are duplicated
lapply(newdata, head, n = 20)

# Run this only after checking that there was in fact a duplication and you need to get rid of it
# for(i in seq_along(newdata)) {
#   newdata[[i]] <- newdata[[i]][-c(1:9), ]
# }

# Save the datafiles for security
setwd(Store_Control)
save(newdata, file = "newdata")

for (i in seq_along(newdata)) {
  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  dir.create(Folder)
  cwd <- getwd() # Current dir
  setwd(Folder)
  write.csv2(newdata[[i]], paste(names(control)[i], "_Control_biomass_site.csv"))
  setwd(cwd)
}

# Set the working directory to the initial path again
setwd(Back)
getwd()
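# Optional sanity check (a sketch, not part of the original workflow): each combination of
# site ID and diversity metric should occur only once in every element of newdata, so a
# duplicated first site would show up here.
for (i in seq_along(newdata)) {
  n_dup <- anyDuplicated(newdata[[i]][, c("ID", "div")])
  if (n_dup > 0) message("newdata[[", i, "]] contains duplicated rows (first at row ", n_dup, ")")
}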
### Add the environmental data to the respective files to be able to run the VAST models later
setwd()
climate <- read.csv("fra_climate.csv", sep = ";", dec = ",", header = TRUE) # Not available
str(climate) # 22851 obs.

# Set the correct working directory to save the data after adding the climate variable to each data set
setwd(Store_Control)

# Match the files for biomass per site and climate for each subset of newdata, and save
for (i in seq_along(newdata)) {
  newdata[[i]] <- merge(newdata[[i]], climate, by = "ID")
  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  dir.create(Folder)
  cwd <- getwd() # Current dir
  setwd(Folder)
  write.csv2(newdata[[i]], paste(names(control)[i], "_Control_VAST.csv"))
  setwd(cwd)
}
setwd(Back)

###########################################################################
##### Estimate the effect of random species groups' biomass on forest bird density in France
##### Start here if using the 300 already provided data sets (named "1_Control_VAST" etc.)
###########################################################################
rm(list = ls())

### Start from here with the ready data sets
# If starting from here in a fresh R session, reload the packages used below
library(TMB)
library(VAST)
library(dplyr)

# Set the correct working directory where to find the data
Data_WD <- "..." # Replace "..." with the path to your working directory
setwd(Data_WD)
getwd()

# Define the length of your vector, i.e. the number of control samples you have
# and thus how many "iterations" you would like to run.
control <- list()
for (i in 1:100) {
  control[[i]] <- NA # really just a dummy list; it only needs to contain the same number of elements as your actual control samples
}
names(control) <- c(1:100)

# Create a dataframe which will store the values of the estimates and the SEs
Parameter <- c("Control_biom", "Envir_var", "Spatial", "Spatio_temp")
estimates <- data.frame(Parameter)
SE <- data.frame(Parameter)

# Set the WD to store the dataframes for later. Put this in a new folder
Store_Control <- "..." # Replace "..." with the path to the working directory where you store the output
setwd(Store_Control)
getwd()

write.csv2(file = "Estimates_control.csv", estimates)
write.csv2(file = "SE_control.csv", SE)

# Change back to the folder where the single datafiles for the random species sets are stored
setwd(Data_WD)
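# Note: the loop below fits one VAST model per random species set, so a single failed or
# non-converging fit would stop the whole run. If that becomes a problem, the loop body could
# be wrapped in tryCatch(); a minimal, self-contained illustration of the pattern (try_fit()
# is a hypothetical helper, shown here with a toy expression rather than the actual VAST fit):
try_fit <- function(expr) {
  tryCatch(expr, error = function(e) {
    message("fit failed: ", conditionMessage(e)) # report the error ...
    NULL                                         # ... and return NULL instead of stopping
  })
}
# e.g. try_fit(log("a")) prints a message and returns NULL rather than throwing an error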
# Run a loop through all control samples to get the estimates for the biological control
# Data provided upon request
for (i in seq_along(control)) {

  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  setwd(Folder)

  # Load the data for all forest birds excluding the randomly drawn species
  France <- read.csv2(paste(names(control)[i], "_Control_VAST.csv"), sep = ";", dec = ",",
                      header = TRUE, row.names = 1) # Data provided upon request

  # Split off the data for the biomass of all forest birds
  France_all <- France[France$div == "total_biomass_wo_control", ]
  # Split off the data for the random group (i.e. the covariates)
  France_control <- France[France$div == "control_biomass", ]
  rm(France)
  # str(France_all)
  # str(France_control)

  ### Remove all zero observations from the data set
  # Save the rows that have zero observations
  remove <- France_all[France_all$tot <= 0, ]
  # Save the dataset of France_all without the zero observations
  France_all <- France_all[{!France_all$ID %in% remove$ID}, ]
  # str(France_all)
  # Save the dataset of France_control without the zero observations
  France_control <- France_control[{!France_control$ID %in% remove$ID}, ]

  # Use the latest version of the C++ code
  Version <- get_latest_version(package = "VAST")

  # Define the spatial resolution of the model, and define whether to use a mesh or grid approximation
  Method <- "Mesh"
  grid_size_km <- 25
  n_x <- 1000 # Number of stations, i.e. "knots"
  Kmeans_Config <- list("randomseed" = 1, "nstart" = 100, "iter.max" = 1e3)

  # Define whether to include spatial and spatio-temporal variation,
  # the rank of this covariance among species, whether it is autocorrelated,
  # and whether there is overdispersion
  FieldConfig <- c("Omega1" = 0, "Epsilon1" = 0, "Omega2" = 1, "Epsilon2" = 1)
  # Here the first linear predictor is turned off;
  # one spatial and one spatio-temporal factor are used for the second linear predictor
  RhoConfig <- c("Beta1" = 0, "Beta2" = 0, "Epsilon1" = 0, "Epsilon2" = 4) # these settings define a Gompertz model
  OverdispersionConfig <- c("Delta1" = 0, "Delta2" = 0) # no overdispersion
  ObsModel <- c(2, 3)

  # Set the post-hoc calculations which you would use
  Options <- c("SD_site_density" = 0,
               "SD_site_logdensity" = 0,
               "Calculate_Range" = 1,          # turned on
               "Calculate_evenness" = 0,
               "Calculate_effective_area" = 1, # turned on
               "Calculate_Cov_SE" = 0,
               "Calculate_Synchrony" = 0,
               "Calculate_Coherence" = 0)

  # Potential stratification of results
  strata.limits <- data.frame(STRATA = "All_areas")
  # Set the region. No region is to be downloaded from any dataset, so set it to "Other" to use the region you defined
  Region <- "Other"

  # Set the location where the files will be saved
  DateFile <- paste0(getwd(), '/Control_VAST')
  dir.create(DateFile)

  # Save a list with all the settings (not necessary for the model, but good practice) to remember what you did
  Record <- ThorsonUtilities::bundlelist(c("Version", "Method", "grid_size_km", "n_x", "FieldConfig",
                                           "RhoConfig", "OverdispersionConfig", "ObsModel", "Kmeans_Config"))
  save(Record, file = file.path(DateFile, "Record.RData"))
  capture.output(Record, file = file.path(DateFile, "Record.txt"))

  # set.seed to get reproducible results
  set.seed(123)

  # Define the area and such
  AreaSwept_km2 <- pi * 0.100^2
  Data_Geostat <- data.frame(Catch_KG = France_all[, "tot"],
                             Year = France_all[, "year"],
                             Vessel = France_all[, "ID"],
                             AreaSwept_km2 = AreaSwept_km2,
                             Lat = France_all[, "lat"],
                             Lon = France_all[, "lon"])

  # Create the extrapolation grid that is appropriate for the specific region
  Extrapolation_List <- make_extrapolation_info(Region = Region, strata.limits = strata.limits,
                                                observations_LL = Data_Geostat[, c("Lat", "Lon")])

  # Generate the information which is used for the estimation of the spatio-temporal parameters
  Spatial_List <- make_spatial_info(grid_size_km = grid_size_km, n_x = n_x, Method = Method,
                                    Lon = Data_Geostat[, "Lon"], Lat = Data_Geostat[, "Lat"],
                                    Extrapolation_List = Extrapolation_List, DirPath = DateFile,
                                    Save_Results = FALSE)

  # Add the knots to "Data_Geostat"
  Data_Geostat <- cbind(Data_Geostat, "knot_i" = Spatial_List$knot_i)

  # Define and arrange the covariates
  covariates <- cbind(scale(France_control$tot), scale(France_control$PC2)) # check with Mira what exactly needs to be scaled
  colnames(covariates) <- c("tit_tot", "clim")
  t_e <- France_control[, "year"]

  # Format the covariates
  Covariate <- format_covariates(Lat_e = France_control[, "lat"],
                                 Lon_e = France_control[, "lon"],
                                 t_e = France_control[, "year"],
                                 Cov_ep = covariates,
                                 Extrapolation_List = Extrapolation_List,
                                 Spatial_List = Spatial_List,
                                 FUN = mean,
                                 Year_Set = min(t_e):max(t_e),
                                 na.omit = "time-average")
  X_cov <- Covariate$Cov_xtp

  # Build the list of data inputs used for parameter estimation
  TmbData <- Data_Fn(Version = Version,
                     FieldConfig = FieldConfig,
                     OverdispersionConfig = OverdispersionConfig,
                     RhoConfig = RhoConfig,
                     ObsModel = ObsModel,
                     c_i = rep(0, nrow(Data_Geostat)),
                     b_i = Data_Geostat[, "Catch_KG"],
                     a_i = Data_Geostat[, "AreaSwept_km2"],
                     s_i = Data_Geostat[, "knot_i"] - 1,
                     t_i = Data_Geostat[, "Year"],
                     v_i = as.numeric(Data_Geostat[, "Vessel"]) - 1,
                     a_xl = Spatial_List$a_xl,
                     X_xtp = X_cov,
                     MeshList = Spatial_List$MeshList,
                     GridList = Spatial_List$GridList,
                     Method = Spatial_List$Method,
                     Options = Options,
                     CheckForErrors = FALSE)

  # In this step, build the TMB object
  TmbList <- Build_TMB_Fn(TmbData = TmbData, RunDir = DateFile, Version = Version,
                          RhoConfig = RhoConfig, CovConfig = TRUE,
                          loc_x = Spatial_List$loc_x, Method = Method, Use_REML = FALSE)

  # "Turn off" the encounter probability from the gamma estimate
  Map <- TmbList$Map
  Map[["gamma1_ctp"]] <- rep(NA, length(TmbList$Parameters$gamma1_ctp))
  Map[["gamma1_ctp"]] <- factor(Map[["gamma1_ctp"]])

  # Expand the TMB object, adding the object "Map"
  TmbList <- Build_TMB_Fn(Map = Map, TmbData = TmbData, RunDir = DateFile, Version = Version,
                          RhoConfig = RhoConfig, CovConfig = TRUE,
                          loc_x = Spatial_List$loc_x, Method = Method, Use_REML = FALSE)
  Obj <- TmbList[["Obj"]]
  # Use a gradient-based nonlinear minimizer to identify maximum likelihood estimates for the fixed effects
  Opt <- TMBhelper::Optimize(obj = Obj, lower = TmbList[["Lower"]], upper = TmbList[["Upper"]],
                             getsd = TRUE, savedir = DateFile, bias.correct = FALSE,
                             newtonsteps = 2, loopnum = 1)

  # Go back to the overarching WD to save the most important information in a CSV file
  setwd(Store_Control)

  # Read the CSV files where you will store the information
  estimates_C <- read.csv2("Estimates_control.csv", header = TRUE, row.names = 1, check.names = FALSE)
  SE_C <- read.csv2("SE_control.csv", header = TRUE, row.names = 1, check.names = FALSE)

  # Extract the values for the parameters you want
  SD <- summary(sdreport(Obj))
  values <- as.data.frame(SD)
  values <- values %>% filter(row.names(values) %in% c("gamma2_ctp", "gamma2_ctp.1", "L_omega2_z", "L_epsilon2_z"))
  row.names(values) <- c("Control_biom", "Envir_var", "Spatial", "Spatio_temp")

  # Add the values to the respective datafiles
  # Get the number of the control group we are looking at
  N_col <- paste0(paste(names(control)[i], "_control"))
  # Remove the whitespace in N_col
  N_col <- gsub(" ", "", N_col)

  # Start with the estimates
  estimates_C$N_col <- values[, 1]
  colnames(estimates_C)[colnames(estimates_C) == "N_col"] <- N_col
  # Save the file
  write.csv2(file = "Estimates_control.csv", estimates_C)

  # Continue with the SEs
  SE_C$N_col <- values[, 2]
  colnames(SE_C)[colnames(SE_C) == "N_col"] <- N_col
  # Save the file
  write.csv2(file = "SE_control.csv", SE_C)

  rm(list = ls()[! ls() %in% c("Data_WD", "control", "Store_Control", "Folder")])
  setwd(Data_WD)
}

#####
###########################################################################################
### WITH ALL CONVERGED MODELS ################
# Check out the estimates and how well the control models perform compared to the titmice

# Switch to the folder where you store the estimates for titmouse biomass
setwd("G:/Mira/Titmice simulations/FRA")
estimate_TM <- read.csv2("estimate_TM.csv", header = TRUE, row.names = 2, check.names = FALSE)
estimate_TM <- as.data.frame(t(estimate_TM))
estimate_TM <- estimate_TM[-1, ]
estimate_TM$group <- rownames(estimate_TM)

SE_TM <- read.csv2("SE_TM.csv", header = TRUE, row.names = 2, check.names = FALSE)
SE_TM <- as.data.frame(t(SE_TM))
SE_TM <- SE_TM[-1, ]
SE_TM$group <- rownames(SE_TM)

titmouse <- as.data.frame(estimate_TM[, 1])
colnames(titmouse) <- "estimate"
titmouse$lower_CI <- titmouse[, 1] - 1.96 * SE_TM[, 1]
titmouse$upper_CI <- titmouse[, 1] + 1.96 * SE_TM[, 1]
titmouse$group <- SE_TM$group

# Change WD again
setwd(Store_Control)

# Read the information about the respective estimates
estimates_C <- read.csv2("Estimates_control.csv", header = TRUE, row.names = 2, check.names = FALSE)
estimates_C <- estimates_C[, -1]
estimates_C <- as.data.frame(t(estimates_C))
estimates_C$group <- rownames(estimates_C)

SE_C <- read.csv2("SE_control.csv", header = TRUE, row.names = 2, check.names = FALSE)
SE_C <- SE_C[, -1]
SE_C <- as.data.frame(t(SE_C))
SE_C$group <- rownames(SE_C)

# Create a new dataframe containing the estimate of "Control_group_Biomass"
# and its lower and upper CI
control_biom <- as.data.frame(estimates_C[, 1])
colnames(control_biom) <- "estimate"
control_biom$lower_CI <- control_biom[, 1] - 1.96 * SE_C[, 1]
control_biom$upper_CI <- control_biom[, 1] + 1.96 * SE_C[, 1]
control_biom$N <- c(1:length(control_biom[, 1]))
control_biom$group <- SE_C$group

# Colour for the polygon
grey1 <- rgb(153, 153, 153, 87, maxColorValue = 255)

# See how the control group performs per sample
# Plot the performance of the biological control with the lower and upper CI,
# and add the estimate of titmouse biomass for visual support ####
png(filename = "VAST estimate biological control.png", width = 39.7, height = 21, units = "cm", res = 300)
plot(control_biom[, 1] ~ control_biom[, 4], type = "n",
     ylim = c(min(control_biom[, 1] - 0.25), max(control_biom[, 1] + 0.25)),
     xaxt = "n", main = "VAST estimate biological control", ylab = "", xlab = "Control Group")
axis(1, at = 1:length(control_biom[, 1]), las = 3) # tick marks per control group; change according to the number of samples
polygon(c(rev(control_biom[, 4]), control_biom[, 4]),
        c(rev(control_biom$upper_CI), control_biom$lower_CI),
        col = grey1, border = NA)
lines(control_biom$estimate, lwd = 2, col = "red")
lines(control_biom$lower_CI, lty = 3, col = "grey5") # add lower CI
lines(control_biom$upper_CI, lty = 3, col = "grey5") # add upper CI
abline(h = titmouse$estimate, col = "blue", lty = 2)
x <- as.numeric(length(control_biom[, 1]) - 10.5)
y <- max(control_biom[, 1] + 0.2)
text(x, y, paste0("mean control = ", round(mean(control_biom[, 1]), digits = 3)), cex = 1.5)
text(x, y - 0.05, paste0("median control = ", round(median(control_biom[, 1]), digits = 3)), cex = 1.5)
text(x, y - 0.10, paste0("titmouse = ", round(titmouse$estimate, digits = 3)), cex = 1.5)
dev.off()
#####

# Compare the performance of the control groups to the actual estimate
png(filename = "Histogram VAST estimate biological control.png", width = 29.7, height = 21, units = "cm", res = 300)
dens <- density(control_biom[, 1])
max <- which.max(dens$y)
hist(control_biom[, 1], prob = TRUE, col = "indianred", breaks = 10,
     main = "VAST estimate biological control", ylab = "Density", xlab = "Estimate",
     xlim = c(min(control_biom[, 1] - 0.15), max(control_biom[, 1] + 0.15)), lwd = 2)
lines(density(control_biom[, 1]), lwd = 3)
text(min(control_biom[, 1] - 0.05), dens$y[max] - 0.8,
     paste0("p ", ifelse(sum(control_biom$estimate > titmouse$estimate) / length(control_biom$estimate) > 0.05, "> 0.05", "< 0.05")),
     cex = 1.5)
text(min(control_biom[, 1] - 0.05), dens$y[max] - 0.2,
     paste0("titmouse = ", round(titmouse$estimate, digits = 3)), cex = 1.5)
clip(0, 1, 0, dens$y[max] + 2)
abline(v = titmouse$estimate, lty = 2, col = "blue", lwd = 5)
dev.off()

# Create the table for the supplements
# Get the species sets
setwd(Data_WD)
suppl_table <- read.csv2("Table_control_groups.csv", header = TRUE, row.names = 1, check.names = FALSE)
suppl_table <- cbind(suppl_table, control_biom)[, -10]

# Add the titmouse values for comparison
titmice <- t(c("PARATE", "PARCAE", "PARCRI", "PARMAJ", "PARMON", "PARPAL"))
titmice <- data.frame(titmice)
colnames(titmice) <- c("Species_1", "Species_2", "Species_3", "Species_4", "Species_5", "Species_6")
row.names(titmice) <- "Titmice"
titmice <- cbind(titmice, titmouse)
suppl_table <- rbind(titmice, suppl_table)

# Store the data
setwd(Store_Control)
write.csv2(file = "Table_supplements.csv", suppl_table)

### Checking which random groups are significant
#################################################################################################################
store <- list()
for (i in 1:nrow(control_biom)) {
  store[[i]] <- ifelse(range(c(control_biom[i, ]$lower_CI, control_biom[i, ]$upper_CI) > 0), TRUE,
                       ifelse(range(c(control_biom[i, ]$lower_CI, control_biom[i, ]$upper_CI) < 0), TRUE, FALSE))
}

store2 <- list()
for (j in seq_along(store)) {
  store2[[j]] <- t(as.data.frame(store[[j]]))
  if ((store2[[j]][, 1] == "TRUE") & (store2[[j]][, 2] == "TRUE")) print(j)
}
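# The same check in a compact, vectorized form (a sketch that gives the row numbers of the
# random groups whose 95% CI lies entirely above or entirely below zero):
sig_groups <- which(control_biom$lower_CI > 0 | control_biom$upper_CI < 0)
sig_groups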
#####
### WITH ONLY CONVERGED AND SIGNIFICANT MODELS ################
# Check out the estimates and how well the control models perform compared to the titmice

# Switch to the folder where you store the estimates for titmouse biomass
setwd("")
estimate_TM <- read.csv2("estimate_TM.csv", header = TRUE, row.names = 2, check.names = FALSE)
estimate_TM <- as.data.frame(t(estimate_TM))
estimate_TM <- estimate_TM[-1, ]

SE_TM <- read.csv2("SE_TM.csv", header = TRUE, row.names = 2, check.names = FALSE)
SE_TM <- as.data.frame(t(SE_TM))
SE_TM <- SE_TM[-1, ]

titmouse <- as.data.frame(estimate_TM[, 1])
colnames(titmouse) <- "estimate"
titmouse$lower_CI <- titmouse[, 1] - 1.96 * SE_TM[, 1]
titmouse$upper_CI <- titmouse[, 1] + 1.96 * SE_TM[, 1]

# Change WD again
setwd(Store_Control)

# Read the information about the respective estimates
estimates_C <- read.csv2("Estimates_control_sig.csv", header = TRUE, row.names = 2, check.names = FALSE)
estimates_C <- estimates_C[, -1]
estimates_C <- as.data.frame(t(estimates_C))

SE_C <- read.csv2("SE_control_sig.csv", header = TRUE, row.names = 2, check.names = FALSE)
SE_C <- SE_C[, -1]
SE_C <- as.data.frame(t(SE_C))

# Create a new dataframe containing the estimate of "Control_group_Biomass"
# and its lower and upper CI
control_biom <- as.data.frame(estimates_C[, 1])
colnames(control_biom) <- "estimate"
control_biom$lower_CI <- control_biom[, 1] - 1.96 * SE_C[, 1]
control_biom$upper_CI <- control_biom[, 1] + 1.96 * SE_C[, 1]
control_biom$N <- c(1:length(control_biom[, 1]))

# Colour for the polygon
grey1 <- rgb(153, 153, 153, 87, maxColorValue = 255)

# See how the control group performs per sample
# Plot the performance of the biological control with the lower and upper CI,
# and add the estimate of titmouse biomass for visual support ####
png(filename = "VAST estimate biological control sig.png", width = 39.7, height = 21, units = "cm", res = 300)
plot(control_biom[, 1] ~ control_biom[, 4], type = "n",
     ylim = c(min(control_biom[, 1] - 0.25), max(control_biom[, 1] + 0.25)),
     xaxt = "n", main = "VAST estimate biological control", ylab = "", xlab = "Control Group")
axis(1, at = 1:length(control_biom[, 1]), las = 3) # tick marks per control group; change according to the number of samples
polygon(c(rev(control_biom[, 4]), control_biom[, 4]),
        c(rev(control_biom$upper_CI), control_biom$lower_CI),
        col = grey1, border = NA)
lines(control_biom$estimate, lwd = 2, col = "red")
lines(control_biom$lower_CI, lty = 3, col = "grey5") # add lower CI
lines(control_biom$upper_CI, lty = 3, col = "grey5") # add upper CI
abline(h = titmouse$estimate, col = "blue", lty = 2)
x <- as.numeric(length(control_biom[, 1]) - 10.5)
y <- max(control_biom[, 1] + 0.2)
text(x, y, paste0("mean control = ", round(mean(control_biom[, 1]), digits = 3)), cex = 1.5)
text(x, y - 0.05, paste0("median control = ", round(median(control_biom[, 1]), digits = 3)), cex = 1.5)
text(x, y - 0.10, paste0("titmouse = ", round(titmouse$estimate, digits = 3)), cex = 1.5)
dev.off()
#####

# Compare the performance of the control groups to the actual estimate
png(filename = "Histogram VAST estimate biological control sig.png", width = 29.7, height = 21, units = "cm", res = 300)
dens <- density(control_biom[, 1])
max <- which.max(dens$y)
hist(control_biom[, 1], prob = TRUE, col = "indianred", breaks = 10,
     main = "VAST estimate biological control", ylab = "Density", xlab = "Estimate",
     xlim = c(min(control_biom[, 1] - 0.15), max(control_biom[, 1] + 0.15)), lwd = 2)
lines(density(control_biom[, 1]), lwd = 3)
text(min(control_biom[, 1] - 0.05), dens$y[max] - 0.8,
     paste0("p ", ifelse(sum(control_biom$estimate > titmouse$estimate) / length(control_biom$estimate) > 0.05, "> 0.05", "< 0.05")),
     cex = 1.5)
text(min(control_biom[, 1] - 0.05), dens$y[max] - 0.2,
     paste0("titmouse = ", round(titmouse$estimate, digits = 3)), cex = 1.5)
clip(0, 1, 0, dens$y[max] + 2)
abline(v = titmouse$estimate, lty = 2, col = "blue", lwd = 5)
dev.off()
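# A small closing summary (a sketch, not part of the original figures): how many of the
# retained random groups have a 95% CI that contains the titmouse estimate, and how many
# have a CI that excludes zero.
overlaps_titmouse <- with(control_biom, lower_CI <= titmouse$estimate & upper_CI >= titmouse$estimate)
excludes_zero <- with(control_biom, lower_CI > 0 | upper_CI < 0)
table(overlaps_titmouse)
table(excludes_zero)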