# R code 3
# Kajanus, M.H., Forsman, J.T., Vollstädt, M.G.R., Devictor, V., Elo, M., Lehikoinen, A., Mönkkönen, M.,
# Thorson, J.T., and Kivelä, S.M. 2021. Titmice are a better indicator of bird density in Northern European than in Western European forests.
# Ecology and Evolution.
# R code for creating randomly drawn species' data sets and the VAST analysis of the association between the randomly drawn species
# groups and forest bird density in France
# Required data sets are available upon request from the corresponding author.

### FRANCE ###############################################################################################################
###############################################################################################################

### Prepare saving of results and prepare titmouse data for the VAST analysis
# Save the output to a file, which you will use later on in the evaluation of the biological
# control groups

# Load libraries
library(TMB)
library(VAST)
library(RandomFields)
library(raster)
library(RANN)
library(INLA)

# Create a dataframe which will store the values of the estimates, the SEs and the convergence check
Parameter <- c("Control_biom", "Envir_var", "Spatial", "Spatio_temp")
estimate_TM <- data.frame(Parameter)
SE_TM <- data.frame(Parameter)

# Set the correct working directory (VAST_FRA), where the titmouse estimates and VAST.dll are saved,
# and extract the values for the parameters from the titmouse analysis
setwd()
load("Obj")
dyn.load(dynlib("VAST_v5_3_0"))
SD <- summary(sdreport(Obj))
values <- as.data.frame(SD)

# install.packages("dplyr")
library(dplyr)
values <- values %>% filter(row.names(values) %in% c("gamma2_ctp", "gamma2_ctp.1", "L_omega2_z", "L_epsilon2_z"))
row.names(values) <- c("Control_biom", "Envir_var", "Spatial", "Spatio_temp") # Called "Control_biom" (control biomass) to be able to match with the other tables; here it contains the biomass of titmice
fix(values)

# Add the estimates to the dataframe
estimate_TM$estimate <- values[, 1]
# Continue with the SEs
SE_TM$SE <- values[, 2]

# Save the files
write.csv2(file = "estimate_TM.csv", estimate_TM)
write.csv2(file = "SE_TM.csv", SE_TM)

###############################################################################################################
###############################################################################################################

### Clear the environment and set the correct working directory
rm(list = ls())
# Set your working directory to upload the data
setwd()
Back <- getwd()

####################################################################################################
#################################################################################################

### Start from here with raw data
### Set the correct working directory and read in the bird and trait data
fra <- read.csv2("fra.csv", sep = ";", dec = ",", header = TRUE) # Not available
str(fra)
traits <- read.csv2("traits.csv", sep = ";", dec = ",", header = TRUE) # Not available
str(traits)

# Filter for the species we are using in the analysis
species <- unique(fra$ESPECE)
length(species) # 63
traits <- traits[traits$ESPECE %in% species, ]
dim(traits) # 63 4
fix(traits)

#########################################################
### Start with creating new species sets
# Function to randomly choose 6 species from the species set to use as a "biological control",
# testing whether this random set would predict biomass as successfully as Parus species
sampleWOsurprise <- function(x) {
  if (length(x) <= 1) {
    return(x)
  } else {
    return(sample(x, 6))
  }
}
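# A quick, purely illustrative check of sampleWOsurprise() with made-up species codes
# ("SPEC01" etc. are hypothetical and not part of the data set). It is left commented out
# so that it does not advance the random-number stream used for the actual draws below:
# sampleWOsurprise(c("SPEC01", "SPEC02", "SPEC03", "SPEC04", "SPEC05", "SPEC06", "SPEC07"))
# # returns a random draw of 6 of the 7 codes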
control_spec <- unique(fra$ESPECE)

# Make sure to throw out the Parus species, as you do not want them in your sample
control_spec <- control_spec[! control_spec %in% c("PARATE", "PARCAE", "PARCIN", "PARCRI", "PARMAJ", "PARMON", "PARPAL")]

# Choose the sets of species we will refer to as "control species"
# Decide how many sets you want: i in 1:X
# To save computational time, divide into sets of 100 and run them separately at the same time
control <- list()
for (i in 1:100) {
  control[[i]] <- as.vector(sampleWOsurprise(control_spec))
}
names(control) <- c(1:100)

# Have a look at the species sets for each sample to see how well the selection process worked
control

# Make a table that you will use later to display which random species sets perform well (if any)
control_table <- t(as.data.frame(control))
names <- c("Species_1", "Species_2", "Species_3", "Species_4", "Species_5", "Species_6")
colnames(control_table) <- names
row.names(control_table) <- rep(1:length(control))
fix(control_table)

# Set the correct working directory where to save these
setwd()

# Store the data for later
Store_Control <- paste0(getwd(), '/Overview_Control_Species')
dir.create(Store_Control)
setwd(Store_Control)

# Save the table
write.csv2(file = "Table_control_groups.csv", control_table)

# Save the list of species per control sample for later evaluation
# Set the location where the files will be saved
# Store the original WD first to change back easily once the data are stored
# Make sure you are in the right directory to begin with
getwd()
for (i in seq_along(control)) {
  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  dir.create(Folder)
  cwd <- getwd() # Current dir
  setwd(Folder)
  write.table(control[[i]], paste(names(control)[i], "_Biol_Control.txt", sep = ""))
  setwd(cwd)
}

# Collect the mean bodyweight of the "control species" for each sample
control_bw <- list()
for (i in seq_along(control)) {
  control_bw[[i]] <- traits$mass[match(control[[i]], traits$ESPECE)]
}

# Save the bodyweights of the species per control sample for later evaluation
for (i in seq_along(control)) {
  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  dir.create(Folder)
  cwd <- getwd() # Current WD
  setwd(Folder)
  write.table(control_bw[[i]], paste(names(control)[i], "_Biol_Control_BW.txt", sep = ""))
  setwd(cwd)
}

##########################################################################
# Collect the identities of all species
Species <- unique(fra$ESPECE)
length(Species) # 63

# Collect the mean bodyweight of all species
bw <- traits$mass
length(bw) # 63

# Vector of the length of sampling sites
Nsample <- length(unique(fra$ID))
IDs <- unique(fra$ID)
length(IDs) # 16656

##########################################################################
# Create a dataframe containing all columns from the original dataset,
# but reduced to one row per species of the overall species list
data <- fra[fra$ID == IDs[1], ]

n_control <- list()
for (i in seq_along(control)) {
  n_control[[i]] <- data$N[data$ESPECE %in% control[[i]]] # number of individuals of each control species per site
}

dens_control <- list()
for (i in seq_along(control)) {
  dens_control[[i]] <- sum(n_control[[i]]) # density of the control species (i.e. abundance)
}
spec_control <- list()
for (i in seq_along(control)) {
  spec_control[[i]] <- data$ESPECE[data$ESPECE %in% control[[i]]] # species names
}

biom_control <- list()
for (i in seq_along(control)) {
  biom_control[[i]] <- sum(n_control[[i]][match(control[[i]][control[[i]] %in% spec_control[[i]]], spec_control[[i]])] *
                             control_bw[[i]][control[[i]] %in% control[[i]]]) # biomass of all individuals of all control species at a given site
}

rich_control <- list()
for (i in seq_along(control)) {
  rich_control[[i]] <- length(spec_control[[i]][n_control[[i]] > 0]) # species richness of the control species per site
}

rich_total <- length(data$N[data$N > 0]) # species richness of all birds
species <- data$ESPECE[data$N > 0] # species names
dens <- data$N[data$N > 0] # density of each single species (i.e. abundance)
dens_total <- sum(dens) # total density per site (i.e. number of all individuals of all species put together)
biom_total <- sum(dens * bw[match(species, Species)], na.rm = TRUE) # total biomass of all species per site (all individuals)

dens_wo_control <- list()
for (i in seq_along(control)) {
  dens_wo_control[[i]] <- dens_total - dens_control[[i]] # density of all species excluding the control species
}

biom_wo_control <- list()
for (i in seq_along(control)) {
  biom_wo_control[[i]] <- biom_total - biom_control[[i]] # biomass of all species excluding the control species
}

rich_wo_control <- list()
for (i in seq_along(control)) {
  rich_wo_control[[i]] <- rich_total - rich_control[[i]] # species richness of all species excluding the control species
}

year <- data$ANNEE[1]

# Data frame containing the information we are collecting for the later analysis with VAST,
# i.e. the details on density etc. calculated in the steps above
newdata <- list()
for (i in seq_along(control)) {
  newdata[[i]] <- data.frame(ID = rep(IDs[1], 9),
                             year = rep(year, 9),
                             lat = rep(data$latitude_wgs84[1], 9),
                             lon = rep(data$longitude_wgs84[1], 9),
                             habitat = rep(data$habitat2[1], 9),
                             div = c("control_density", "control_biomass", "control_richness",
                                     "total_density", "total_biomass", "total_richness",
                                     "total_density_wo_control", "total_biomass_wo_control", "total_richness_wo_control"),
                             tot = c(dens_control[[i]], biom_control[[i]], rich_control[[i]],
                                     dens_total, biom_total, rich_total,
                                     dens_wo_control[[i]], biom_wo_control[[i]], rich_wo_control[[i]]))
}

##################################################################################
# Run a loop to calculate all of these for each single site,
# i.e. for each point count in each route
for (j in 2:Nsample) {

  data <- fra[fra$ID == IDs[j], ]

  n_control <- list()
  for (i in seq_along(control)) {
    n_control[[i]] <- data$N[data$ESPECE %in% control[[i]]] # number of individuals of each control species per site
  }

  dens_control <- list()
  for (i in seq_along(control)) {
    dens_control[[i]] <- sum(n_control[[i]]) # density of the control species (i.e. abundance)
  }
  spec_control <- list()
  for (i in seq_along(control)) {
    spec_control[[i]] <- data$ESPECE[data$ESPECE %in% control[[i]]] # species names
  }

  biom_control <- list()
  for (i in seq_along(control)) {
    biom_control[[i]] <- sum(n_control[[i]][match(control[[i]][control[[i]] %in% spec_control[[i]]], spec_control[[i]])] *
                               control_bw[[i]][control[[i]] %in% control[[i]]]) # biomass of all individuals of all control species at a given site
  }

  rich_control <- list()
  for (i in seq_along(control)) {
    rich_control[[i]] <- length(spec_control[[i]][n_control[[i]] > 0]) # species richness of the control species per site
  }

  rich_total <- length(data$N[data$N > 0]) # species richness of all birds
  species <- data$ESPECE[data$N > 0] # species names
  dens <- data$N[data$N > 0] # density of each single species (i.e. abundance)
  dens_total <- sum(dens) # total density per site (i.e. number of all individuals of all species put together)
  biom_total <- sum(dens * bw[match(species, Species)], na.rm = TRUE) # total biomass of all species per site (all individuals)

  dens_wo_control <- list()
  for (i in seq_along(control)) {
    dens_wo_control[[i]] <- dens_total - dens_control[[i]] # density of all species excluding the control species
  }

  biom_wo_control <- list()
  for (i in seq_along(control)) {
    biom_wo_control[[i]] <- biom_total - biom_control[[i]] # biomass of all species excluding the control species
  }

  rich_wo_control <- list()
  for (i in seq_along(control)) {
    rich_wo_control[[i]] <- rich_total - rich_control[[i]] # species richness of all species excluding the control species
  }

  year <- data$ANNEE[1]

  # Data frame containing the information we are collecting for the later analysis with VAST,
  # i.e. the details on density etc. calculated in the steps above
  newdata2 <- list()
  for (i in seq_along(control)) {
    newdata2[[i]] <- data.frame(ID = rep(IDs[[j]], 9),
                                year = rep(year, 9),
                                lat = rep(data$latitude_wgs84[1], 9),
                                lon = rep(data$longitude_wgs84[1], 9),
                                habitat = rep(data$habitat2[1], 9),
                                div = c("control_density", "control_biomass", "control_richness",
                                        "total_density", "total_biomass", "total_richness",
                                        "total_density_wo_control", "total_biomass_wo_control", "total_richness_wo_control"),
                                tot = c(dens_control[[i]], biom_control[[i]], rich_control[[i]],
                                        dens_total, biom_total, rich_total,
                                        dens_wo_control[[i]], biom_wo_control[[i]], rich_wo_control[[i]]))
  }

  for (i in seq_along(newdata)) {
    newdata[[i]] <- rbind(newdata[[i]], newdata2[[i]])
  }
}

# Save the newdata file just in case, before adding the PCA values for the climate data!
save(newdata, file = "newdata")

#############################################################################################
# The above loop seems to work, but may result in the first "plot" being duplicated, so get rid of it again
# Check the first two plots to see if they are duplicated
lapply(newdata, head, n = 20)

# Run this only after checking that there was in fact a duplication and you need to get rid of it
# for(i in seq_along(newdata)) {
#   newdata[[i]] <- newdata[[i]][-c(1:9), ]
# }

# Save the datafiles for security
setwd(Store_Control)
save(newdata, file = "newdata")

for (i in seq_along(newdata)) {
  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  dir.create(Folder)
  cwd <- getwd() # Current dir
  setwd(Folder)
  write.csv2(newdata[[i]], paste(names(control)[i], "_Control_biomass_site.csv"))
  setwd(cwd)
}

# Set the working directory to the initial path again
setwd(Back)
getwd()
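# Optional sanity check (a sketch, not part of the original workflow): each combination of
# site ID and diversity metric should occur only once in every element of newdata, so a
# duplicated first site would show up here.
for (i in seq_along(newdata)) {
  n_dup <- anyDuplicated(newdata[[i]][, c("ID", "div")])
  if (n_dup > 0) message("newdata[[", i, "]] contains duplicated rows (first at row ", n_dup, ")")
}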
### Add the environmental data to the respective files to be able to run the VAST models later
setwd()
climate <- read.csv("fra_climate.csv", sep = ";", dec = ",", header = TRUE) # Not available
str(climate) # 22851 obs.

# Set the correct working directory to save the data after adding the climate variable to each data set
setwd(Store_Control)

# Match the files for biomass per site and climate for each subset of newdata, and save
for (i in seq_along(newdata)) {
  newdata[[i]] <- merge(newdata[[i]], climate, by = "ID")
  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  dir.create(Folder)
  cwd <- getwd() # Current dir
  setwd(Folder)
  write.csv2(newdata[[i]], paste(names(control)[i], "_Control_VAST.csv"))
  setwd(cwd)
}
setwd(Back)

###########################################################################
##### Estimate the effect of random species groups' biomass on forest bird density in France
##### Start here if using the 300 already provided data sets (named "1_Control_VAST" etc.)
###########################################################################
rm(list = ls())

### Start from here with the ready data sets
# If starting from here in a fresh R session, reload the packages used below
library(TMB)
library(VAST)
library(dplyr)

# Set the correct working directory where to find the data
Data_WD <- "..." # Replace "..." with the path to your working directory
setwd(Data_WD)
getwd()

# Define the length of your vector, i.e. the number of control samples you have
# and thus how many "iterations" you would like to run.
control <- list()
for (i in 1:100) {
  control[[i]] <- NA # really just a dummy list; it only needs to contain the same number of elements as your actual control samples
}
names(control) <- c(1:100)

# Create a dataframe which will store the values of the estimates and the SEs
Parameter <- c("Control_biom", "Envir_var", "Spatial", "Spatio_temp")
estimates <- data.frame(Parameter)
SE <- data.frame(Parameter)

# Set the WD to store the dataframes for later. Put this in a new folder
Store_Control <- "..." # Replace "..." with the path to the working directory where you store the output
setwd(Store_Control)
getwd()

write.csv2(file = "Estimates_control.csv", estimates)
write.csv2(file = "SE_control.csv", SE)

# Change back to the folder where the single datafiles for the random species sets are stored
setwd(Data_WD)
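# Note: the loop below fits one VAST model per random species set, so a single failed or
# non-converging fit would stop the whole run. If that becomes a problem, the loop body could
# be wrapped in tryCatch(); a minimal, self-contained illustration of the pattern (try_fit()
# is a hypothetical helper, shown here with a toy expression rather than the actual VAST fit):
try_fit <- function(expr) {
  tryCatch(expr, error = function(e) {
    message("fit failed: ", conditionMessage(e)) # report the error ...
    NULL                                         # ... and return NULL instead of stopping
  })
}
# e.g. try_fit(log("a")) prints a message and returns NULL rather than throwing an error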
# Run a loop through all control samples to get the estimates for the biological control
# Data provided upon request
for (i in seq_along(control)) {

  Folder <- paste0(paste(names(control)[i], "_Biol_Control"))
  setwd(Folder)

  # Load the data for all forest birds excluding the randomly drawn species
  France <- read.csv2(paste(names(control)[i], "_Control_VAST.csv"), sep = ";", dec = ",",
                      header = TRUE, row.names = 1) # Data provided upon request

  # Split off the data for the biomass of all forest birds
  France_all <- France[France$div == "total_biomass_wo_control", ]
  # Split off the data for the random group (i.e. the covariates)
  France_control <- France[France$div == "control_biomass", ]
  rm(France)
  # str(France_all)
  # str(France_control)

  ### Remove all zero observations from the data set
  # Save the rows that have zero observations
  remove <- France_all[France_all$tot <= 0, ]
  # Save the dataset of France_all without the zero observations
  France_all <- France_all[{!France_all$ID %in% remove$ID}, ]
  # str(France_all)
  # Save the dataset of France_control without the zero observations
  France_control <- France_control[{!France_control$ID %in% remove$ID}, ]

  # Use the latest version of the C++ code
  Version <- get_latest_version(package = "VAST")

  # Define the spatial resolution of the model, and define whether to use a mesh or grid approximation
  Method <- "Mesh"
  grid_size_km <- 25
  n_x <- 1000 # Number of stations, i.e. "knots"
  Kmeans_Config <- list("randomseed" = 1, "nstart" = 100, "iter.max" = 1e3)

  # Define whether to include spatial and spatio-temporal variation,
  # the rank of this covariance among species, whether it is autocorrelated,
  # and whether there is overdispersion
  FieldConfig <- c("Omega1" = 0, "Epsilon1" = 0, "Omega2" = 1, "Epsilon2" = 1)
  # Here the first linear predictor is turned off;
  # one spatial and one spatio-temporal factor are used for the second linear predictor
  RhoConfig <- c("Beta1" = 0, "Beta2" = 0, "Epsilon1" = 0, "Epsilon2" = 4) # these settings define a Gompertz model
  OverdispersionConfig <- c("Delta1" = 0, "Delta2" = 0) # no overdispersion
  ObsModel <- c(2, 3)

  # Set the post-hoc calculations which you would use
  Options <- c("SD_site_density" = 0,
               "SD_site_logdensity" = 0,
               "Calculate_Range" = 1,          # turned on
               "Calculate_evenness" = 0,
               "Calculate_effective_area" = 1, # turned on
               "Calculate_Cov_SE" = 0,
               "Calculate_Synchrony" = 0,
               "Calculate_Coherence" = 0)

  # Potential stratification of results
  strata.limits <- data.frame(STRATA = "All_areas")
  # Set the region. No region is to be downloaded from any dataset, so set it to "Other" to use the region you defined
  Region <- "Other"

  # Set the location where the files will be saved
  DateFile <- paste0(getwd(), '/Control_VAST')
  dir.create(DateFile)

  # Save a list with all the settings (not necessary for the model, but good practice) to remember what you did
  Record <- ThorsonUtilities::bundlelist(c("Version", "Method", "grid_size_km", "n_x", "FieldConfig",
                                           "RhoConfig", "OverdispersionConfig", "ObsModel", "Kmeans_Config"))
  save(Record, file = file.path(DateFile, "Record.RData"))
  capture.output(Record, file = file.path(DateFile, "Record.txt"))

  # set.seed to get reproducible results
  set.seed(123)

  # Define the area and such
  AreaSwept_km2 <- pi * 0.100^2
  Data_Geostat <- data.frame(Catch_KG = France_all[, "tot"],
                             Year = France_all[, "year"],
                             Vessel = France_all[, "ID"],
                             AreaSwept_km2 = AreaSwept_km2,
                             Lat = France_all[, "lat"],
                             Lon = France_all[, "lon"])

  # Create the extrapolation grid that is appropriate for the specific region
  Extrapolation_List <- make_extrapolation_info(Region = Region, strata.limits = strata.limits,
                                                observations_LL = Data_Geostat[, c("Lat", "Lon")])

  # Generate the information which is used for the estimation of the spatio-temporal parameters
  Spatial_List <- make_spatial_info(grid_size_km = grid_size_km, n_x = n_x, Method = Method,
                                    Lon = Data_Geostat[, "Lon"], Lat = Data_Geostat[, "Lat"],
                                    Extrapolation_List = Extrapolation_List, DirPath = DateFile,
                                    Save_Results = FALSE)

  # Add the knots to "Data_Geostat"
  Data_Geostat <- cbind(Data_Geostat, "knot_i" = Spatial_List$knot_i)

  # Define and arrange the covariates
  covariates <- cbind(scale(France_control$tot), scale(France_control$PC2)) # check with Mira what exactly needs to be scaled
  colnames(covariates) <- c("tit_tot", "clim")
  t_e <- France_control[, "year"]

  # Format the covariates
  Covariate <- format_covariates(Lat_e = France_control[, "lat"],
                                 Lon_e = France_control[, "lon"],
                                 t_e = France_control[, "year"],
                                 Cov_ep = covariates,
                                 Extrapolation_List = Extrapolation_List,
                                 Spatial_List = Spatial_List,
                                 FUN = mean,
                                 Year_Set = min(t_e):max(t_e),
                                 na.omit = "time-average")
  X_cov <- Covariate$Cov_xtp

  # Build the list of data inputs used for parameter estimation
  TmbData <- Data_Fn(Version = Version,
                     FieldConfig = FieldConfig,
                     OverdispersionConfig = OverdispersionConfig,
                     RhoConfig = RhoConfig,
                     ObsModel = ObsModel,
                     c_i = rep(0, nrow(Data_Geostat)),
                     b_i = Data_Geostat[, "Catch_KG"],
                     a_i = Data_Geostat[, "AreaSwept_km2"],
                     s_i = Data_Geostat[, "knot_i"] - 1,
                     t_i = Data_Geostat[, "Year"],
                     v_i = as.numeric(Data_Geostat[, "Vessel"]) - 1,
                     a_xl = Spatial_List$a_xl,
                     X_xtp = X_cov,
                     MeshList = Spatial_List$MeshList,
                     GridList = Spatial_List$GridList,
                     Method = Spatial_List$Method,
                     Options = Options,
                     CheckForErrors = FALSE)

  # In this step, build the TMB object
  TmbList <- Build_TMB_Fn(TmbData = TmbData, RunDir = DateFile, Version = Version,
                          RhoConfig = RhoConfig, CovConfig = TRUE,
                          loc_x = Spatial_List$loc_x, Method = Method, Use_REML = FALSE)

  # "Turn off" the encounter probability from the gamma estimate
  Map <- TmbList$Map
  Map[["gamma1_ctp"]] <- rep(NA, length(TmbList$Parameters$gamma1_ctp))
  Map[["gamma1_ctp"]] <- factor(Map[["gamma1_ctp"]])

  # Expand the TMB object, adding the object "Map"
  TmbList <- Build_TMB_Fn(Map = Map, TmbData = TmbData, RunDir = DateFile, Version = Version,
                          RhoConfig = RhoConfig, CovConfig = TRUE,
                          loc_x = Spatial_List$loc_x, Method = Method, Use_REML = FALSE)
  Obj <- TmbList[["Obj"]]
  # Use a gradient-based nonlinear minimizer to identify maximum likelihood estimates for the fixed effects
  Opt <- TMBhelper::Optimize(obj = Obj, lower = TmbList[["Lower"]], upper = TmbList[["Upper"]],
                             getsd = TRUE, savedir = DateFile, bias.correct = FALSE,
                             newtonsteps = 2, loopnum = 1)

  # Go back to the overarching WD to save the most important information in a CSV file
  setwd(Store_Control)

  # Read the CSV files where you will store the information
  estimates_C <- read.csv2("Estimates_control.csv", header = TRUE, row.names = 1, check.names = FALSE)
  SE_C <- read.csv2("SE_control.csv", header = TRUE, row.names = 1, check.names = FALSE)

  # Extract the values for the parameters you want
  SD <- summary(sdreport(Obj))
  values <- as.data.frame(SD)
  values <- values %>% filter(row.names(values) %in% c("gamma2_ctp", "gamma2_ctp.1", "L_omega2_z", "L_epsilon2_z"))
  row.names(values) <- c("Control_biom", "Envir_var", "Spatial", "Spatio_temp")

  # Add the values to the respective datafiles
  # Get the number of the control group we are looking at
  N_col <- paste0(paste(names(control)[i], "_control"))
  # Remove the whitespace in N_col
  N_col <- gsub(" ", "", N_col)

  # Start with the estimates
  estimates_C$N_col <- values[, 1]
  colnames(estimates_C)[colnames(estimates_C) == "N_col"] <- N_col
  # Save the file
  write.csv2(file = "Estimates_control.csv", estimates_C)

  # Continue with the SEs
  SE_C$N_col <- values[, 2]
  colnames(SE_C)[colnames(SE_C) == "N_col"] <- N_col
  # Save the file
  write.csv2(file = "SE_control.csv", SE_C)

  rm(list = ls()[! ls() %in% c("Data_WD", "control", "Store_Control", "Folder")])
  setwd(Data_WD)
}

#####
###########################################################################################
### WITH ALL CONVERGED MODELS ################
# Check out the estimates and how well the control models perform compared to the titmice

# Switch to the folder where you store the estimates for titmouse biomass
setwd("G:/Mira/Titmice simulations/FRA")
estimate_TM <- read.csv2("estimate_TM.csv", header = TRUE, row.names = 2, check.names = FALSE)
estimate_TM <- as.data.frame(t(estimate_TM))
estimate_TM <- estimate_TM[-1, ]
estimate_TM$group <- rownames(estimate_TM)

SE_TM <- read.csv2("SE_TM.csv", header = TRUE, row.names = 2, check.names = FALSE)
SE_TM <- as.data.frame(t(SE_TM))
SE_TM <- SE_TM[-1, ]
SE_TM$group <- rownames(SE_TM)

titmouse <- as.data.frame(estimate_TM[, 1])
colnames(titmouse) <- "estimate"
titmouse$lower_CI <- titmouse[, 1] - 1.96 * SE_TM[, 1]
titmouse$upper_CI <- titmouse[, 1] + 1.96 * SE_TM[, 1]
titmouse$group <- SE_TM$group

# Change WD again
setwd(Store_Control)

# Read the information about the respective estimates
estimates_C <- read.csv2("Estimates_control.csv", header = TRUE, row.names = 2, check.names = FALSE)
estimates_C <- estimates_C[, -1]
estimates_C <- as.data.frame(t(estimates_C))
estimates_C$group <- rownames(estimates_C)

SE_C <- read.csv2("SE_control.csv", header = TRUE, row.names = 2, check.names = FALSE)
SE_C <- SE_C[, -1]
SE_C <- as.data.frame(t(SE_C))
SE_C$group <- rownames(SE_C)

# Create a new dataframe containing the estimate of "Control_group_Biomass"
# and its lower and upper CI
control_biom <- as.data.frame(estimates_C[, 1])
colnames(control_biom) <- "estimate"
control_biom$lower_CI <- control_biom[, 1] - 1.96 * SE_C[, 1]
control_biom$upper_CI <- control_biom[, 1] + 1.96 * SE_C[, 1]
control_biom$N <- c(1:length(control_biom[, 1]))
control_biom$group <- SE_C$group

# Colour for the polygon
grey1 <- rgb(153, 153, 153, 87, maxColorValue = 255)

# See how the control group performs per sample
# Plot the performance of the biological control with the lower and upper CI,
# and add the estimate of titmouse biomass for visual support ####
png(filename = "VAST estimate biological control.png", width = 39.7, height = 21, units = "cm", res = 300)
plot(control_biom[, 1] ~ control_biom[, 4], type = "n",
     ylim = c(min(control_biom[, 1] - 0.25), max(control_biom[, 1] + 0.25)),
     xaxt = "n", main = "VAST estimate biological control", ylab = "", xlab = "Control Group")
axis(1, at = 1:length(control_biom[, 1]), las = 3) # tick marks per control group; change according to the number of samples
polygon(c(rev(control_biom[, 4]), control_biom[, 4]),
        c(rev(control_biom$upper_CI), control_biom$lower_CI),
        col = grey1, border = NA)
lines(control_biom$estimate, lwd = 2, col = "red")
lines(control_biom$lower_CI, lty = 3, col = "grey5") # add lower CI
lines(control_biom$upper_CI, lty = 3, col = "grey5") # add upper CI
abline(h = titmouse$estimate, col = "blue", lty = 2)
x <- as.numeric(length(control_biom[, 1]) - 10.5)
y <- max(control_biom[, 1] + 0.2)
text(x, y, paste0("mean control = ", round(mean(control_biom[, 1]), digits = 3)), cex = 1.5)
text(x, y - 0.05, paste0("median control = ", round(median(control_biom[, 1]), digits = 3)), cex = 1.5)
text(x, y - 0.10, paste0("titmouse = ", round(titmouse$estimate, digits = 3)), cex = 1.5)
dev.off()
#####

# Compare the performance of the control groups to the actual estimate
png(filename = "Histogram VAST estimate biological control.png", width = 29.7, height = 21, units = "cm", res = 300)
dens <- density(control_biom[, 1])
max <- which.max(dens$y)
hist(control_biom[, 1], prob = TRUE, col = "indianred", breaks = 10,
     main = "VAST estimate biological control", ylab = "Density", xlab = "Estimate",
     xlim = c(min(control_biom[, 1] - 0.15), max(control_biom[, 1] + 0.15)), lwd = 2)
lines(density(control_biom[, 1]), lwd = 3)
text(min(control_biom[, 1] - 0.05), dens$y[max] - 0.8,
     paste0("p ", ifelse(sum(control_biom$estimate > titmouse$estimate) / length(control_biom$estimate) > 0.05, "> 0.05", "< 0.05")),
     cex = 1.5)
text(min(control_biom[, 1] - 0.05), dens$y[max] - 0.2,
     paste0("titmouse = ", round(titmouse$estimate, digits = 3)), cex = 1.5)
clip(0, 1, 0, dens$y[max] + 2)
abline(v = titmouse$estimate, lty = 2, col = "blue", lwd = 5)
dev.off()

# Create the table for the supplements
# Get the species sets
setwd(Data_WD)
suppl_table <- read.csv2("Table_control_groups.csv", header = TRUE, row.names = 1, check.names = FALSE)
suppl_table <- cbind(suppl_table, control_biom)[, -10]

# Add the titmouse values for comparison
titmice <- t(c("PARATE", "PARCAE", "PARCRI", "PARMAJ", "PARMON", "PARPAL"))
titmice <- data.frame(titmice)
colnames(titmice) <- c("Species_1", "Species_2", "Species_3", "Species_4", "Species_5", "Species_6")
row.names(titmice) <- "Titmice"
titmice <- cbind(titmice, titmouse)
suppl_table <- rbind(titmice, suppl_table)

# Store the data
setwd(Store_Control)
write.csv2(file = "Table_supplements.csv", suppl_table)

### Checking which random groups are significant
#################################################################################################################
store <- list()
for (i in 1:nrow(control_biom)) {
  store[[i]] <- ifelse(range(c(control_biom[i, ]$lower_CI, control_biom[i, ]$upper_CI) > 0), TRUE,
                       ifelse(range(c(control_biom[i, ]$lower_CI, control_biom[i, ]$upper_CI) < 0), TRUE, FALSE))
}

store2 <- list()
for (j in seq_along(store)) {
  store2[[j]] <- t(as.data.frame(store[[j]]))
  if ((store2[[j]][, 1] == "TRUE") & (store2[[j]][, 2] == "TRUE")) print(j)
}
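# The same check in a compact, vectorized form (a sketch that gives the row numbers of the
# random groups whose 95% CI lies entirely above or entirely below zero):
sig_groups <- which(control_biom$lower_CI > 0 | control_biom$upper_CI < 0)
sig_groups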
#####
### WITH ONLY CONVERGED AND SIGNIFICANT MODELS ################
# Check out the estimates and how well the control models perform compared to the titmice

# Switch to the folder where you store the estimates for titmouse biomass
setwd("")
estimate_TM <- read.csv2("estimate_TM.csv", header = TRUE, row.names = 2, check.names = FALSE)
estimate_TM <- as.data.frame(t(estimate_TM))
estimate_TM <- estimate_TM[-1, ]

SE_TM <- read.csv2("SE_TM.csv", header = TRUE, row.names = 2, check.names = FALSE)
SE_TM <- as.data.frame(t(SE_TM))
SE_TM <- SE_TM[-1, ]

titmouse <- as.data.frame(estimate_TM[, 1])
colnames(titmouse) <- "estimate"
titmouse$lower_CI <- titmouse[, 1] - 1.96 * SE_TM[, 1]
titmouse$upper_CI <- titmouse[, 1] + 1.96 * SE_TM[, 1]

# Change WD again
setwd(Store_Control)

# Read the information about the respective estimates
estimates_C <- read.csv2("Estimates_control_sig.csv", header = TRUE, row.names = 2, check.names = FALSE)
estimates_C <- estimates_C[, -1]
estimates_C <- as.data.frame(t(estimates_C))

SE_C <- read.csv2("SE_control_sig.csv", header = TRUE, row.names = 2, check.names = FALSE)
SE_C <- SE_C[, -1]
SE_C <- as.data.frame(t(SE_C))

# Create a new dataframe containing the estimate of "Control_group_Biomass"
# and its lower and upper CI
control_biom <- as.data.frame(estimates_C[, 1])
colnames(control_biom) <- "estimate"
control_biom$lower_CI <- control_biom[, 1] - 1.96 * SE_C[, 1]
control_biom$upper_CI <- control_biom[, 1] + 1.96 * SE_C[, 1]
control_biom$N <- c(1:length(control_biom[, 1]))

# Colour for the polygon
grey1 <- rgb(153, 153, 153, 87, maxColorValue = 255)

# See how the control group performs per sample
# Plot the performance of the biological control with the lower and upper CI,
# and add the estimate of titmouse biomass for visual support ####
png(filename = "VAST estimate biological control sig.png", width = 39.7, height = 21, units = "cm", res = 300)
plot(control_biom[, 1] ~ control_biom[, 4], type = "n",
     ylim = c(min(control_biom[, 1] - 0.25), max(control_biom[, 1] + 0.25)),
     xaxt = "n", main = "VAST estimate biological control", ylab = "", xlab = "Control Group")
axis(1, at = 1:length(control_biom[, 1]), las = 3) # tick marks per control group; change according to the number of samples
polygon(c(rev(control_biom[, 4]), control_biom[, 4]),
        c(rev(control_biom$upper_CI), control_biom$lower_CI),
        col = grey1, border = NA)
lines(control_biom$estimate, lwd = 2, col = "red")
lines(control_biom$lower_CI, lty = 3, col = "grey5") # add lower CI
lines(control_biom$upper_CI, lty = 3, col = "grey5") # add upper CI
abline(h = titmouse$estimate, col = "blue", lty = 2)
x <- as.numeric(length(control_biom[, 1]) - 10.5)
y <- max(control_biom[, 1] + 0.2)
text(x, y, paste0("mean control = ", round(mean(control_biom[, 1]), digits = 3)), cex = 1.5)
text(x, y - 0.05, paste0("median control = ", round(median(control_biom[, 1]), digits = 3)), cex = 1.5)
text(x, y - 0.10, paste0("titmouse = ", round(titmouse$estimate, digits = 3)), cex = 1.5)
dev.off()
#####

# Compare the performance of the control groups to the actual estimate
png(filename = "Histogram VAST estimate biological control sig.png", width = 29.7, height = 21, units = "cm", res = 300)
dens <- density(control_biom[, 1])
max <- which.max(dens$y)
hist(control_biom[, 1], prob = TRUE, col = "indianred", breaks = 10,
     main = "VAST estimate biological control", ylab = "Density", xlab = "Estimate",
     xlim = c(min(control_biom[, 1] - 0.15), max(control_biom[, 1] + 0.15)), lwd = 2)
lines(density(control_biom[, 1]), lwd = 3)
text(min(control_biom[, 1] - 0.05), dens$y[max] - 0.8,
     paste0("p ", ifelse(sum(control_biom$estimate > titmouse$estimate) / length(control_biom$estimate) > 0.05, "> 0.05", "< 0.05")),
     cex = 1.5)
text(min(control_biom[, 1] - 0.05), dens$y[max] - 0.2,
     paste0("titmouse = ", round(titmouse$estimate, digits = 3)), cex = 1.5)
clip(0, 1, 0, dens$y[max] + 2)
abline(v = titmouse$estimate, lty = 2, col = "blue", lwd = 5)
dev.off()
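# A small closing summary (a sketch, not part of the original figures): how many of the
# retained random groups have a 95% CI that contains the titmouse estimate, and how many
# have a CI that excludes zero.
overlaps_titmouse <- with(control_biom, lower_CI <= titmouse$estimate & upper_CI >= titmouse$estimate)
excludes_zero <- with(control_biom, lower_CI > 0 | upper_CI < 0)
table(overlaps_titmouse)
table(excludes_zero)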