library(tidyverse)
library(imputeTS)
library(gridExtra)
library(foreach)
library(doParallel)


# The input data must consist of a single numeric column: the column title as the header, followed by one value per row.


optimal_sample_size <- function(data, p.adjust, core) {
  options(warn = -1)
  
  time <- seq(1, 300, by = 1)
  
  out_data <- vector("list", length(time))
  
  
  registerDoParallel(core) # number of cores to use
  out_data <-
    foreach (rep = seq_along(time), .combine = rbind) %dopar% {
      # syntax to use multiple cores.
      
      
      jack = data.frame(sample_size = c(1:(length(data[, 1])))) %>%
        mutate(rep = list(1:1000)) %>%
        unnest() %>%
        mutate(
          jack.mean = map(sample_size, function(sample_size) {
            sub = sample(data[, 1], sample_size)
            mean(sub)
          }),
          jack.sd = map(sample_size, function(sample_size) {
            sub = sample(data[, 1], sample_size)
            as.numeric(sd(sub))
          })
        ) %>%
        mutate(jack.mean = as.numeric(jack.mean),
               jack.sd = as.numeric(jack.sd))
      
      
      jack2 <- jack %>%
        group_by(sample_size) %>%
        summarise(sd_total = sd(jack.mean)) # calculating the sd
      
      
      diff_sd <-
        diff(jack2$sd_total) * -1 # calculating the difference of sd and making them positive.
      jack2 <-
        data.frame(jack2) # necessary to run the next line of code
      jack2 <-
        top_n(jack2, nrow(jack2) - 2, sd_total) # removing last two rows
      jack2$diff_sd = diff_sd[1:length(diff_sd) - 1]
      out_data[[rep]] <- jack2
      
      
      reps <- rep(seq(1, 300, by = 1), each = nrow(data) - 2)
      
      out_data$rep <- reps
      
      complete_data_sum <- out_data %>%
        group_by(sample_size) %>%
        summarise(Mean = mean(diff_sd))
      
      graph <<- ggplot(out_data, aes(x = sample_size, y = diff_sd)) +
        geom_point(aes(colour = as.factor(rep)),
                   size = 2,
                   alpha = 0.2) +
        scale_x_continuous(name = "\nSample Size", breaks = seq(0, length(data[, 1]), by = 1)) +
        scale_y_continuous(name = "\ndiff sd") +
        theme_bw() +
        theme(
          axis.text.x = element_text(
            face = "bold",
            color = "black",
            size = 10
          ),
          axis.text.y = element_text(
            face = "bold",
            color = "black",
            size = 10
          )
        ) +
        geom_smooth(data = complete_data_sum,
                    aes(x = sample_size, y = Mean),
                    method = "loess") +
        theme(legend.position = "none")
      
      
      results <-
        pairwise.t.test(out_data$diff_sd, out_data$sample_size, p.adj = p.adjust)
      
      results_filt <- do.call(rbind.data.frame, results[3])
      
      
      colnames(results_filt[2])
      
      output_p <- vector("double", 1)
      
      
      col_vector <- c(1:length(results_filt))
      
      for (col in seq_along(col_vector)) {
        if (results_filt[[col, col]] > 0.05) {
          output_p <- colnames(results_filt[col])
        }
        if (results_filt[[col, col]] > 0.05)
          break
      }
      
      return(paste0("the minimum number of replicates is:", output_p))
      
    }