library(tidyverse)
library(imputeTS)
library(gridExtra)
library(foreach)
library(doParallel)

#' Estimate the minimum number of replicates by repeated resampling
#'
#' For every sample size from 1 to the number of observations, draws
#' `n_resamples` random subsamples (without replacement), takes the mean of
#' each, and computes the standard deviation of those means. The decrease in
#' that standard deviation between consecutive sample sizes (`diff_sd`) is
#' recorded. The whole procedure is repeated `n_iterations` times in parallel.
#' The `diff_sd` values are then compared between sample sizes with
#' `pairwise.t.test()`, and the first sample size whose `diff_sd` does not
#' differ (p > 0.05) from that of the next sample size is reported.
#'
#' Side effect: a ggplot object showing `diff_sd` against sample size (points
#' per iteration plus a loess curve through the per-size means) is assigned to
#' the variable `graph` in the global environment.
#'
#' @param data Input values in one numeric column: a data frame or tibble
#'   (the first column is used), a matrix (first column), or a plain numeric
#'   vector. At least 4 observations are required.
#' @param p.adjust P-value adjustment method forwarded to
#'   `pairwise.t.test()` (e.g. `"bonferroni"`, `"holm"`, `"none"`).
#' @param core Number of cores handed to `registerDoParallel()`.
#' @param n_iterations Number of independent repetitions of the resampling
#'   procedure (default 300).
#' @param n_resamples Number of subsamples drawn per sample size within one
#'   repetition (default 1000).
#' @return A single character string,
#'   `"the minimum number of replicates is:<n>"`. `<n>` is `0` when no pair of
#'   consecutive sample sizes has p > 0.05.
optimal_sample_size <- function(data, p.adjust, core,
                                n_iterations = 300, n_resamples = 1000) {
  # `[[` is used for data frames so that tibbles (where `data[, 1]` stays a
  # tibble) yield a plain vector as well.
  values <- if (is.data.frame(data)) {
    data[[1]]
  } else if (is.matrix(data)) {
    data[, 1]
  } else {
    data
  }

  if (!is.numeric(values)) {
    stop("`data` must contain numeric values in its first column.",
         call. = FALSE)
  }
  n_obs <- length(values)
  if (n_obs < 4L) {
    # The two largest sample sizes are dropped and the t-test needs at least
    # two remaining groups.
    stop("`data` must contain at least 4 observations.", call. = FALSE)
  }
  if (n_iterations < 2 || n_resamples < 2) {
    stop("`n_iterations` and `n_resamples` must both be at least 2.",
         call. = FALSE)
  }

  sample_sizes <- seq_len(n_obs)
  # Positions that are kept: everything except the two largest sample sizes.
  kept <- seq_len(n_obs - 2L)

  # Warnings are silenced for the duration of the call only.
  old_options <- options(warn = -1)
  on.exit(options(old_options), add = TRUE)

  registerDoParallel(core) # number of cores to use
  on.exit(
    {
      # Release implicitly created workers and leave a usable (sequential)
      # backend registered instead of one pointing at stopped workers.
      stopImplicitCluster()
      registerDoSEQ()
    },
    add = TRUE
  )

  # The worker body uses base R only, so it does not depend on packages being
  # attached on the workers.
  out_data <- foreach(iter = seq_len(n_iterations), .combine = rbind) %dopar% {
    # sd of the resampled means, one value per sample size
    sd_total <- vapply(
      sample_sizes,
      function(size) {
        stats::sd(replicate(n_resamples, mean(sample(values, size))))
      },
      numeric(1)
    )
    # decrease of the sd from one sample size to the next (sign flipped so a
    # shrinking sd gives positive values)
    sd_drop <- -diff(sd_total)
    data.frame(
      sample_size = sample_sizes[kept],
      sd_total = sd_total[kept],
      diff_sd = sd_drop[kept],
      rep = iter
    )
  }

  complete_data_sum <- out_data %>%
    group_by(sample_size) %>%
    summarise(Mean = mean(diff_sd))

  # NOTE: the plot is published through a global variable; kept because
  # existing callers may read `graph` after the call.
  graph <<- ggplot(out_data, aes(x = sample_size, y = diff_sd)) +
    geom_point(aes(colour = as.factor(rep)), size = 2, alpha = 0.2) +
    scale_x_continuous(
      name = "\nSample Size",
      breaks = seq(0, n_obs, by = 1)
    ) +
    scale_y_continuous(name = "\ndiff sd") +
    theme_bw() +
    theme(
      axis.text.x = element_text(face = "bold", color = "black", size = 10),
      axis.text.y = element_text(face = "bold", color = "black", size = 10)
    ) +
    geom_smooth(
      data = complete_data_sum,
      aes(x = sample_size, y = Mean),
      method = "loess"
    ) +
    theme(legend.position = "none")

  results <- pairwise.t.test(
    out_data$diff_sd,
    out_data$sample_size,
    p.adjust.method = p.adjust
  )

  # The diagonal of the p-value matrix holds the comparisons between
  # consecutive sample sizes (column = smaller size, row = next size).
  adjacent_p <- diag(results$p.value)
  # which() ignores NA/NaN p-values instead of failing on them.
  first_hit <- which(adjacent_p > 0.05)[1]
  output_p <- if (is.na(first_hit)) {
    0
  } else {
    colnames(results$p.value)[first_hit]
  }

  paste0("the minimum number of replicates is:", output_p)
}