--- title: "Estimating the Strength of Evidence in Psychological Science" output: html_document: default pdf_document: default authors: Aczel, Palfi and Szaszi --- load packages ```{r warning =FALSE, message= FALSE} library(psych) library(ggplot2) library(BayesFactor) library(gmodels) library(plyr) ``` load data ```{r} #data_ti <- read.csv("data_ti.csv", sep = ";") #data_tp <- read.csv("data_tp.csv", sep = ";") #data_r <- read.csv("data_r.csv", sep = ";") data_all <- read.csv("statcheck_dataset.csv") ``` ################################# Data filtering and transformation of the t and F statistics ####################################### #### Data filtering and transformation ```{r} # Selecting t, F and r statistics data <- subset(data_all, Statistic %in% c("t", "F")) # Selecting F statistics where df1 = 1 data <- subset(data, df1 %in% c(1, NA)) nrow(subset(data, Statistic %in% c("t"))) #169984 nrow(subset(data, Statistic %in% c("F"))) #233945 # Excluding non-significant results data <- subset(data, data$Computed <= 0.05) # Excluding cases with not exact test statistics values data <- subset(data, data$Test.Comparison == "=") # Selecting two-tailed tests data <- subset(data, data$OneTail=="FALSE") # Excluding cases where the re-calculated and original p-values suggest different decisions data <- subset(data, data$DecisionError == "FALSE") # Rounds up the non-integer dfs data$df2_rounded <- ceiling(data$df2) # Calculating the square root of the F values data$Value_calc <- ifelse(data$Statistic == "F", sqrt(abs(data$Value)), data$Value) #Assuming balanced designs data_ti <- data #creating new column and calculating N1 and N2 from df2 #Calcuating sample size from df data_ti$N1 <- ((data_ti$df2_rounded)+2)/2 data_ti$N2 <- data_ti$N1 #Excluding cases with too small sample size (<6) data_ti <- subset(data_ti, data_ti$df2_rounded >= 4) nrow(subset(data_ti, Statistic %in% c("t"))) nrow(subset(data_ti, Statistic %in% c("F"))) ``` ###calculating Bayes factors with medium scaled prior ```{r warning =FALSE, message= FALSE} #Calculating LogBayes data_ti$LogBayes <- NA for (i in 1:nrow(data_ti)){ data_ti[i,"LogBayes"] <- ttest.tstat(data_ti[i, "Value_calc"], data_ti[i, "N1"], data_ti[i, "N2"], nullInterval = NULL, rscale = "medium", complement = FALSE, simple = FALSE)[['bf']] } #Bayes factors data_ti$Bayes <- exp(data_ti$LogBayes) ``` ############################################## ###Calculating Bayes factor evidence categories ### t+F tests, assuming independent design, applying medium scaled prior(Table 1 and Table S2) ```{r} data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 1,"Anecdotal H1", "Anecdotal H0") data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 3,"Moderate H1", data_ti$strength_of_evidence) data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 10,"Strong H1", data_ti$strength_of_evidence) data_ti$strength_of_evidence <- ifelse (data_ti$Bayes < 1/3,"Moderate H0", data_ti$strength_of_evidence) data_ti$strength_of_evidence <- ifelse (data_ti$Bayes < 1/10,"Strong H0", data_ti$strength_of_evidence) data_ti$strength_of_evidence <- as.factor(data_ti$strength_of_evidence) data_ti$strength_of_evidence <- ordered(data_ti$strength_of_evidence, levels = c("Strong H1", "Moderate H1", "Anecdotal H1", "Anecdotal H0", "Moderate H0", "Strong H0")) CrossTable(data_ti$strength_of_evidence) ``` ############################################## ### Creating P-value ranges for t+F tests assumming independent design ```{r} data_ti$P_level <- ifelse (data_ti$Computed >= 0, ".000-.005", "999") data_ti$P_level <- ifelse (data_ti$Computed >= 0.005, ".005-.010", data_ti$P_level) data_ti$P_level <- ifelse (data_ti$Computed >= 0.010, ".010-.015", data_ti$P_level) data_ti$P_level <- ifelse (data_ti$Computed >= 0.015, ".015-.020", data_ti$P_level) data_ti$P_level <- ifelse (data_ti$Computed >= 0.020, ".020-.025", data_ti$P_level) data_ti$P_level <- ifelse (data_ti$Computed >= 0.025, ".025-.030", data_ti$P_level) data_ti$P_level <- ifelse (data_ti$Computed >= 0.030, ".030-.035", data_ti$P_level) data_ti$P_level <- ifelse (data_ti$Computed >= 0.035, ".035-.040", data_ti$P_level) data_ti$P_level <- ifelse (data_ti$Computed >= 0.040, ".040-.045", data_ti$P_level) data_ti$P_level <- ifelse (data_ti$Computed >= 0.045, ".045-.050", data_ti$P_level) data_ti$P_level <- as.factor (data_ti$P_level) CrossTable(data_ti$P_level) ``` ### Cross-tabulation of p-value ranges with Bayes Factor Categories (calculated with medium scaled prior) for t+F tests assuming independent design (Table S3) ```{r} CrossTable(data_ti$strength_of_evidence, data_ti$P_level, prop.chisq = FALSE) ``` ############################################## ###Creating Plots ###Figure 1 - The relationship between significant p-values and the corresponding Bayes factors for the t-test results. The Bayes factors were calculated with medium scaled prior distribution assuming independent-samples design. ```{r} #The code for Figure 1 was adapted from: http://shinyapps.org/apps/RGraphCompendium/index.php#evidential-flow par(cex.main = 1, mar = c(4.5, 6, 4, 7) + 0.1, mgp = c(3, 1, 0), cex.lab = 1, font.lab = 2, cex.axis = 1, las = 1) plot(x= data_ti[,'Computed'], y=data_ti[,'LogBayes'], xlim = c(0, 0.05), ylim = c(-1 * log(10), log(100)), xlab = "", ylab = "", cex.lab = 1, cex.axis = 1, las = 1, yaxt = "n",xaxt="n", bty = "n", type = "p", pch = '.', bg = "black") labelsUpper = log(c(100, 30, 10, 3, 1)) lablesUppercorx = log(c(10, 3, 1)) labelsLower = -1 * log(c(10, 3, 1)) criticalP = c(labelsLower, 0, labelsUpper) criticalPcorx = c(lablesUppercorx,0,labelsLower) abline(h = 0) axis(side = 4, at = labelsUpper + 0.602, tick = FALSE, cex.axis = 1, labels = c ("", "", "Strong H1", "Moderate H1", "Anecdotal H1")) axis(side = 4, at = labelsLower - 0.602, tick = FALSE, cex.axis = 1, labels = c("Strong H0", "Moderate H0", "Anecdotal H0")) axis(side = 2, at = c(criticalP), tick = TRUE, las = 2, cex.axis = 1, labels = c( "1/10", "1/3", "1", "", "100", "30", "10", "3", "")) axis(side=1, at= c(0,0.005,0.01,0.015,0.02,0.025,0.03,0.035,0.04,0.045,0.05), cex.axis= 0.7) mtext("Bayes factors", side = 2, line = 2.5, las = 0, cex = 1) mtext("P-values", side = 1, line = 2.5, las = 1, cex = 1) grid::grid.text("", 0.97, 0.5, rot = 270, gp = grid::gpar(cex = 1.3)) for (idx in 1:length(criticalPcorx)) { abline(h = criticalPcorx[idx], col = "darkgrey", lwd = 1, lty = 2) } # #Arrows indicating the direction of the evidence # arrows(x0=0.025, y0= log(1/5), x1= 0.025, y1= log(1/1.5), length = 0.09, angle = 30, code = 1, lwd = 2) # arrows(x0=0.025, y0=log(10), x1= 0.025, log(30), length = 0.09, angle = 30, code = 2, lwd = 2) # text(0.015, log(1/8), "Evidence for H0", pos = 4, cex = 1) # text(0.015, log(70), "Evidence for H1", pos = 4, cex = 1) ``` ### Figure 2. - Density plots with quantile lines for the Bayes factors in ranges of significant p-values. The Bayes factors were calculated with medium scaled prior distribution assuming independent-samples design. ```{r} #Data preparation f = as.numeric(CrossTable(data_ti$P_level)[[2]][1,]) #calculate the proportion of p values in each P value range log_prop = log(f*100) #calculate the log values of each proportion for the legend #assign the corresponding log_prop value to each p value to color the violin plots data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0, log_prop[[1]], "999") data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.005, log_prop[[2]], data_ti$Proportion_of_N_total) data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.010, log_prop[[3]], data_ti$Proportion_of_N_total) data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.015, log_prop[[4]], data_ti$Proportion_of_N_total) data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.020, log_prop[[5]], data_ti$Proportion_of_N_total) data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.025, log_prop[[6]], data_ti$Proportion_of_N_total) data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.030, log_prop[[7]], data_ti$Proportion_of_N_total) data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.035, log_prop[[8]], data_ti$Proportion_of_N_total) data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.040, log_prop[[9]], data_ti$Proportion_of_N_total) data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.045, log_prop[[10]], data_ti$Proportion_of_N_total) #change each Bayes value lower than 1/10 to 1/10 data_ti$Bayes_trimmed <- ifelse(data_ti$Bayes < (1/10), 1/10, data_ti$Bayes) #subset the data where the Bayes factors are smaller than the lower limit of the second quantile of Bayes factors in the .000-005 P value range data_tismallp <- subset(data_ti,P_level == ".000-.005") data_tin <- subset(data_ti, data_ti$Bayes < quantile(data_tismallp$Bayes)[[2]]) #THE VIOLIN PLOT p <- ggplot(data_tin, aes(P_level, Bayes_trimmed)) p + geom_violin(draw_quantiles = c(0.25, 0.5, 0.75), adjust = 3, scale = "width", trim = "TRUE", na.rm = T, inherit.aes = T, bw = 0.1, aes(fill= Proportion_of_N_total), color ="black") + theme_bw() + theme(panel.grid.minor=element_blank())+ theme(panel.grid.major=element_blank())+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+ scale_y_continuous(breaks = c(0,1/10, 1/3,1,3,10), minor_breaks = waiver(), labels = c(0,"1/10", "1/3",1,3,10))+ geom_hline(yintercept = c(10,3,1,1/3,1/10),color="grey", linetype="dashed")+ coord_cartesian(ylim = c(0, 30), expand =FALSE)+ coord_trans(y="log2")+ scale_fill_gradient(low = "white", high = "black", name = "proportion of\nall results", breaks = c(4, 3.401197, 1.1), labels = paste(c(60, 30, 0), "%"))+ theme(legend.position = c(0.9,0.83))+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+ theme(legend.background = element_rect(colour = "#999999"))+ theme(legend.title = element_text(size = 6)) + theme(legend.text=element_text(size=6))+ theme(legend.key.size= unit(0.3, "cm"))+ labs(x = "P-value ranges", y = "Bayes factor ")+ ggtitle("") + annotate("text", x = 0, y = 16, size =3, label = "Strong H1")+ annotate("text", x = 0, y = 6, size =3, label = " Moderate H1")+ annotate("text", x = 0, y = 1.66,size =3, label = " Anecdotal H1")+ annotate("text", x = 0, y = 1/1.66,size =3, label = " Anecdotal H0")+ annotate("text", x = 0, y = 1/6,size =3, label = " Moderate H0") ``` #Calculating effect sizes ```{r} data_ti$ES <- abs((data_ti$Value_calc*2)/sqrt(data_ti$df2)) data_ti$ES_max5 <- ifelse(data_ti$ES>5, 5, data_ti$ES) hist(data_ti$ES) median(data_ti$ES) #t data_prior_t <- subset(data_ti, data_ti$Statistic=="t") hist(data_prior_t$ES_max5) median(data_prior_t$ES) quantile(data_prior_t$ES) #F data_prior_F <- subset(data_ti, data_ti$Statistic=="F") hist(data_prior_F$ES_max5) median(data_prior_F$ES_max5) quantile(data_prior_F$ES_max5) #r data_prior_r <- read.csv("data_r.csv", sep = ";") hist(data_prior_r$Value) data_prior_r$Value_abs <- abs(data_prior_r$Value) hist(data_prior_r$Value_abs) median(data_prior_r$Value_abs) quantile(data_prior_r$Value_abs) ``` ###Calculating Bayes factor evidence categories ### t+F tests, assuming independent design, applying medium scaled prior(Table 1 and Table S2) ```{r} data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 1,"Anecdotal H1", "Anecdotal H0") data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 3,"Moderate H1", data_ti$strength_of_evidence) data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 10,"Strong H1", data_ti$strength_of_evidence) data_ti$strength_of_evidence <- ifelse (data_ti$Bayes < 1/3,"Moderate H0", data_ti$strength_of_evidence) data_ti$strength_of_evidence <- ifelse (data_ti$Bayes < 1/10,"Strong H0", data_ti$strength_of_evidence) data_ti$strength_of_evidence <- as.factor(data_ti$strength_of_evidence) data_ti$strength_of_evidence <- ordered(data_ti$strength_of_evidence, levels = c("Strong H1", "Moderate H1", "Anecdotal H1", "Anecdotal H0", "Moderate H0", "Strong H0")) CrossTable(data_ti$strength_of_evidence) ``` ###Calculating additional descriptives ```{r} nrow(data_ti)#287424 unique(data_ti$journal) #293 Journals data_ti$Source <- droplevels(data_ti$Source) str(unique(data_ti$Source)) #35515 Articles table(data_ti$year)#1985-2016 nrow(subset(data_ti, Statistic %in% c("t"))) #114272 nrow(subset(data_ti, Statistic %in% c("F"))) #173152 ##Calculating the frequency of journal articles which contain at least one significant result which doesn't provide strong evidence. data_ti$BayesST <- ifelse(data_ti$Bayes > 10, 1, 0) means <- as.numeric(tapply(data_ti$BayesST, data_ti$Source, mean)) hist(means) means_dummy <- ifelse(means == 1, 1, 0) mean(means_dummy) data_ti_weak <- subset(data_ti, data_ti$Bayes <10) data_ti_weak$BayesST3 <- ifelse(data_ti_weak$Bayes <=3,0,1) mean(data_ti_weak$BayesST3) ```