---
title: "Estimating the Strength of Evidence in Psychological Science"
output:
  html_document: default
  pdf_document: default
authors: Aczel, Palfi and Szaszi
---

load packages
```{r warning =FALSE, message= FALSE}
library(psych)
library(ggplot2)
library(BayesFactor)
library(gmodels)
library(plyr)
```

load data
```{r}
#data_ti <- read.csv("data_ti.csv", sep = ";")
#data_tp <- read.csv("data_tp.csv", sep = ";")
#data_r <- read.csv("data_r.csv", sep = ";")
data_all <- read.csv("statcheck_dataset.csv")

```

#################################  Data filtering and transformation of the t and F statistics  #######################################

#### Data filtering and transformation
```{r}
# Selecting t, F  and r statistics
data <- subset(data_all, Statistic %in% c("t", "F"))
# Selecting F statistics where df1 = 1
data <- subset(data, df1 %in% c(1, NA))
nrow(subset(data, Statistic %in% c("t"))) #169984
nrow(subset(data, Statistic %in% c("F"))) #233945

# Excluding non-significant results
data <- subset(data, data$Computed <= 0.05)
# Excluding cases with not exact test statistics values
data <- subset(data, data$Test.Comparison == "=")
# Selecting two-tailed tests
data <- subset(data, data$OneTail=="FALSE")
# Excluding cases where the re-calculated and original p-values suggest different decisions 
data <- subset(data, data$DecisionError == "FALSE")

# Rounds up the non-integer dfs
data$df2_rounded <- ceiling(data$df2)
# Calculating the square root of the F values
data$Value_calc <- ifelse(data$Statistic == "F", sqrt(abs(data$Value)), data$Value)


#Assuming balanced designs
data_ti <- data

#creating new column and calculating N1 and N2 from df2
#Calcuating sample size from df
data_ti$N1 <- ((data_ti$df2_rounded)+2)/2
data_ti$N2 <- data_ti$N1

#Excluding cases with too small sample size (<6)
data_ti <- subset(data_ti, data_ti$df2_rounded >= 4)

nrow(subset(data_ti, Statistic %in% c("t"))) 
nrow(subset(data_ti, Statistic %in% c("F"))) 

```


###calculating Bayes factors with medium scaled prior
```{r warning =FALSE, message= FALSE}

#Calculating LogBayes
data_ti$LogBayes <- NA
for (i in 1:nrow(data_ti)){
  data_ti[i,"LogBayes"] <- ttest.tstat(data_ti[i, "Value_calc"],
                                    data_ti[i, "N1"],
                                    data_ti[i, "N2"],
                                    nullInterval = NULL,
                                    rscale = "medium",
                                    complement = FALSE,
                                    simple = FALSE)[['bf']]
}

#Bayes factors
data_ti$Bayes <- exp(data_ti$LogBayes)
```

##############################################

###Calculating Bayes factor evidence categories

### t+F tests, assuming independent design, applying medium scaled prior(Table 1 and Table S2)
```{r}
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 1,"Anecdotal H1", "Anecdotal H0")
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 3,"Moderate H1", data_ti$strength_of_evidence)
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 10,"Strong H1", data_ti$strength_of_evidence)
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes < 1/3,"Moderate H0", data_ti$strength_of_evidence)
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes < 1/10,"Strong H0", data_ti$strength_of_evidence)

data_ti$strength_of_evidence <- as.factor(data_ti$strength_of_evidence)
data_ti$strength_of_evidence <- ordered(data_ti$strength_of_evidence, levels = c("Strong H1", "Moderate H1", "Anecdotal H1", "Anecdotal H0", "Moderate H0", "Strong H0"))

CrossTable(data_ti$strength_of_evidence)
```

##############################################

### Creating P-value ranges for t+F tests assumming independent design
```{r}
data_ti$P_level <- ifelse (data_ti$Computed >= 0,     ".000-.005", "999")
data_ti$P_level <- ifelse (data_ti$Computed >= 0.005, ".005-.010", data_ti$P_level)
data_ti$P_level <- ifelse (data_ti$Computed >= 0.010, ".010-.015", data_ti$P_level)
data_ti$P_level <- ifelse (data_ti$Computed >= 0.015, ".015-.020", data_ti$P_level)
data_ti$P_level <- ifelse (data_ti$Computed >= 0.020, ".020-.025", data_ti$P_level)
data_ti$P_level <- ifelse (data_ti$Computed >= 0.025, ".025-.030", data_ti$P_level)
data_ti$P_level <- ifelse (data_ti$Computed >= 0.030, ".030-.035", data_ti$P_level)
data_ti$P_level <- ifelse (data_ti$Computed >= 0.035, ".035-.040", data_ti$P_level)
data_ti$P_level <- ifelse (data_ti$Computed >= 0.040, ".040-.045", data_ti$P_level)
data_ti$P_level <- ifelse (data_ti$Computed >= 0.045, ".045-.050", data_ti$P_level)
data_ti$P_level <- as.factor (data_ti$P_level)
CrossTable(data_ti$P_level)

```

### Cross-tabulation of p-value ranges with Bayes Factor Categories (calculated with medium scaled prior) for t+F tests assuming independent design (Table S3)
```{r}
CrossTable(data_ti$strength_of_evidence, data_ti$P_level, prop.chisq = FALSE)
```

##############################################
###Creating Plots

###Figure 1 - The relationship between significant p-values and the corresponding Bayes factors for the t-test results. The Bayes factors were calculated with medium scaled prior distribution assuming independent-samples design. 
```{r}
#The code for Figure 1 was adapted from: http://shinyapps.org/apps/RGraphCompendium/index.php#evidential-flow

par(cex.main = 1, mar = c(4.5, 6, 4, 7) + 0.1, mgp = c(3, 1, 0), cex.lab = 1, 
    font.lab = 2, cex.axis = 1, las = 1)
plot(x= data_ti[,'Computed'], y=data_ti[,'LogBayes'], xlim = c(0, 0.05), ylim = c(-1 * log(10), log(100)), 
     xlab = "", ylab = "", cex.lab = 1, cex.axis = 1, las = 1, yaxt = "n",xaxt="n", 
     bty = "n", type = "p", pch = '.', bg = "black")

labelsUpper = log(c(100, 30, 10, 3, 1))
lablesUppercorx = log(c(10, 3, 1))
labelsLower = -1 * log(c(10, 3, 1))
criticalP = c(labelsLower, 0, labelsUpper)
criticalPcorx = c(lablesUppercorx,0,labelsLower)
abline(h = 0)
axis(side = 4, at = labelsUpper + 0.602, tick = FALSE, cex.axis = 1, labels = c ("", "", "Strong H1", "Moderate H1", "Anecdotal H1"))
axis(side = 4, at = labelsLower - 0.602, tick = FALSE, cex.axis = 1, labels = c("Strong H0", "Moderate H0", "Anecdotal H0"))
axis(side = 2, at = c(criticalP), tick = TRUE, las = 2, cex.axis = 1, labels = c( "1/10", "1/3", "1", "", "100", "30", "10", "3", ""))
axis(side=1, at= c(0,0.005,0.01,0.015,0.02,0.025,0.03,0.035,0.04,0.045,0.05), cex.axis= 0.7)
mtext("Bayes factors", side = 2, line = 2.5, las = 0, cex = 1)
mtext("P-values", side = 1, line = 2.5, las = 1, cex = 1)
grid::grid.text("", 0.97, 0.5, rot = 270, gp = grid::gpar(cex = 1.3))
for (idx in 1:length(criticalPcorx)) {
  abline(h = criticalPcorx[idx], col = "darkgrey", lwd = 1, lty = 2)
}

# #Arrows indicating the direction of the evidence
# arrows(x0=0.025, y0= log(1/5), x1= 0.025, y1=  log(1/1.5), length = 0.09, angle = 30, code = 1, lwd = 2)
# arrows(x0=0.025, y0=log(10), x1= 0.025, log(30), length = 0.09, angle = 30, code = 2, lwd = 2)
# text(0.015, log(1/8), "Evidence for H0", pos = 4, cex = 1)
# text(0.015, log(70), "Evidence for H1", pos = 4, cex = 1)
```

### Figure 2. - Density plots with quantile lines for the Bayes factors in ranges of significant p-values. The Bayes factors were calculated with medium scaled prior distribution assuming independent-samples design.  

```{r}
#Data preparation 
f = as.numeric(CrossTable(data_ti$P_level)[[2]][1,]) #calculate the proportion of p values in each P value range
log_prop = log(f*100) #calculate the log values of each proportion for the legend

#assign the corresponding log_prop value to each p value to color the violin plots
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0, log_prop[[1]], "999")
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.005, log_prop[[2]], data_ti$Proportion_of_N_total)
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.010, log_prop[[3]], data_ti$Proportion_of_N_total)
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.015, log_prop[[4]], data_ti$Proportion_of_N_total)
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.020, log_prop[[5]], data_ti$Proportion_of_N_total)
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.025, log_prop[[6]], data_ti$Proportion_of_N_total)
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.030, log_prop[[7]], data_ti$Proportion_of_N_total)
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.035, log_prop[[8]], data_ti$Proportion_of_N_total)
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.040, log_prop[[9]], data_ti$Proportion_of_N_total)
data_ti$Proportion_of_N_total <- ifelse (data_ti$Computed >= 0.045, log_prop[[10]], data_ti$Proportion_of_N_total)

#change each Bayes value lower than 1/10 to 1/10
data_ti$Bayes_trimmed <- ifelse(data_ti$Bayes < (1/10), 1/10, data_ti$Bayes)

#subset the data where the Bayes factors are smaller than the lower limit of the second quantile of Bayes factors in the .000-005 P value range 
data_tismallp <- subset(data_ti,P_level == ".000-.005") 
data_tin <- subset(data_ti, data_ti$Bayes < quantile(data_tismallp$Bayes)[[2]])

#THE VIOLIN PLOT
p <- ggplot(data_tin, aes(P_level, Bayes_trimmed))
p + geom_violin(draw_quantiles = c(0.25, 0.5, 0.75),
                adjust = 3, scale = "width", trim = "TRUE",
                na.rm = T, inherit.aes = T, bw = 0.1,
                aes(fill= Proportion_of_N_total), color ="black") + 
    theme_bw() +
    theme(panel.grid.minor=element_blank())+
    theme(panel.grid.major=element_blank())+
    theme(axis.text.x = element_text(angle = 45, hjust = 1))+
    scale_y_continuous(breaks = c(0,1/10, 1/3,1,3,10), 
                       minor_breaks = waiver(), 
                       labels = c(0,"1/10", "1/3",1,3,10))+
  
    geom_hline(yintercept = c(10,3,1,1/3,1/10),color="grey", linetype="dashed")+
    coord_cartesian(ylim = c(0, 30), expand =FALSE)+  
    coord_trans(y="log2")+  
    scale_fill_gradient(low = "white", high = "black",
                        name = "proportion of\nall results",
                        breaks = c(4, 3.401197, 1.1),
                        labels = paste(c(60, 30, 0), "%"))+
    theme(legend.position = c(0.9,0.83))+
    theme(axis.text.x = element_text(angle = 45, hjust = 1))+
    theme(legend.background = element_rect(colour = "#999999"))+ 
    theme(legend.title = element_text(size = 6)) +
    theme(legend.text=element_text(size=6))+
    theme(legend.key.size= unit(0.3, "cm"))+
    labs(x = "P-value ranges", y = "Bayes factor ")+
    ggtitle("") +
    annotate("text", x = 0, y = 16, size =3, label = "Strong H1")+
    annotate("text", x = 0, y = 6, size =3, label =  "     Moderate H1")+
    annotate("text", x = 0, y = 1.66,size =3, label = "     Anecdotal H1")+
    annotate("text", x = 0, y = 1/1.66,size =3, label = "     Anecdotal H0")+
    annotate("text", x = 0, y = 1/6,size =3, label = "     Moderate H0")

```

#Calculating effect sizes
```{r}
data_ti$ES <- abs((data_ti$Value_calc*2)/sqrt(data_ti$df2)) 
data_ti$ES_max5 <- ifelse(data_ti$ES>5, 5, data_ti$ES)
hist(data_ti$ES)
median(data_ti$ES)

#t
data_prior_t <- subset(data_ti, data_ti$Statistic=="t")
hist(data_prior_t$ES_max5)
median(data_prior_t$ES)
quantile(data_prior_t$ES)
#F
data_prior_F <- subset(data_ti, data_ti$Statistic=="F")
hist(data_prior_F$ES_max5)
median(data_prior_F$ES_max5)
quantile(data_prior_F$ES_max5)
#r
data_prior_r <- read.csv("data_r.csv", sep = ";")
hist(data_prior_r$Value)
data_prior_r$Value_abs <- abs(data_prior_r$Value)
hist(data_prior_r$Value_abs)
median(data_prior_r$Value_abs)
quantile(data_prior_r$Value_abs)

```

###Calculating Bayes factor evidence categories

### t+F tests, assuming independent design, applying medium scaled prior(Table 1 and Table S2)
```{r}
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 1,"Anecdotal H1", "Anecdotal H0")
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 3,"Moderate H1", data_ti$strength_of_evidence)
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes > 10,"Strong H1", data_ti$strength_of_evidence)
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes < 1/3,"Moderate H0", data_ti$strength_of_evidence)
data_ti$strength_of_evidence <- ifelse (data_ti$Bayes < 1/10,"Strong H0", data_ti$strength_of_evidence)

data_ti$strength_of_evidence <- as.factor(data_ti$strength_of_evidence)
data_ti$strength_of_evidence <- ordered(data_ti$strength_of_evidence, levels = c("Strong H1", "Moderate H1", "Anecdotal H1", "Anecdotal H0", "Moderate H0", "Strong H0"))

CrossTable(data_ti$strength_of_evidence)
```

###Calculating additional descriptives
```{r}

nrow(data_ti)#287424
unique(data_ti$journal) #293 Journals
data_ti$Source <- droplevels(data_ti$Source)
str(unique(data_ti$Source)) #35515 Articles
table(data_ti$year)#1985-2016
nrow(subset(data_ti, Statistic %in% c("t"))) #114272
nrow(subset(data_ti, Statistic %in% c("F"))) #173152

##Calculating the frequency of journal articles which contain at least one significant result which doesn't provide strong evidence.
data_ti$BayesST <- ifelse(data_ti$Bayes > 10, 1, 0)
means <- as.numeric(tapply(data_ti$BayesST, data_ti$Source, mean))
hist(means)
means_dummy <- ifelse(means == 1, 1, 0)
mean(means_dummy)

data_ti_weak <- subset(data_ti, data_ti$Bayes <10)
data_ti_weak$BayesST3 <- ifelse(data_ti_weak$Bayes <=3,0,1)
mean(data_ti_weak$BayesST3)

```