#February 5 2019 
##R code for figures and graphs in  "A theory and methodology to quantify knowledge", Royal Society Open Science, 2019, by Daniele Fanelli
#email@danielefanelli.com


##############################---Figure 1: pictorial representations of patterns and mutual information
rm(list=ls())
entropy<-function(x) {xPr<-x/sum(x); xPr<-xPr[xPr>0]; -sum(xPr*log2(xPr))}
mutualInf<-function(m) {x<-apply(m,1,sum); y<-apply(m,2,sum); return(list(mi=round(entropy(x)+entropy(y)-entropy(m),2), x=round(entropy(x),2), y=round(entropy(y),2)))}

m0<-matrix(c(
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
), nrow=15, ncol=15)

m1<-matrix(c(
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
  0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
  0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
  0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
  0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
  0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
  0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
  0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
  0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
  0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
  0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
  0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
  0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1,0,0,0,0,0,0,0,0,00,0,0,0,0
), nrow=15, ncol=15)


mCirc<-matrix(c(
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,
  0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,
  0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,
  0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,
  0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,
  0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,
  0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,
  0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,
  0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,
  0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,
  0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,
  0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,
  0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
), nrow=15, ncol=15)


mFace<-matrix(c(
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,
  0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,
  0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,
  0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,
  0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,
  0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,
  0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,
  0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,
  0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,
  0,0,1,0,0,1,1,1,1,1,0,0,1,0,0,
  0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,
  0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,
  0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
), nrow=15, ncol=15)


m1err<-matrix(c(
  0,0,0,0,0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,
  0,0,0,0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),
  0,0,0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),
  0,0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),
  0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,
  0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,
  0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,
  0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,
  0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,
  0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0,
  0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0,0,
  dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0,0,0,
  dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0,0,0,0,
  dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0,0,0,0,0,
  1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,00,0,0,0,0
), nrow=15, ncol=15)


m2err<-matrix(c(
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,dnorm(3),0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,dnorm(2),0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,dnorm(1),0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,dnorm(-1),0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,dnorm(-2),0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,dnorm(-3),0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,00,0,0,0,0
), nrow=15, ncol=15)

entX0<-round(log2(10),2)
entY0<-round(log2(10),2)
mi1<-mutualInf(m1)
miCirc<-mutualInf(mCirc)
miFace<-mutualInf(mFace)
mi1err<-mutualInf(m1err)
mi2err<-mutualInf(m2err)


png(filename="tau-ex-RV.png", width=9, height=6.5, units="in", res=300)

par(mfrow=c(2,3), mar=c(4,2,4,2.5), cex.main=1.5, cex.sub=1.5, cex.lab=1.2); lineVal=0.9; sublineVal=2.4; lineValletter=1.5; cexLetter=2; 

image(m0[,nrow(m0):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); 
title(main=expression(tau*": "*Ø), xlab =paste("H(X)=", entX0), ylab = paste("H(Y)=", entY0), line=lineVal)
mtext(side=1, text="I(Y:X)=0", line=sublineVal)
mtext(side=3, adj=0, text="a", line=lineValletter, cex=cexLetter)

image(m1[,nrow(m1):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); 
title(main=expression(tau*": 'Y=X'"), xlab=paste("H(X)=", mi1$x), ylab=paste("H(Y)=", mi1$y), line=lineVal); 
mtext(side=1, text=paste("I(Y:X)=", mi1$mi), line=sublineVal)
mtext(side=3, adj=0, text="b", line=lineValletter, cex=cexLetter)

image(m1err[,nrow(m1err):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); 
title(main=expression(tau*": 'Y=X+N(0,1)'"),  xlab=paste("H(X)=", mi1err$x), ylab=paste("H(Y)=", mi1err$y), line=lineVal); 
mtext(side=1, text=paste("I(Y:X)=", mi1err$mi), line=sublineVal)
mtext(side=3, adj=0, text="c", line=lineValletter, cex=cexLetter)

image(m2err[,nrow(m2err):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); 
title(main=expression(tau*": 'Y=X+N(0,1), X=7'"), xlab=paste("H(X|X=7)=", mi2err$x), ylab=paste("H(Y)=", mi2err$y), line=lineVal); 
mtext(side=1, text=paste("I(Y:X|X=7)= H(Y)-H(Y|X=7)=",mi1err$y-mi2err$y), line=sublineVal)
mtext(side=3, adj=0, text="d", line=lineValletter, cex=cexLetter)

image(mCirc[,nrow(mCirc):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); 
title(main=expression(tau*": 'Y"**2*"+X"**2*"=36'"), xlab=paste("H(X)=", miCirc$x), ylab=paste("H(Y)=", miCirc$y), line=lineVal); 
mtext(side=1, text=paste("I(Y:X)=",miCirc$mi), line=sublineVal); mtext(side=1, text=paste("I(Y:X)=", miCirc$mi), line=sublineVal)
mtext(side=3, adj=0, text="e", line=lineValletter, cex=cexLetter)

image(mFace[,nrow(mFace):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); 
title(main=expression(tau*": 'Y"**2*"+X"**2*"=36 & {(-2,2),(2,2),(0,0)...}'"), xlab=paste("H(X)=", miFace$x), ylab=paste("H(Y)=", miFace$y), line=lineVal); 
mtext(side=1, text=paste("I(Y:X)=", miFace$mi), line=sublineVal)
mtext(side=3, adj=0, text="f", line=lineValletter, cex=cexLetter)

dev.off()


#February 11 2018 figures for new version AMToKSP


##############################---Figure 2: graph to represent the set of all possible methodologies

rm(list=ls())


png(filename="methUniv.png", width=1000, height=1000, units="px")


par(mfrow=c(10,10), mar=c(0,0,0,0))


for (i in 1:100) 
{
  
  if(i==55) {
    b<- 1
    x<-rnorm(40)
    y<-b*x+rnorm(40,0,0.1)
    
    plot(x,y, pch=20, xlim=c(-2,2), ylim=c(-2,2), tick=FALSE, xaxt="n", yaxt="n", xlab="")
    
    minx<- -1.3
    miny<- 1.5
    labl<-expression(tau[55])
    
    text(labl, x=minx, y=miny, cex=3.5, col="red")
    
    
  }
  
  else{
    
    
    b<-rnorm(1,0,0.3)
    x<-rnorm(40)
    y<-b*x+rnorm(40,0,runif(1,0.2,2))
    
    plot(x,y, pch=20, xlim=c(-2,2), ylim=c(-2,2), tick=FALSE, xaxt="n", yaxt="n", xlab="")
    
    minx<- -1.3
    miny<- 1.5
    labl<-substitute(tau[CODE],list(CODE=i))
    
    text(labl, x=minx, y=miny, cex=3.5, col="red")
    #legend(minx,miny, legend=labl, border="white", bg="white", pt.cex=0, xjust=2)
    
  }
  
  
}

dev.off()


##############################-----Figure3: comparing effect sizes to K

rm(list=ls())
library(ggplot2)
library(grid)
library(gridExtra)

set.seed(666)
#For R-squared and Cohen's d
# we imagine a variance of x 1000, and an effect size (variance explained, represented by b) decreasing from  0.001 to 0.999
n<-10000
x<-rnorm(n,0,1000)
sdx<-sd(x)
sdyx<-100
bvals<-seq(0.00,0.99,0.01)

R2<-c(0)
r<-c(0)
d<-c(0)
d1<-c(0)
K<-c(0)
K1<-c(0)
K2<-c(0)
K3<-c(0)


# we calculate K valies for a tau that is larger than before, but which is then reduced by n=2,10,100 repetitions
Kn1<-c(0)
Kn10<-c(0)
Kn100<-c(0)
Kn1000<-c(0)

Kfun<-function(hy,hyx,hx,t){
  (hy-hyx)/(hy+hx+t)
}

for(b in bvals){
  
  
  y<-b*x+rnorm(n,0,sdyx)
  sdy<-sd(y)
  tau<-1
  tau1<-100
  
  #we calculate R-squared, Cohen's d and K, for the latter assuming different levels of accuracy (significant digits). For the former, the accuracy makes no difference
  
  R2b<-(sdy^2-sdyx^2)/sdy^2
  rb<-sqrt(R2b)
  db<-sqrt((4*rb^2)/(1-rb^2))
  d1b<-sqrt(4*(sdy-sdyx))/sqrt(sdyx)
  
  R2<-c(R2, R2b)
  r<-c(r, rb)
  d<-c(d, db)
  d1<-c(d1, d1b)
  
  K<-c(K, Kfun(log(round(sdy,0)), log(round(sdyx,0)), log(round(sdx,0)), log2(sqrt(2*pi*exp(1)))+tau))
  K1<-c(K1, Kfun(log(round(sdy*10,0)), log(round(sdyx*10,0)), log(round(sdx*10,0)), log2(sqrt(2*pi*exp(1)))+tau))
  K2<-c(K2, Kfun(log(round(sdy*100,0)), log(round(sdyx*100,0)), log(round(sdx*100,0)), log2(sqrt(2*pi*exp(1)))+tau))
  K3<-c(K3, Kfun(log(round(sdy*1000,0)), log(round(sdyx*1000,0)), log(round(sdx*1000,0)), log2(sqrt(2*pi*exp(1)))+tau))
  
  
  # we calcualte K valies for a tau that is larger than before, but which is then reduced by n=2,10,100 repetitions
  Kn1<-c(Kn1, Kfun(log(round(sdy,0)), log(round(sdyx,0)), log(round(sdx,0)), log2(sqrt(2*pi*exp(1)))+tau1) )
  Kn10<-c(Kn10, Kfun(log(round(sdy,0)), log(round(sdyx,0)), 0, log2(sqrt(2*pi*exp(1)))+(log(sdx +tau1)/2)) )
  Kn100<-c(Kn100, Kfun(log(round(sdy,0)), log(round(sdyx,0)), 0, log2(sqrt(2*pi*exp(1)))+(log(sdx +tau1)/10)) )
  Kn1000<-c(Kn1000, Kfun(log(round(sdy,0)), log(round(sdyx,0)), 0, log2(sqrt(2*pi*exp(1)))+(log(sdx+tau1)/100)) )
  
  
}

#for Chi-square

#we generate a 2 by 2 contingency table. We will assume that y and x have both uniform marginal probability distribution, and we progressively generate a pattern in it. For K we assume that tau=1

p<-seq(0.25,0.4999, 0.0001)
Hyx<-(-p*log2(p) - (0.5-p)*log2(0.5-p) - p*log2(p) - (0.5-p)*log2(0.5-p))

n=100
chi100<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25)
K100<-(1+1-Hyx)/(1+1+tau/n)

n=80
chi80<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25)
K80<-(1+1-Hyx)/(1+1+tau/n)

n=40
chi40<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25)
K40<-(1+1-Hyx)/(1+1+tau/n)

n=20
chi20<-n*( ((0.25 - p)^2)/0.25 + ((0.25 - (0.5-p))^2)/0.25 + ((0.25 - p)^2)/0.25 + ((0.25 - (0.5-p))^2)/0.25)
K20<-(1+1-Hyx)/(1+1+tau/n)


tau<-100

n=100
chi100t100<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25)
K100t100<-(1+1-Hyx)/(1+1+tau/n)

n=80
chi80t100<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25)
K80t100<-(1+1-Hyx)/(1+1+tau/n)

n=40
chi40t100<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25)
K40t100<-(1+1-Hyx)/(1+1+tau/n)

n=20
chi20t100<-n*( ((0.25 - p)^2)/0.25 + ((0.25 - (0.5-p))^2)/0.25 + ((0.25 - p)^2)/0.25 + ((0.25 - (0.5-p))^2)/0.25)
K20t100<-(1+1-Hyx)/(1+1+tau/n)


#prepare graphs
titSize<-24
subtitSize<-titSize*0.8
labSize<-subtitSize*0.8
axisLabSize<-labSize*0.8

ylimR2<-max(K,Kn1, Kn10, Kn100, Kn1000)
ylimChi<-max(K40, K80, K100, K40t100, K80t100, K100t100)


p1<-ggplot(data=data.frame(R2,K), aes(x=R2,y=K))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title="K, varying accuracy",  x="R-squared", y="")
p1<-p1+geom_line(data=data.frame(R2,K1), aes(x=R2,y=K1), linetype=2)
p1<-p1+geom_line(data=data.frame(R2,K2), aes(x=R2,y=K2), linetype=3)
p1<-p1+geom_line(data=data.frame(R2,K3), aes(x=R2,y=K3), linetype=4)
p1<-p1+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize))
p1<-p1


p2<-ggplot(data=data.frame(R2,Kn1), aes(x=R2,y=K))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title="K, varying n",  x="R-squared", y="")
p2<-p2+geom_line(data=data.frame(R2,Kn10), aes(x=R2,y=Kn10), linetype=2)
p2<-p2+geom_line(data=data.frame(R2,Kn100), aes(x=R2,y=Kn100), linetype=3)
p2<-p2+geom_line(data=data.frame(R2,Kn1000), aes(x=R2,y=Kn1000), linetype=4)
p2<-p2+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize))


p3<-ggplot(data=data.frame(d,K), aes(x=d,y=K))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title="K, varying accuracy",  x="Cohen's d (absolute value)", y="")
p3<-p3+geom_line(data=data.frame(d,K1), aes(x=d,y=K1), linetype=2)
p3<-p3+geom_line(data=data.frame(d,K2), aes(x=d,y=K2), linetype=3)
p3<-p3+geom_line(data=data.frame(d,K3), aes(x=d,y=K3), linetype=4)
p3<-p3+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize))

p4<-ggplot(data=data.frame(d,Kn1), aes(x=d,y=Kn1))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title="K, varying n",  x="Cohen's d (absolute value)", y="")
p4<-p4+geom_line(data=data.frame(d,Kn10), aes(x=d,y=Kn10), linetype=2)
p4<-p4+geom_line(data=data.frame(d,Kn100), aes(x=d,y=Kn100), linetype=3)
p4<-p4+geom_line(data=data.frame(d,Kn1000), aes(x=d,y=Kn1000), linetype=4)
p4<-p4+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=labSize), axis.title.x = element_text(size=subtitSize))

p5<-ggplot(data=data.frame(chi20,K20), aes(x=chi20,y=K20))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title=expression("K, varying n, l("*tau*")"==1), xlim=c(0,max(chi100)),  x="Chi-squared", y="")
p5<-p5+geom_line(data=data.frame(chi40,K40), aes(x=chi40,y=K40), linetype=2)
p5<-p5+geom_line(data=data.frame(chi80,K80), aes(x=chi80,y=K80), linetype=3)
p5<-p5+geom_line(data=data.frame(chi100,K100), aes(x=chi100,y=K100), linetype=4)
p5<-p5+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize))

p6<-ggplot(data=data.frame(chi20t100,K20t100), aes(x=chi20t100,y=K20t100))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title=expression("K, varying n, l("*tau*")"==100), xlim=c(0,max(chi100)),  x="Chi-squared", y="")
p6<-p6+geom_line(data=data.frame(chi40t100,K40t100), aes(x=chi40t100,y=K40t100), linetype=2)
p6<-p6+geom_line(data=data.frame(chi80t100,K80t100), aes(x=chi80t100,y=K80t100), linetype=3)
p6<-p6+geom_line(data=data.frame(chi100t100,K100t100), aes(x=chi100t100,y=K100t100), linetype=4)
p6<-p6+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize))


graphs<-arrangeGrob(textGrob("a", gp=gpar(fontsize=subtitSize), x=0.03), textGrob("b", gp=gpar(fontsize=subtitSize), x=0.03), p1,p2,textGrob("c", gp=gpar(fontsize=subtitSize), x=0.03), textGrob("d", gp=gpar(fontsize=subtitSize), x=0.03), p3,p4,textGrob("e", gp=gpar(fontsize=subtitSize), x=0.03), textGrob("f", gp=gpar(fontsize=subtitSize), x=0.03), p5,p6, nrow=6, heights=c(0.1,1,0.1,1,0.1,1))
titGrob<-textGrob("relation between K and statistical measures", gp=gpar(fontsize=titSize))


png(filename="kR2examples.png", width=1.5*480, height=1.5*480)
grid.arrange(titGrob,graphs, nrow=2, heights=c(1,10))
dev.off()


##############################--------S1 Figure:  Pressure Demon with m measurements 

m<-seq(0,10, 1)
e<-seq(1,0.4,-0.05)
t<-c(1,0.1,0.01)
k<-function(m,e,t){(1-e^m)/(1+m+t)}
k1<-k(m,0.99,0)

png(filename="maxDemMeasurementsEx.png", width=480, height=480)
plot(m,k(m,e[1],0), ylim=c(0,0.3), type="l", ylab="K", xlab="measurements", main="Mawell's Demon's K, varying error", cex.lab=1.5, cex.main=1.7)
for(i in 2:length(e)) lines(m,k(m,e[i],0), lty=1)
dev.off()


##############################--------Figure 5 and S6 Figures A and B: effects of resolution on K

rm(list-ls())
library(ggplot2)
library(gridExtra)
library(grid)
library(entropy)

set.seed(666)
n<-10000
Umax<-100
Umin<--100
UmaxEr<-10
UminEr<--10
sdN<-10
range<-c(Umin, Umax)
Method1<-"ML"
Method2<-"shrink"


x<-round(runif(n,Umin, Umax),0)
y<-x+round(runif(n,UminEr, UmaxEr),0)# linear case
y2<-x+x^2+round(runif(n,UminEr, UmaxEr),0)#parabolic case

df<-data.frame(x,y)
df2<-data.frame(x,y2)
txtSize<-15
titSize<-txtSize*1.5


#----first simulation, on linear case

p0<-ggplot(df, aes(x,y))+geom_point(alpha=0.025)+theme_bw()+ scale_fill_gradientn(colours=c("white","grey", "black"), guide=FALSE)+ggtitle(substitute(Y*'='*X*'+'*e[1]*', '*e[1]*'~'*U*'('*UMIN*','*UMAX*')', list(UMIN= UminEr, UMAX= UmaxEr)))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize))


acc<-seq(1,10,1)

plotList<-list()

Hx<-c()
Hy<-c()
Hyx<-c()
Hy.x<-c()
k<-c()
k2<-c()


for(a in acc){
  
  binNum<-2^a
  
  #Entropy and K based on simple cell counts
  hx<-entropy(y=discretize(x, numBins=binNum, r=range), unit="log2", method= Method1)
  hy<-entropy(y=discretize(y, numBins=binNum, r=range), unit="log2", method= Method1)
  hyx<-entropy(y=discretize2d(x1=x,x2=y, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method1)
  hy.x<-entropy(y=discretize2d(x1=x,x2=y, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method1)-hx
  K<-round((hy-hy.x)/(hy+hx+0),2)
  
  
  #Entropy and K based on estimator
  hx<-entropy(y=discretize(x, numBins=binNum, r=range), unit="log2", method= Method2)
  hy<-entropy(y=discretize(y, numBins=binNum, r=range), unit="log2", method= Method2)
  hyx<-entropy(y=discretize2d(x1=x,x2=y, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method2)
  hy.x<-entropy(y=discretize2d(x1=x,x2=y, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method2)-hx
  K2<-round((hy-hy.x)/(hy+hx+0),2)
  
  
  print(a)
  print(list(c(hx,hy,hy.x)))
  print(K)
  print("----")
  
  Hx<-c(Hx,hx)
  Hy<-c(Hy,hy)
  Hyx<-c(Hyx,hyx)
  Hy.x<-c(Hy.x,hy.x)
  k<-c(k,K)
  k2<-c(k2,K2)
  
  p<-ggplot(df, aes(x,y))+geom_bin2d(bins= binNum-1, show.legend=FALSE, drop=TRUE)+theme_bw()+ scale_fill_gradientn(colours=c("white","grey", "black"), guide=FALSE)+theme(axis.title.y=element_text(size=txtSize), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize, hjust=0.5), plot.subtitle=element_text(size= titSize, hjust=0.5))+labs(title=substitute('a='*ACC*', '*H(Y)*'='*HY, list(ACC=binNum, HY=round(hy,2))), subtitle=substitute('K('*Y*';'*X*','*tau*')'*'='*VAL, list(VAL=K, ACC=binNum)))

  
  plotList<-c(plotList, list(p))
  
}

pk<-ggplot(data.frame(acc,Hy,k,k2), aes(acc,k))+geom_line()+geom_line(aes(x=acc, y=k2), col="red")+theme_bw()+labs(x="log(a)")+ggtitle(expression(K*'('*Y*';'*X*','*tau*')'))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize*1.5), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize))

pky<-ggplot(data.frame(acc,Hy,k,k2), aes(acc,Hy*k))+geom_line()+geom_line(aes(x=acc, y=Hy*k2), col="red")+theme_bw()+labs(x="log(a)")+ggtitle(expression(K*'('*Y*';'*X*','*tau*') '%*%' H('*Y*')'))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize*1.5), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize))


#----second simulation, on parabolic case

p02<-ggplot(df2, aes(x,y2))+geom_point(alpha=0.0025)+theme_bw()+ scale_fill_gradientn(colours=c("white","grey", "black"), guide=FALSE)+ggtitle(substitute(Y*'='*X*'+'*X^2*'+'*e[1]*', '*e[1]*'~'*U*'('*UMIN*','*UMAX*')', list(UMIN= UminEr, UMAX= UmaxEr)))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize))


acc<-seq(1,10,1)

plotList2<-list()

Hx<-c()
Hy<-c()
Hyx<-c()
Hy.x<-c()
k<-c()
k2<-c()


for(a in acc){
  
  binNum<-2^a
  
  #Entropy and K based on simple cell counts
  hx<-entropy(y=discretize(x, numBins=binNum, r=range), unit="log2", method= Method1)
  hy<-entropy(y=discretize(y2, numBins=binNum, r=range), unit="log2", method= Method1)
  hyx<-entropy(y=discretize2d(x1=x,x2=y2, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method1)
  hy.x<-entropy(y=discretize2d(x1=x,x2=y2, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method1)-hx
  K<-round((hy-hy.x)/(hy+hx+0),2)
  
  
  #Entropy and K based on estimator
  hx<-entropy(y=discretize(x, numBins=binNum, r=range), unit="log2", method= Method2)
  hy<-entropy(y=discretize(y2, numBins=binNum, r=range), unit="log2", method= Method2)
  hyx<-entropy(y=discretize2d(x1=x,x2=y2, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method2)
  hy.x<-entropy(y=discretize2d(x1=x,x2=y2, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method2)-hx
  K2<-round((hy-hy.x)/(hy+hx+0),2)
  
  
  print(a)
  print(list(c(hx,hy,hy.x)))
  print(K)
  print("----")
  
  Hx<-c(Hx,hx)
  Hy<-c(Hy,hy)
  Hyx<-c(Hyx,hyx)
  Hy.x<-c(Hy.x,hy.x)
  k<-c(k,K)
  k2<-c(k2,K2)
  
  p2<-ggplot(df2, aes(x,y2))+geom_bin2d(bins= binNum-1, show.legend=FALSE, drop=TRUE)+theme_bw()+ scale_fill_gradientn(colours=c("white","grey", "black"), guide=FALSE)+theme(axis.title.y=element_text(size=txtSize), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize, hjust=0.5), plot.subtitle=element_text(size= titSize, hjust=0.5))+labs(title=substitute('a='*ACC*', '*H(Y)*'='*HY, list(ACC=binNum, HY=round(hy,2))), subtitle=substitute('K('*Y*';'*X*','*tau*')'*'='*VAL, list(VAL=K, ACC=binNum)))
  
  plotList2<-c(plotList2, list(p2))
  
}

pk2<-ggplot(data.frame(acc,Hy,k,k2), aes(acc,k))+geom_line()+geom_line(aes(x=acc, y=k2), col="red")+theme_bw()+labs(x="log(a)")+ggtitle(expression(K*'('*Y*';'*X*','*tau*')'))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize*1.5), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize))

pky2<-ggplot(data.frame(acc,Hy,k,k2), aes(acc,Hy*k))+geom_line()+geom_line(aes(x=acc, y=Hy*k2), col="red")+theme_bw()+labs(x="log(a)")+ggtitle(expression(K*'('*Y*';'*X*','*tau*') '%*%' H('*Y*')'))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize*1.5), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize))


#prepare the graphical objects
g1<-arrangeGrob(p0, plotList[[1]], plotList[[2]], plotList[[3]], plotList[[4]], plotList[[5]], plotList[[6]], plotList[[7]], plotList[[8]], plotList[[9]], pk, pky, nrow=2, ncol=6)
g2<-arrangeGrob(p02, plotList2[[1]], plotList2[[2]], plotList2[[3]], plotList2[[4]], plotList2[[5]], plotList2[[6]], plotList2[[7]], plotList2[[8]], plotList2[[9]], pk2, pky2, nrow=2, ncol=6)


png(filename="KaccuracyEx.png", width=4*480, height=3*480, units="px")
grid.arrange(textGrob("relation between resolution and K", gp=gpar(fontsize=60)), textGrob("a", x=0.03, gp=gpar(fontsize=40)), g1, textGrob("b", x=0.03, gp=gpar(fontsize=40)), g2, nrow=5, ncol=1, heights=c(0.5, 0.5, 5, 0.5 ,5))
dev.off()


##############################--------Figure 6: Gender differences in personality

rm(list=ls())
library(ggplot2)
library(gridExtra)
library(grid)
library(cowplot)
library(entropy)
library(MASS)
library(plyr)


#------Data from Del Giudice, with mean  and variance, or separate covariance matrices for males and females. We thus assume that actual populations of values are (potentially) bimodal normal along any given dimension.

dfMV<-read.table("genMeanVar.txt", sep="\t", header=TRUE, dec=",", stringsAsFactors=TRUE)
dfmal<-read.table("gendMatMal.txt", sep="\t", header=TRUE, dec=",", stringsAsFactors=TRUE)
dffem<-read.table("gendMatFem.txt", sep="\t", header=TRUE, dec=",", stringsAsFactors=TRUE)


#K function 
kFun<-function(y,yx,x,t){(y-yx)/(y+x+t)}
#functions to calculate av-d and av-D	
avd<-function(m1,m2,s1,s2){0.125*((m1-m2)^2)*(s1^2+s2^2)/((s1^2)*(s2^2))}
avD<-function(d,S1,S2,D){0.5*(-d+( 0.5*(sum(diag(solve(S1) %*% S2))+sum(diag(solve(S2) %*% S1))) +D^2))}


#create var covar matrices
dfmalMatrix<-as.matrix(dfmal[1:15,2:16])
dffemMatrix<-as.matrix(dffem[1:15,2:16])

#sample size, entropy estimation method, and bin multiplier
N<-5*10^6
entMethod<-"shrink"

#for each variable, simulate data
set.seed(666)
yM<-mvrnorm(n=N, mu=dfMV$meanM, Sigma= dfmalMatrix)
yF<-mvrnorm(n=N, mu=dfMV$meanF, Sigma= dffemMatrix)
yT<-rbind(yM, yF)

write.table(cbind(yT, c(rep("m", N), rep("f", N))), file=paste("gendDat", N, ".txt", sep=""), sep="\t")


#define bin number for entropy of individual parameters
i<-2

dfKeach<-data.frame(acc=numeric(0), varName=character(0), Hm=numeric(0), Hf=numeric(0), Htot=numeric(0), K=numeric(0), KY=numeric(0))

for(v in 1:length(yT[1,])) {
  minv<-min(yT[,v])
  maxv<-max(yT[,v])
  midv<-median(yT[,v])
  
  ht<-entropy(table(cut(yT[,v], breaks=c(minv, midv, maxv))), unit="log2")
  hm<-entropy(table(cut(yM[,v], breaks=c(minv, midv, maxv))), unit="log2")
  hf<-entropy(table(cut(yF[,v], breaks=c(minv, midv, maxv))), unit="log2")
  
  #calculate K values for each parameter
  k<-kFun(ht, 0.5*(hm+hf), 1,0)
  ky<-k*ht
  
  dfKeach<-rbind(dfKeach, data.frame(acc=i, varName=dfMV$vars[v], Hm= hm, Hf= hf, Htot= ht, K=k, KY=ky))
  print(v)
}


#calculate the naive k by summing across all parameters
HtotSum<-sum(dfKeach$Htot)
HmSum<-sum(dfKeach$Hm)
HfSum<-sum(dfKeach$Hf)
Ksum<-with(dfKeach, kFun(sum(Htot), sum(0.5*(Hm+Hf)), 1, 0))
KsumY<-Ksum*sum(dfKeach$Htot)


dfKeach<-rbind(dfKeach, data.frame(acc=i, varName= "Kmd", Hm= HmSum, Hf= HfSum, Htot= HtotSum, K=Ksum, KY=KsumY))

dfKeach<-rbind(dfKeach, data.frame(acc=i, varName= "average K", Hm= mean(dfKeach$Hm[1:15]), Hf= mean(dfKeach$Hf[1:15]), Htot= mean(dfKeach$Htot[1:15]), K=mean(dfKeach$K[1:15]), KY=mean(dfKeach$KY[1:15])))

dfKeach$k<-with(dfKeach, (Htot-0.5*(Hm+Hf))/Htot)

#save data
write.table(dfKeach, file="gendDiffRes.txt", append=FALSE, dec=",", sep="\t", row.names=FALSE)

#dfKeach<-read.table("gendDiffRes.txt", sep="\t", header=TRUE, dec=",")

dfK<-data.frame(dfKeach, varK=c("Warmth", "Emotional Stability", "Dominance", "Liveliness", "Rule-Consciousness", "Social Boldness", "Sensitivity", "Vigilance", "Abstractness", "Privateness", "Apprehension", "Openness to Change", "Self-Reliance", "Perfectionism", "Tension", "Kmd", "average K"))
dfK$varK<-factor(dfK$varK, levels=c("Liveliness", "Vigilance", "Perfectionism", "Abstractness", "Social Boldness", "Self-Reliance", "Rule-Consciousness", "Privateness", "Tension", "Dominance", "Emotional Stability", "Openness to Change", "Warmth", "Apprehension", "Sensitivity", "average K", "Kmd"))

dfD<-data.frame(d=c(dfMV$d.score, mean(abs(dfMV$d.score), na.rm=TRUE), 1.49), varD=c("Warmth", "Emotional Stability", "Dominance", "Liveliness", "Rule-Consciousness", "Social Boldness",  "Sensitivity", "Vigilance", "Abstractness", "Privateness", "Apprehension",  "Openness to Change", "Self-Reliance", "Perfectionism", "Tension", "average d", "Mahalanobis D"))
dfD$varD<-factor(dfD$varD, levels=c("Liveliness", "Vigilance", "Perfectionism", "Abstractness", "Social Boldness", "Self-Reliance", "Rule-Consciousness", "Privateness", "Tension", "Dominance", "Emotional Stability", "Openness to Change", "Warmth", "Apprehension", "Sensitivity", "average d", "Mahalanobis D"))


#prepare the plots
titSize<-17
subtitSize<-titSize*0.8
labSize<-subtitSize
labPosX<-0.03
pd<-ggplot(data= dfD, aes(x=varD, y=abs(d)))+geom_bar(stat="identity", fill=c(rep("black",15), "orange", "red"))+ theme(axis.text.x = element_text(angle = 45, hjust = 1, size=12))+labs(x="", y="abs(d)")+ggtitle(expression("Cohen's d, absolute values"))+theme(plot.title=element_text(face="bold", size=titSize))
pk<-ggplot(data= dfK, aes(x=varK, y=K))+geom_bar(stat="identity", fill=c(rep("black",15), "orange", "blue"))+ theme(axis.text.x = element_text(angle = 45, hjust = 1, size=12))+labs(x="", y="")+ggtitle(expression('K'*'('*'Y'*';'*'gender,'*tau*')'))+theme(plot.title=element_text(face="bold", size=titSize))+annotate("text", x=17, y=0.027, label=round(dfK$KY[16],2), colour=c("blue"))

titleGrob<-textGrob("gender differences in personality factors", gp=gpar(fontsize=titSize*1.3))

png(filename="sexDiffEx.png", width=480, height=1.2*480)
grid.arrange(titleGrob,textGrob("a", x=labPosX, gp=gpar(fontsize=labSize)), pd, textGrob("b",  x=labPosX, gp=gpar(fontsize=labSize)), pk, nrow=5, heights=c(0.8,0.2,5,0.2,5))
dev.off()


##############################--------Figure 7: Cumulative K versus meta-analysis


rm(list=ls())
library(ggplot2)
library(gridExtra)
library(grid)
library(cowplot)

Kcum<-function(df, exdum, exans, tau, grouping, nDig, method) {
  require(entropy)#load package with key functions
  require(plyr)#package to calculate values by group
  Kfun<-function(hy,hyx,hx,t){
    (hy-hyx)/(hy+hx+t)
  }
  #center the explanandum, and scale significant digits up to have delta=1
  df$y<-round(scale(df[,exdum], scale=FALSE)*10^nDig,0)
  maxVal<-max(abs(df$y))#create a range that captures all values of y
  range<-c(-maxVal, maxVal) 
  bins<-2*maxVal+1#the number of bins is then the number of measurement units in the total range
  
  KvalTable<-data.frame(group=character(0), Hy=numeric(0), mHyx=numeric(0), mdy=numeric(0), mdyx=numeric(0), K=numeric(0), Kc=numeric(0))
  
  resTable<-data.frame(group=character(0), Ngroup=numeric(0), treatment=character(0), Ntreatment=numeric(0), Hy=numeric(0), dy=numeric(0), WG=numeric(0), Hyx=numeric(0), dyx=numeric(0), WGx=numeric(0))#produce table of goup data
  
  #calculate counts and entropy for total
  yCount<- discretize(df$y, numBins=bins, r=range)#count frequencies average for group-level y
  yEntTot<- entropy(yCount, method=method, unit="log2")#entropy of  average for group-level y
  totN<-length(df$y)#total sample size
  
  #calculate counts and entropy for total by explanans
  df$scaleVar<-paste(df[,exans], df[,grouping])
  ySc<-ddply(.data=df, .variables= "scaleVar", summarize, ySc=scale(y, scale=FALSE))
  df$ySc<-ySc$ySc
  yxCount<-  discretize(df$ySc, numBins=bins, r=range)
  yxEntTot<-entropy(yxCount, method=method, unit="log2")
  
  
  #calculate counts and entropy for total by study, save results in a data frame
  groupNames<-levels(as.factor(df[,grouping]))
  for(g in groupNames){
    dfg<-df[df[,grouping]==g,]
    
    exansNames<-levels(as.factor(dfg[,exans]))
    
    for(j in exansNames){
      dfgj<-dfg[dfg[,exans]==j,]
      
      
      yCountG<- discretize(dfg$y,numBins=bins, r=range)#count frequencies for group-level y
      yEntTotG<-entropy(yCountG, method=method, unit="log2")#entropy of group-level y
      yDistTotG<-KL.empirical(y1=yCountG, y2=yCount, unit="log2")#distance of group-level y from averge of group-level y
      NG<-length(dfg$y)#group sample size
      WG<-NG/totN #relative group weight
      
      yxCountG<- discretize(scale(dfgj$y, scale=FALSE),numBins=bins, r=range)#count frequencies for treatment-level within group
      yxEntTotG<-entropy(yxCountG, method=method, unit="log2")#entropy of treatment-level within group
      yxDistTotG<-KL.empirical(y1=yxCountG, y2=yxCount, unit="log2")#distance for each treatment-level within group from total across groups
      NGJ<-length(dfgj$y)#treatment-level sample size
      Wx<-NGJ/totN#weight of treatment-level in total sample
      
      
      resTable<-rbind(resTable, data.frame(group=g, Ngroup= NG, treatment=j, Ntreatment= NGJ, Hy=yEntTotG, dy=yDistTotG, WG=WG, Hyx=yxEntTotG, dyx=yxDistTotG, tau=tau, WGx=Wx))
      
      
    }
    
    avHyxG<-with(resTable[resTable$group==g,], sum(Hyx*WG))
    avdyxG<-with(resTable[resTable$group==g,], sum(dyx*WG))
    
    Kg<-Kfun(yEntTotG, avHyxG,1,tau)
    KcG<-Kfun(yEntTotG, avHyxG+avdyxG,1,tau)
    
    KvalTable<-rbind(KvalTable, data.frame(group=g, Hy=yEntTotG, mHyx= avHyxG, mdy= yDistTotG , mdyx= avdyxG, K=Kg, Kc=KcG))
    
  }
  
  #calculate cumulative K
  dfT<-data.frame(resTable)
  avHy<-with(dfT[!duplicated(dfT$group),], sum(Hy*WG))
  avdy<-with(dfT[!duplicated(dfT$group),], sum(dy*WG))
  avHyx<-with(dfT, sum(Hyx*WGx))
  avdyx<-with(dfT, sum(dyx*WGx))
  
  Km<-Kfun(avHy, avHyx,1, tau)
  Kc<-Kfun(avHy+avdy, avHyx+avdyx,1, tau)
  
  
  KvalTable<-rbind(KvalTable, data.frame(group="Kc", Hy= avHy, mHyx= avHyx, mdy= avdy , mdyx= avdyx, K=Km, Kc=Kc))
  
  return(list(Analysis=resTable, Kvalues=KvalTable))	
  
}


#Figure A, with same variance, SAME population values, same effect size
#Figure B, with same variance, different population values, same effect size
#Figure C, with different variance, different population values, same effect size
#Figure D, with different variance, different population values, null effect size


sd1List<-c(1,1,1,1,1)
sd2List<-c(1,2,1,1,1)
meanX1List<-c(1,1,1,1,1)
meanX2List<-c(3,3,3,3,1)
meanX3List<-c(1,1,1,4,4)
meanX4List<-c(3,3,3,6,4)
cumTauList<-c(3,3,4.75,3,3)

#commands to save plots and results
plotList<-list()
sink(file="CumKres.txt", append=TRUE)

for(i in 1:length(sd1List)) {
  
  
  N<-1000
  sd1<-sd1List[i]
  sd2<-sd2List[i]
  meanX1<-meanX1List[i]
  meanX2<-meanX2List[i]
  meanX3<-meanX3List[i]
  meanX4<-meanX4List[i]
  cumTau<-cumTauList[i]
  
  set.seed(666)
  x1<-rnorm(N, mean=meanX1, sd=sd1)
  x2<-rnorm(N, mean=meanX2, sd=sd1)
  
  x3<-rnorm(N, mean=meanX3, sd=sd2)
  x4<-rnorm(N, mean=meanX4, sd=sd2)
  
  s1resc<-scale(c(x1,x2))+mean(c(x1,x2,x3,x4))
  s2resc<-scale(c(x3,x4))+mean(c(x1,x2,x3,x4))
  meangroups1<-mean(c(s1resc[1:N],s2resc[1:N]))
  meangroups2<-mean(c(s1resc[(N+1):(2*N)], s2resc[(N+1):(2*N)]))
  
  ES1<-(mean(x2)-mean(x1))/(sd(x1)+sd(x2))*0.5
  SE1<-sd(x1)/sqrt(N)
  ES2<-(mean(x4)-mean(x3))/(sd(x3)+sd(x4))*0.5
  SE2<-sd(x3)/sqrt(N)
  Wtot<-(1/SE1^2)+(1/SE2^2)
  W1<-(1/SE1^2)/Wtot
  W2<-(1/SE2^2)/Wtot
  ESpool<-W1*ES1+W2*ES2
  SEpool<-sqrt(1/((N*sd(x1)^(-2))+(N*sd(x3)^(-2))))
  
  #generate data sets for plots
  dfMet<-data.frame(res=c(x1,x2,x3,x4), expl=c(rep("x1", N), rep("x2", N), rep("x3", N), rep("x4", N)), treat=c(rep("cont", N), rep("treat", N), rep("cont", N), rep("treat", N)), study=c(rep("s1", 2*N), rep("s2", 2*N)), MEAN=c(rep(mean(x1), N), rep(mean(x2), N), rep(mean(x3), N), rep(mean(x4), N)), SDV=c(rep(sd(x1), N), rep(sd(x2), N), rep(sd(x3), N), rep(sd(x4), N)), groups=c(rep("g1", N), rep("g2", N), rep("g1", N), rep("g2", N)), MEANgroups=c(rep(mean(c(x1,x3)), N), rep(mean(c(x2,x4)), N), rep(mean(c(x1,x3)), N), rep(mean(c(x2,x4)), N)), SDVgroups=c(rep(sd(c(x1,x3)), N), rep(sd(c(x2,x4)), N), rep(sd(c(x1,x3)), N), rep(sd(c(x2,x4)), N)), avres=c(s1resc, s2resc), MEANavres=c(rep(meangroups1, N), rep(meangroups2, N), rep(meangroups1, N), rep(meangroups2, N)), study=c(rep("study1", 2*N), rep("study2", 2*N)))
  dfMet$resScExpl<-dfMet$res-dfMet$MEAN
  
  dfMetaAn<-data.frame(study=c("st.1", "st.2", "pooled"), ES=c(ES1,ES2, ESpool), SE=c(SE1, SE2, SEpool))
  dfMetaAn$study<-factor(dfMetaAn$study, levels=c("pooled", "st.2", "st.1"))
  
  #calculate values of entropy and K
  Kres<-Kcum(df=dfMet, exdum ="res", exans ="treat", tau= cumTau, grouping ="study", nDig=1, method="ML" )
  sigD<-3#significant digits to show on plots
  txtSize<-13#plot titles size
  
  p1<-ggplot(data=subset(dfMet, is.element(expl, c("x1", "x2"))), aes(x=expl, y=res))+geom_point( show.legend=FALSE, col="blue")+geom_errorbar(aes(ymin= MEAN, ymax= MEAN), show.legend=FALSE, col="blue")+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_y_continuous(limits=c(-3,8))+ggtitle(substitute(list(K==K1), list(K1=round(Kres$Kvalues$K[1], sigD))))
  pmarX1<-ggplot(subset(dfMet, is.element(expl, c("x1", "x2"))), aes(x=res, group=expl))+geom_density(show.legend=FALSE, col="blue")+coord_flip()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-3,8))+ggtitle(substitute(H(Y[1]~"|"~X)==Y1X, list(Y1X=round(Kres$Kvalues$mHyx[1],sigD))))
  pmarY1<-ggplot(subset(dfMet, is.element(expl, c("x1", "x2"))), aes(x=res), col="blue")+geom_density(show.legend=FALSE, col="blue")+coord_flip()+scale_y_reverse()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-3,8))+ggtitle(substitute(H(Y[1])==Y1, list(Y1=round(Kres$Kvalues$Hy[1], sigD))))
  
  
  p2<-ggplot(subset(dfMet, is.element(expl, c("x3", "x4"))), aes(x=expl, y=res))+geom_point( show.legend=FALSE, col="goldenrod")+geom_errorbar(aes(ymin= MEAN, ymax= MEAN), show.legend=FALSE, col="goldenrod")+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_y_continuous(limits=c(-3,8))+ggtitle(substitute(list(K==K2), list(K2=round(Kres$Kvalues$K[2], sigD))))
  pmarX2<-ggplot(subset(dfMet, is.element(expl, c("x3", "x4"))), aes(x=res, group=expl))+geom_density(show.legend=FALSE, col="goldenrod")+coord_flip()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-3,8))+ggtitle(substitute(H(Y[2]~"|"~X)==Y2X, list(Y2X=round(Kres$Kvalues$mHyx[2],sigD))))
  pmarY2<-ggplot(subset(dfMet, is.element(expl, c("x3", "x4"))), aes(x=res), col="goldenrod")+geom_density(show.legend=FALSE, col="goldenrod")+coord_flip()+scale_y_reverse()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-3,8))+ggtitle(substitute(H(Y[2])==Y1, list(Y1=round(Kres$Kvalues$Hy[2], sigD))))
  
  #tags for meta-analysis plot
  ESpool<-round(dfMetaAn$ES[dfMetaAn$study=="pooled"], sigD)
  SEpool<-round(dfMetaAn$SE[dfMetaAn$study=="pooled"], sigD)
  p3<-ggplot(dfMetaAn, aes(x=study, y=ES))+geom_point(show.legend=FALSE, col="black", shape=c(1,1,18), size=c(5,5,5))+geom_errorbar(aes(ymin=ES-1.96*SE, ymax=ES+1.96*SE), show.legend=FALSE, col="black")+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), plot.title=element_text(size= txtSize))+scale_y_continuous(limits=c(-0.2,0.6))+coord_flip()+ggtitle(substitute(ES==ESPOOL %+-% SEPOOL, list(ESPOOL=ESpool, SEPOOL=SEpool)))
  
  
  p4<-ggplot(dfMet, aes(x=groups, y=scale(res, scale=FALSE)))+geom_point(show.legend=FALSE, aes(col=expl, alpha=0.1))+scale_colour_manual(values=c("blue", "blue", "goldenrod", "goldenrod"))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_y_continuous(limits=c(-6,6))+ggtitle(substitute(list(bar(tau)*'+'*bar(d)==TAU, K[cum]==K3C), list(TAU=cumTau, K3C=round(Kres$Kvalues$Kc[3], sigD))))
  pmarX4<-ggplot()+geom_density(inherit.aes=FALSE, data=dfMet, aes(x=resScExpl, col=expl), show.legend=FALSE, lty=2)+scale_colour_manual(values=c("blue", "blue", "goldenrod", "goldenrod"))+coord_flip()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-6,6))+geom_density(inherit.aes=FALSE, data=dfMet, aes(x=scale(resScExpl, scale=FALSE)), show.legend=FALSE, col="black")+ggtitle(substitute(bar(H(Y~"|"~X)) + bar(d)==YXD, list(YXD=round(Kres$Kvalues$mHyx[3]+Kres$Kvalues$mdyx[3],sigD))))
  pmarY4<-ggplot()+geom_density(inherit.aes=FALSE, data=dfMet, aes(x=scale(res, scale=FALSE), col=study), show.legend=FALSE, lty=2)+scale_colour_manual(values=c("blue", "goldenrod"))+coord_flip()+scale_y_reverse()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-6,6))+geom_density(inherit.aes=FALSE, data=dfMet, aes(x=scale(res, scale=FALSE)), show.legend=FALSE, col="black")+ggtitle(substitute(bar(H(Y))+bar(d)==YD, list(YD=round(Kres$Kvalues$Hy[3]+Kres$Kvalues$mdy[3],sigD))))
  
  
  pEmpty <- ggplot(mtcars, aes(x = wt, y = mpg)) +geom_blank() + theme(axis.text = element_blank(), axis.title = element_blank(), line = element_blank(), panel.background = element_blank())
  
  
  g<-arrangeGrob(layout_matrix=rbind(c(1,2,3, 4,5,6, 7, 8,9,10)), pmarY1, p1, pmarX1, pmarY2, p2, pmarX2, p3,  pmarY4, p4, pmarX4, nrow=1)
  
  
  plotList<-c(plotList, list(g))
  
  print(Kres)
  
  
}

titSize<-35
labelSize<-20

CombPlot<-ggdraw()+draw_plot(plotList[[1]], x=0, y=0.8, height=0.17)+draw_plot(plotList[[2]], x=0, y=0.6, height= 0.17)+draw_plot(plotList[[3]], x=0, y=0.4, height=0.17)+draw_plot(plotList[[4]], x=0, y=0.2, height=0.17)+draw_plot(plotList[[5]], x=0, y=0.0, height=0.17)+draw_plot_label(c("a) same effect size, same variance, same methods, same range", "b) same effect size, different variance, same methods, same range", "c) same effect size, same variance, different methods, same range", "d) same effect size, same variance, same methods, different range", "e) null effect size, same variance, same methods, different range"), x=c(0,0,0,0,0), y=c(0.995,0.795, 0.595,0.395,0.195), hjust=-0.7, size=labelSize)


png(filename="cumulativeKexMulti.png", width=3*480, height=2*480)
grid.arrange(textGrob(expression("comparison of meta-analysis and "*K[cum]), gp=gpar(fontsize=titSize)), CombPlot, nrow=2, heights=c(0.7,10))
dev.off()
sink()
sink()


##############################--------Figure 8: Simulation of reproducibility results


rm(list=ls())
library(ggplot2)
library(grid)
library(gridExtra)


#fundamental equation
Kr<-K*A^(-l*d)
A<-2
sdy<-100
sdyx<-50
x<-1
tau<-50
K<-(log(sdy)-log(sdyx))/(log(sdy)+x+tau)
R2<-(sdy^2-sdyx^2)/sdy^2
r<-sqrt(R2)
Cd<-sqrt((4*r^2)/(1-r^2))

#this is the decline rate, first digit in the variables names
l1<-0.01
l2<-0.5
l3<-2
l4<-10

dist<-seq(0,30, 1)


lambda.a=1
avLambda.a=1

set.seed(666)
dp.a<-rpois(100000,lambda.a)
d.a<-dp.a/avLambda.a

Kr1.a<-K*A^(-l1*d.a)
Kr2.a<-K*A^(-l2*d.a)
Kr3.a<-K*A^(-l3*d.a)
Kr4.a<-K*A^(-l4*d.a)
R21.a<-(sdy^2-sdyx^2)/(sdy^2*(A^(l1*d.a)))
R22.a<-(sdy^2-sdyx^2)/(sdy^2*(A^(l2*d.a)))
R23.a<-(sdy^2-sdyx^2)/(sdy^2*(A^(l3*d.a)))
R24.a<-(sdy^2-sdyx^2)/(sdy^2*(A^(l4*d.a)))
r1.a<-sqrt(R21.a)
r2.a<-sqrt(R22.a)
r3.a<-sqrt(R23.a)
r4.a<-sqrt(R24.a)
Cd1.a<-sqrt((4*R21.a)/(1-R21.a))
Cd2.a<-sqrt((4*R22.a)/(1-R22.a))
Cd3.a<-sqrt((4*R23.a)/(1-R23.a))
Cd4.a<-sqrt((4*R24.a)/(1-R24.a))


lambda.b=4
avLambda.b=4
dp.b<-rpois(100000,lambda.b)
d.b<-dp.b/avLambda.b
Kr1.b<-K*A^(-l1*d.b)
Kr2.b<-K*A^(-l2*d.b)
Kr3.b<-K*A^(-l3*d.b)
Kr4.b<-K*A^(-l4*d.b)
R21.b<-(sdy^2-sdyx^2)/(sdy^2*(A^(l1*d.b)))
R22.b<-(sdy^2-sdyx^2)/(sdy^2*(A^(l2*d.b)))
R23.b<-(sdy^2-sdyx^2)/(sdy^2*(A^(l3*d.b)))
R24.b<-(sdy^2-sdyx^2)/(sdy^2*(A^(l4*d.b)))
r1.b<-sqrt(R21.b)
r2.b<-sqrt(R22.b)
r3.b<-sqrt(R23.b)
r4.b<-sqrt(R24.b)
Cd1.b<-sqrt((4*R21.b)/(1-R21.b))
Cd2.b<-sqrt((4*R22.b)/(1-R22.b))
Cd3.b<-sqrt((4*R23.b)/(1-R23.b))
Cd4.b<-sqrt((4*R24.b)/(1-R24.b))


lambda.c=20
avLambda.c=20
dp.c<-rpois(100000,lambda.c)
d.c<-dp.c/avLambda.c
Kr1.c<-K*A^(-l1*d.c)
Kr2.c<-K*A^(-l2*d.c)
Kr3.c<-K*A^(-l3*d.c)
Kr4.c<-K*A^(-l4*d.c)
R21.c<-(sdy^2-sdyx^2)/(sdy^2*(A^(l1*d.c)))
R22.c<-(sdy^2-sdyx^2)/(sdy^2*(A^(l2*d.c)))
R23.c<-(sdy^2-sdyx^2)/(sdy^2*(A^(l3*d.c)))
R24.c<-(sdy^2-sdyx^2)/(sdy^2*(A^(l4*d.c)))
r1.c<-sqrt(R21.c)
r2.c<-sqrt(R22.c)
r3.c<-sqrt(R23.c)
r4.c<-sqrt(R24.c)


#-------------plot with r
titSize<-32
subtitSize<-titSize*0.6
labSize<-subtitSize
legendSize<-titSize*0.55
axisTitSize<-subtitSize*0.8
labPosX<-0.03

#set up data set for lines
dfKlines<-data.frame(distance=rep(dist,4), lambda=as.factor(c(rep(as.character(l1), length(dist)), rep(as.character(l2), length(dist)), rep(as.character(l3), length(dist)), rep(as.character(l4), length(dist)))), Kvalue=c(K*A^(-l1*dist), K*A^(-l2*dist), K*A^(-l3*dist), K*A^(-l4*dist)))

dfKlines$lambda<-factor(dfKlines$lambda, levels=c(as.character(l1), as.character(l2), as.character(l3), as.character(l4)))

pKlines<-ggplot(data=dfKlines, aes(x=distance, y=Kvalue, col=lambda))+scale_colour_manual(values=c("royalblue", "green", "orange", "red"))+geom_line()+labs(y="K")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"), axis.text.x=element_text(size=labSize), legend.text=element_text(size= legendSize))+ guides(col=guide_legend(title=expression(lambda)))

pDist.a<-ggplot()+geom_line(aes(x=dist, y=dpois(dist,lambda.a)/avLambda.a))+labs(x="distance", y="", title="distribution of distances")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"), axis.text.x=element_text(size=labSize))
pDist.b<-ggplot()+geom_line(aes(x=dist, y=dpois(dist,lambda.b)/avLambda.b))+labs(x="distance", y="", title="distribution of distances")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"), axis.text.x=element_text(size=labSize))
pDist.c<-ggplot()+geom_line(aes(x=dist, y=dpois(dist,lambda.c)/avLambda.c))+labs(x="distance", y="", title="distribution of distances")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"), axis.text.x=element_text(size=labSize))


#set up data set for histograms
dfr<-data.frame(r1.a, r1.b, r1.c, r2.a, r2.b, r2.c, r3.a, r3.b, r3.c, r4.a, r4.b, r4.c)

pr1.a<-ggplot(data=dfr, aes(x=r1.a))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="royalblue", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r1.a), col="black", lwd=1.5)
pr1.b<-ggplot(data=dfr, aes(x=r1.b))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="royalblue", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r1.b), col="black", lwd=1.5)
pr1.c<-ggplot(data=dfr, aes(x=r1.c))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="royalblue", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r1.c), col="black", lwd=1.5)

pr2.a<-ggplot(data=dfr, aes(x=r2.a))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="green", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r2.a), col="black", lwd=1.5)
pr2.b<-ggplot(data=dfr, aes(x=r2.b))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="green", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r2.b), col="black", lwd=1.5)
pr2.c<-ggplot(data=dfr, aes(x=r2.c))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="green", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r2.c), col="black", lwd=1.5)

pr3.a<-ggplot(data=dfr, aes(x=r3.a))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="orange", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r3.a), col="black", lwd=1.5)
pr3.b<-ggplot(data=dfr, aes(x=r3.b))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="orange", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r3.b), col="black", lwd=1.5)
pr3.c<-ggplot(data=dfr, aes(x=r3.c))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="orange", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r3.c), col="black", lwd=1.5)

pr4.a<-ggplot(data=dfr, aes(x=r4.a))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="red", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r4.a), col="black", lwd=1.5)
pr4.b<-ggplot(data=dfr, aes(x=r4.b))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="red", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r4.b), col="black", lwd=1.5)
pr4.c<-ggplot(data=dfr, aes(x=r4.c))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="red", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r4.c), col="black", lwd=1.5)

titleGrob<-textGrob("reproducibility by informational distance", gp=gpar(fontsize=titSize))
layMat<-rbind(c(1,1,1), c(2,2,2), c(3,4,5), c(6,7,8), c(9,10,11), c(12,13,14), c(15,16,17))

png(filename="ReprEx-r.png", width=2*480, height=2*480)
grid.arrange(layout_matrix=layMat, grobs=list(titleGrob, pKlines, pDist.a, pDist.b, pDist.c, pr1.a, pr1.b, pr1.c, pr2.a, pr2.b, pr2.c, pr3.a, pr3.b, pr3.c, pr4.a, pr4.b, pr4.c), nrow=7, heights=c(1.5,3,3,3,3,3,3))
dev.off()


##############################--------Figure 9: Density plots of data from the Reproducibility Initiative in Psychology

rm(list=ls())
library(ggplot2)
library(grid)
library(gridExtra)
library(e1071)

df<-read.table("RIPdf.txt", sep="\t", header=TRUE, dec=",", stringsAsFactors=TRUE)

# Calculate difference between effect sizes, re-scaled on original effect size
df$rDiff<-with(df, (T.r.R-T.r.O))
#df$rDiff<-with(df, T.r.R/T.r.O)

# we re-scale extreme values (due to original r=0) and count as zero any negative value
#df<-subset(df, round(T.r.O,2)>0)

#---attempt at histogram with ggplot
#generate a dataset with the two correlation values
dft<-data.frame(Orig=c(rep("Original", length(df$T.r.O)), rep("Replication", length(df$T.r.R))), r=c(df$T.r.O, df$T.r.R), expertise=as.factor(c(as.character(df$Methodology.expertise.required.O), as.character(df$Methodology.expertise.required.O))))


dft12<-subset(dft, is.element(expertise, c("No expertise required","Slight expertise required")))
dft345<-subset(dft, !is.element(expertise, c("No expertise required","Slight expertise required")))

#The numbers of each category are: 
#No expertise required =32
#Slight expertise required =33
#Moderate expertise required =13
#Strong expertise required =12
#Extreme expertise required=7

#test for Kurtosis
kurt12o<-with(subset(dft12, Orig=="Original"), round(kurtosis(r),2))
kurt345o<-with(subset(dft345, Orig=="Original"), round(kurtosis(r),2))
kurt12r<-with(subset(dft12, Orig=="Replication"), round(kurtosis(r),2))
kurt345r<-with(subset(dft345, Orig=="Replication"), round(kurtosis(r),2))

titSize<-28
subtitSize<-titSize*0.8
labSize<<-subtitSize
axisTitSize<-subtitSize*0.9
labPosX<-0.03


p12<-ggplot(dft12, aes(x=r))+geom_histogram(data=subset(dft12, Orig=="Original"), fill="blue", alpha=0.2, binwidth=0.05)+geom_density(data=subset(dft12, Orig=="Original"), aes(y=..density..*0.1*65), col="blue")
p12<-p12+geom_histogram(data=subset(dft12, Orig=="Replication"), fill="red", alpha=0.2, binwidth=0.05)+geom_density(data=subset(dft12, Orig=="Replication"), col="red", aes(y=..density..*0.1*65))
p12<-p12+scale_x_continuous(limits=c(-0.5,1))+annotate("text", label=kurt12o, col="blue", size=12, x=0.75, y=10)+annotate("text", label=kurt12r, col="red", size=12, x=-0.25, y=10)
p12<-p12+labs(title="no/slight expertise required", y="")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"))

p345<-ggplot(dft345, aes(x=r))+geom_histogram(data=subset(dft345, Orig=="Original"), fill="blue", alpha=0.2, binwidth=0.05)+geom_density(data=subset(dft345, Orig=="Original"), aes(y=..density..*0.1*32), col="blue")
p345<-p345+geom_histogram(data=subset(dft345, Orig=="Replication"), fill="red", alpha=0.2, binwidth=0.05)+geom_density(data=subset(dft345, Orig=="Replication"), col="red", aes(y=..density..*0.1*32))
p345<-p345+scale_x_continuous(limits=c(-0.5,1))+annotate("text", label=kurt345o, col="blue", size=12, x=0.75, y=6)+annotate("text", label=kurt345r, col="red", size=12, x=-0.25, y=6)
p345<-p345+labs(title="moderate/strong/extreme, exp. req.", y="")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"))


png(filename="RipEx-rGrouped.png", width=1.5*480, height=1.5*480)
grid.arrange(textGrob("reproducibility in psychology, by expertise required", gp=gpar(fontsize=titSize)),textGrob("a", x=labPosX, gp=gpar(fontsize=labSize)), p12,textGrob("b", x=labPosX, gp=gpar(fontsize=labSize)), p345, ncol=1, nrow=5, heights=c(0.75,0.1,4.5,0.1,4.5))
dev.off()


##############################----Figure 10: how K_{null} declines with |T|

rm(list=ls())
library(ggplot2)
library(grid)

titSize<-30
axisTitSize<-titSize*0.8
labSize<<-axisTitSize*0.8

T<-2:100
y<-log2(T/(T-1))

p<-ggplot(mapping=aes(x=T, y=y))+geom_line()+scale_x_discrete(lim=c(2,10,50,99))
p<-p+theme(axis.text.y=element_text(size=labSize), axis.text.x=element_text(size=labSize), axis.title=element_text(size=axisTitSize), plot.title = element_text(size=titSize, hjust=0.3, face="plain"))+labs(x="|T|", y=expression(paste("log", frac("|T|", "|T|-1"))))
                                                                                                                                                                                                                                        
png(filename="logTplot.png", width=480*1.25, height=480*0.75)
grid.arrange(textGrob("information value of hypothesis falsification", gp=gpar(fontsize=titSize)), p, nrow=2,heights=c(1,10))
dev.off()


##############################--------Figure 10: simulation of informativeness of null findings


rm(list=ls())
library(lattice)
library(entropy)


Kfun<-function(hy,hyx,hx,t){
  (hy-hyx)/(hy+hx+t)
}

set.seed(666)

x<-rnorm(1000)
y1<-x+rnorm(1000,0,0.1)
y2<-rnorm(1000)
y3<-rnorm(1000)
y4<-rnorm(1000)

#calculating entropies
Hx<-round(log2(100),0)
Hy.1<-round(log2(sqrt(100^2+10^2)),0)
Hy.i<-round(log2(100),0)
Hytot<-round(log2(0.25*sqrt(100^2+10^2)+0.75*sqrt(100^2)),0)
Hxy.1<-Hx+round(log2(10),0)#joint distribution xy1
Hxy.i<-Hx+Hy.i#joint distribution xy2,3,4
HT<-log2(4)
Hyx.T<-0.25*(Hxy.1+3*Hxy.i)
HT.yx<-HT-(Hytot-(Hyx.T-Hx))

#calculate entropies using counts
range<-c(-10,10)
binNum<-20
xcount<-discretize(x,numBins=binNum, r=range)
y1count<-discretize(y1,numBins=binNum, r=range)
y2count<-discretize(y2,numBins=binNum, r=range)
y3count<-discretize(y3,numBins=binNum, r=range)
y4count<-discretize(y4,numBins=binNum, r=range)
xy1count<-discretize2d(x1=y1, x2=x, numBins1=binNum, r1=range, numBins2=binNum, r2=range)
xy2count<-discretize2d(x1=y2, x2=x, numBins1=binNum, r1=range, numBins2=binNum, r2=range)
xy3count<-discretize2d(x1=y3, x2=x, numBins1=binNum, r1=range, numBins2=binNum, r2=range)
xy4count<-discretize2d(x1=y4, x2=x, numBins1=binNum, r1=range, numBins2=binNum, r2=range)

#calculating entropies
Hx<-entropy(xcount, method="ML")
Hy.1<-entropy(y1count, method="ML")
Hy.2<-entropy(y2count, method="ML")
Hy.3<-entropy(y3count, method="ML")
Hy.4<-entropy(y4count, method="ML")
Hytot<-entropy(discretize(c(y1, y2, y3, y4),numBins=binNum, r=range), method="ML")
Hytot2<-entropy(discretize(c(y1, y2),numBins=binNum, r=range), method="ML")
Hxy.1<-entropy(xy1count, method="ML")
Hxy.2<-entropy(xy2count, method="ML")
Hxy.3<-entropy(xy3count, method="ML")
Hxy.4<-entropy(xy4count, method="ML")

#-----plot

png(filename="KTyxEx.png", width=480*2, height=480*3, units="px")

cexTxt<-3
cexTit<-7
cexSubTit<-cexTit*0.7
minx<- 5
miny<- -2.2
sep<-0.65
ylim<-c(-5,4)
xlim<-c(-4,5)

par(mfrow=c(3,2), mar=c(0.5,0.5,0.5,0.5))

m<-rbind(c(9,9),c(7,7),c(1,2),c(1,2),c(1,2),c(1,2),c(1,2),c(8,8),c(3,4),c(3,4),c(3,4),c(3,4),c(3,4),c(5,6),c(5,6),c(5,6),c(5,6),c(5,6))

layout(mat=m)
par(mar=c(0.5,0.5,0.5,0.5))


#panel A, with only two

HT<-log2(2)
Hyx.T<-0.5*(Hxy.1+Hxy.2)
HT.yx<-HT-(Hytot2-(Hyx.T-Hx))
KT.yx<-Kfun(hy=HT, hyx=HT.yx, hx=Hx+Hytot, t=0)


plot(x,y2, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="")
laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.2,2)))	
text(laby, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0))
labyx<-substitute(H*'('*Y*'|'*X*','*h[0]*','*tau[b]*')'==VAL, list(VAL=round(Hxy.2-Hx,2)))	
text(labyx, x=minx, y=miny-4*sep, cex= cexTxt, col="black", adj = c(1,0))
labht<-expression(h[0])
text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0))

plot(x,y1, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="")
laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.1,2)))	
text(laby, x=minx, y=miny, cex= cexTxt, col="black", adj = c(1,0))
labyx<-substitute(H*'('*Y*'|'*X*','*h[1]*','*tau[b]*')'==VAL, list(VAL=round(Hxy.1-Hx,2)))	
text(labyx, x=minx, y=miny-sep, cex= cexTxt, col="black", adj = c(1,0))
labT<-substitute(H*'('*T*')'==VAL, list(VAL=round(HT,2)))	
text(labT, x=minx, y=miny-2*sep, cex= cexTxt, col="black", adj = c(1,0))
labTyx<-substitute(H*'('*T[h*','*tau]*'|'*X*','*Y*')'==VAL, list(VAL=round(HT.yx,2)))	
text(labTyx, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0))
labKT<-substitute(K*'('*T[h*','*tau]*';'*X*','*Y*')'==VAL, list(VAL=round(KT.yx,3)))	
text(labKT, x=minx, y=miny-4.2*sep, cex= cexTxt*1.2, col="red", adj = c(1,0))
labht<-expression(h[1])
text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0))


#panel B, with all four Ts
#par(mfrow=c(2,2), mar=c(1,1,1,1))

HT<-log2(4)
Hyx.T<-0.25*(Hxy.1+Hxy.2+Hxy.3+Hxy.4)
HT.yx<-HT-(Hytot-(Hyx.T-Hx))
KT.yx<-Kfun(hy=HT, hyx=HT.yx, hx=Hx+Hytot, t=0)


plot(x,y2, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="")
laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.2,2)))	
text(laby, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0))
labyx<-substitute(H*'('*Y*'|'*X*','*h[0]*','*tau[a]*')'==VAL, list(VAL=round(Hxy.2-Hx,2)))	
text(labyx, x=minx, y=miny-4*sep, cex= cexTxt, col="black", adj = c(1,0))
labht<-expression(h[0]*','*tau[a])
text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0))

plot(x,y3, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="")
laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.3,2)))	
text(laby, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0))
labyx<-substitute(H*'('*Y*'|'*X*','*h[1]*','*tau[a]*')'==VAL, list(VAL=round(Hxy.3-Hx,2)))	
text(labyx, x=minx, y=miny-4*sep, cex= cexTxt, col="black", adj = c(1,0))
labht<-expression(h[1]*','*tau[a])
text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0))

plot(x,y4, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="")
laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.3,2)))	
text(laby, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0))
labyx<-substitute(H*'('*Y*'|'*X*','*h[0]*','*tau[b]*')'==VAL, list(VAL=round(Hxy.3-Hx,2)))	
text(labyx, x=minx, y=miny-4*sep, cex= cexTxt, col="black", adj = c(1,0))
labht<-expression(h[0]*','*tau[b])
text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0))

plot(x,y1, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="")
laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.1,2)))	
text(laby, x=minx, y=miny+sep, cex= cexTxt, col="black", adj = c(1,0))
labyx<-substitute(H*'('*Y*'|'*X*','*h[1]*','*tau[b]*')'==VAL, list(VAL=round(Hxy.1-Hx,2)))	
text(labyx, x=minx, y=miny, cex= cexTxt, col="black", adj = c(1,0))
labT<-substitute(H*'('*T*')'==VAL, list(VAL=round(HT,2)))	
text(labT, x=minx, y=miny-sep, cex= cexTxt, col="black", adj = c(1,0))
labTyx<-substitute(H*'('*T[h*','*tau]*'|'*X*','*Y*')'==VAL, list(VAL=round(HT.yx,2)))	
text(labTyx, x=minx, y=miny-2*sep, cex= cexTxt, col="black", adj = c(1,0))
labKT<-substitute(K*'('*T[h*','*tau]*';'*X*','*Y*')'==VAL, list(VAL=round(KT.yx,3)))	
text(labKT, x=minx, y=miny-3.2*sep, cex= cexTxt*1.2, col="red", adj = c(1,0))
labKYT<-substitute(H*'('*T*')'*K*'('*T[h*','*tau]*';'*X*','*Y*')'==VAL, list(VAL=round(KT.yx*HT,3)))	
text(labKYT, x=minx, y=miny-4.2*sep, cex= cexTxt*1.2, col="red", adj = c(1,0))
labht<-expression(h[1]*','*tau[b])
text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0))

plot.new()
text(0.5,0.5,"a) conclusive test of single hypothesis", cex=cexSubTit)

plot.new()
text(0.5,0.5,"b) test of single hypothesis, plus auxiliary condition", cex=cexSubTit)

plot.new()
text(0.5,0.5,"knowledge value of empirical tests", cex=cexTit)


dev.off()