#February 5 2019 ##R code for figures and graphs in "A theory and methodology to quantify knowledge", Royal Society Open Science, 2019, by Daniele Fanelli #email@danielefanelli.com ##############################---Figure 1: pictorial representations of patterns and mutual information rm(list=ls()) entropy<-function(x) {xPr<-x/sum(x); xPr<-xPr[xPr>0]; -sum(xPr*log2(xPr))} mutualInf<-function(m) {x<-apply(m,1,sum); y<-apply(m,2,sum); return(list(mi=round(entropy(x)+entropy(y)-entropy(m),2), x=round(entropy(x),2), y=round(entropy(y),2)))} m0<-matrix(c( 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ), nrow=15, ncol=15) m1<-matrix(c( 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,00,0,0,0,0 ), nrow=15, ncol=15) mCirc<-matrix(c( 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1,1,0,0,0,0,0,0, 0,0,0,0,1,1,0,0,0,1,1,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,1,0,0,0, 0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, 0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,0,1,0,0,0,0,0,0,0,1,0,0,0, 0,0,0,0,1,1,0,0,0,1,1,0,0,0,0, 0,0,0,0,0,0,1,1,1,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ), nrow=15, ncol=15) mFace<-matrix(c( 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1,1,0,0,0,0,0,0, 0,0,0,0,1,1,0,0,0,1,1,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,1,0,0,0, 0,0,1,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,1,0,0,1,0,0,0,1,0,0,1,0,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, 0,1,0,0,0,0,0,1,0,0,0,0,0,1,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,1,0, 0,0,1,0,0,1,0,0,0,1,0,0,1,0,0, 0,0,1,0,0,1,1,1,1,1,0,0,1,0,0, 0,0,0,1,0,0,0,0,0,0,0,1,0,0,0, 0,0,0,0,1,1,0,0,0,1,1,0,0,0,0, 0,0,0,0,0,0,1,1,1,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ), nrow=15, ncol=15) m1err<-matrix(c( 0,0,0,0,0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1, 0,0,0,0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1), 0,0,0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2), 0,0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3), 0,0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0, 0,0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0, 0,0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0, 0,0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0, 0,0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0, 0,0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0, 0,dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0,0, dnorm(3),dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0,0,0, dnorm(2),dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0,0,0,0, dnorm(1),1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,0,0,0,0,0, 1,dnorm(-1),dnorm(-2),dnorm(-3),0,0,0,0,0,00,0,0,0,0 ), nrow=15, ncol=15) m2err<-matrix(c( 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,dnorm(3),0,0,0,0,0,0,0,0, 0,0,0,0,0,0,dnorm(2),0,0,0,0,0,0,0,0, 0,0,0,0,0,0,dnorm(1),0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,dnorm(-1),0,0,0,0,0,0,0,0, 0,0,0,0,0,0,dnorm(-2),0,0,0,0,0,0,0,0, 0,0,0,0,0,0,dnorm(-3),0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,00,0,0,0,0 ), nrow=15, ncol=15) entX0<-round(log2(10),2) entY0<-round(log2(10),2) mi1<-mutualInf(m1) miCirc<-mutualInf(mCirc) miFace<-mutualInf(mFace) mi1err<-mutualInf(m1err) mi2err<-mutualInf(m2err) png(filename="tau-ex-RV.png", width=9, height=6.5, units="in", res=300) par(mfrow=c(2,3), mar=c(4,2,4,2.5), cex.main=1.5, cex.sub=1.5, cex.lab=1.2); lineVal=0.9; sublineVal=2.4; lineValletter=1.5; cexLetter=2; image(m0[,nrow(m0):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); title(main=expression(tau*": "*Ø), xlab =paste("H(X)=", entX0), ylab = paste("H(Y)=", entY0), line=lineVal) mtext(side=1, text="I(Y:X)=0", line=sublineVal) mtext(side=3, adj=0, text="a", line=lineValletter, cex=cexLetter) image(m1[,nrow(m1):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); title(main=expression(tau*": 'Y=X'"), xlab=paste("H(X)=", mi1$x), ylab=paste("H(Y)=", mi1$y), line=lineVal); mtext(side=1, text=paste("I(Y:X)=", mi1$mi), line=sublineVal) mtext(side=3, adj=0, text="b", line=lineValletter, cex=cexLetter) image(m1err[,nrow(m1err):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); title(main=expression(tau*": 'Y=X+N(0,1)'"), xlab=paste("H(X)=", mi1err$x), ylab=paste("H(Y)=", mi1err$y), line=lineVal); mtext(side=1, text=paste("I(Y:X)=", mi1err$mi), line=sublineVal) mtext(side=3, adj=0, text="c", line=lineValletter, cex=cexLetter) image(m2err[,nrow(m2err):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); title(main=expression(tau*": 'Y=X+N(0,1), X=7'"), xlab=paste("H(X|X=7)=", mi2err$x), ylab=paste("H(Y)=", mi2err$y), line=lineVal); mtext(side=1, text=paste("I(Y:X|X=7)= H(Y)-H(Y|X=7)=",mi1err$y-mi2err$y), line=sublineVal) mtext(side=3, adj=0, text="d", line=lineValletter, cex=cexLetter) image(mCirc[,nrow(mCirc):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); title(main=expression(tau*": 'Y"**2*"+X"**2*"=36'"), xlab=paste("H(X)=", miCirc$x), ylab=paste("H(Y)=", miCirc$y), line=lineVal); mtext(side=1, text=paste("I(Y:X)=",miCirc$mi), line=sublineVal); mtext(side=1, text=paste("I(Y:X)=", miCirc$mi), line=sublineVal) mtext(side=3, adj=0, text="e", line=lineValletter, cex=cexLetter) image(mFace[,nrow(mFace):1], col=grey(level=seq(1,0,-0.01)), xaxt="n", yaxt="n"); grid(nx=15, ny=15); title(main=expression(tau*": 'Y"**2*"+X"**2*"=36 & {(-2,2),(2,2),(0,0)...}'"), xlab=paste("H(X)=", miFace$x), ylab=paste("H(Y)=", miFace$y), line=lineVal); mtext(side=1, text=paste("I(Y:X)=", miFace$mi), line=sublineVal) mtext(side=3, adj=0, text="f", line=lineValletter, cex=cexLetter) dev.off() #February 11 2018 figures for new version AMToKSP ##############################---Figure 2: graph to represent the set of all possible methodologies rm(list=ls()) png(filename="methUniv.png", width=1000, height=1000, units="px") par(mfrow=c(10,10), mar=c(0,0,0,0)) for (i in 1:100) { if(i==55) { b<- 1 x<-rnorm(40) y<-b*x+rnorm(40,0,0.1) plot(x,y, pch=20, xlim=c(-2,2), ylim=c(-2,2), tick=FALSE, xaxt="n", yaxt="n", xlab="") minx<- -1.3 miny<- 1.5 labl<-expression(tau[55]) text(labl, x=minx, y=miny, cex=3.5, col="red") } else{ b<-rnorm(1,0,0.3) x<-rnorm(40) y<-b*x+rnorm(40,0,runif(1,0.2,2)) plot(x,y, pch=20, xlim=c(-2,2), ylim=c(-2,2), tick=FALSE, xaxt="n", yaxt="n", xlab="") minx<- -1.3 miny<- 1.5 labl<-substitute(tau[CODE],list(CODE=i)) text(labl, x=minx, y=miny, cex=3.5, col="red") #legend(minx,miny, legend=labl, border="white", bg="white", pt.cex=0, xjust=2) } } dev.off() ##############################-----Figure3: comparing effect sizes to K rm(list=ls()) library(ggplot2) library(grid) library(gridExtra) set.seed(666) #For R-squared and Cohen's d # we imagine a variance of x 1000, and an effect size (variance explained, represented by b) decreasing from 0.001 to 0.999 n<-10000 x<-rnorm(n,0,1000) sdx<-sd(x) sdyx<-100 bvals<-seq(0.00,0.99,0.01) R2<-c(0) r<-c(0) d<-c(0) d1<-c(0) K<-c(0) K1<-c(0) K2<-c(0) K3<-c(0) # we calculate K valies for a tau that is larger than before, but which is then reduced by n=2,10,100 repetitions Kn1<-c(0) Kn10<-c(0) Kn100<-c(0) Kn1000<-c(0) Kfun<-function(hy,hyx,hx,t){ (hy-hyx)/(hy+hx+t) } for(b in bvals){ y<-b*x+rnorm(n,0,sdyx) sdy<-sd(y) tau<-1 tau1<-100 #we calculate R-squared, Cohen's d and K, for the latter assuming different levels of accuracy (significant digits). For the former, the accuracy makes no difference R2b<-(sdy^2-sdyx^2)/sdy^2 rb<-sqrt(R2b) db<-sqrt((4*rb^2)/(1-rb^2)) d1b<-sqrt(4*(sdy-sdyx))/sqrt(sdyx) R2<-c(R2, R2b) r<-c(r, rb) d<-c(d, db) d1<-c(d1, d1b) K<-c(K, Kfun(log(round(sdy,0)), log(round(sdyx,0)), log(round(sdx,0)), log2(sqrt(2*pi*exp(1)))+tau)) K1<-c(K1, Kfun(log(round(sdy*10,0)), log(round(sdyx*10,0)), log(round(sdx*10,0)), log2(sqrt(2*pi*exp(1)))+tau)) K2<-c(K2, Kfun(log(round(sdy*100,0)), log(round(sdyx*100,0)), log(round(sdx*100,0)), log2(sqrt(2*pi*exp(1)))+tau)) K3<-c(K3, Kfun(log(round(sdy*1000,0)), log(round(sdyx*1000,0)), log(round(sdx*1000,0)), log2(sqrt(2*pi*exp(1)))+tau)) # we calcualte K valies for a tau that is larger than before, but which is then reduced by n=2,10,100 repetitions Kn1<-c(Kn1, Kfun(log(round(sdy,0)), log(round(sdyx,0)), log(round(sdx,0)), log2(sqrt(2*pi*exp(1)))+tau1) ) Kn10<-c(Kn10, Kfun(log(round(sdy,0)), log(round(sdyx,0)), 0, log2(sqrt(2*pi*exp(1)))+(log(sdx +tau1)/2)) ) Kn100<-c(Kn100, Kfun(log(round(sdy,0)), log(round(sdyx,0)), 0, log2(sqrt(2*pi*exp(1)))+(log(sdx +tau1)/10)) ) Kn1000<-c(Kn1000, Kfun(log(round(sdy,0)), log(round(sdyx,0)), 0, log2(sqrt(2*pi*exp(1)))+(log(sdx+tau1)/100)) ) } #for Chi-square #we generate a 2 by 2 contingency table. We will assume that y and x have both uniform marginal probability distribution, and we progressively generate a pattern in it. For K we assume that tau=1 p<-seq(0.25,0.4999, 0.0001) Hyx<-(-p*log2(p) - (0.5-p)*log2(0.5-p) - p*log2(p) - (0.5-p)*log2(0.5-p)) n=100 chi100<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25) K100<-(1+1-Hyx)/(1+1+tau/n) n=80 chi80<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25) K80<-(1+1-Hyx)/(1+1+tau/n) n=40 chi40<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25) K40<-(1+1-Hyx)/(1+1+tau/n) n=20 chi20<-n*( ((0.25 - p)^2)/0.25 + ((0.25 - (0.5-p))^2)/0.25 + ((0.25 - p)^2)/0.25 + ((0.25 - (0.5-p))^2)/0.25) K20<-(1+1-Hyx)/(1+1+tau/n) tau<-100 n=100 chi100t100<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25) K100t100<-(1+1-Hyx)/(1+1+tau/n) n=80 chi80t100<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25) K80t100<-(1+1-Hyx)/(1+1+tau/n) n=40 chi40t100<-n*(( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25 + ( (0.25 - p)^2 ) / 0.25 + ( (0.25 - (0.5-p))^2 ) / 0.25) K40t100<-(1+1-Hyx)/(1+1+tau/n) n=20 chi20t100<-n*( ((0.25 - p)^2)/0.25 + ((0.25 - (0.5-p))^2)/0.25 + ((0.25 - p)^2)/0.25 + ((0.25 - (0.5-p))^2)/0.25) K20t100<-(1+1-Hyx)/(1+1+tau/n) #prepare graphs titSize<-24 subtitSize<-titSize*0.8 labSize<-subtitSize*0.8 axisLabSize<-labSize*0.8 ylimR2<-max(K,Kn1, Kn10, Kn100, Kn1000) ylimChi<-max(K40, K80, K100, K40t100, K80t100, K100t100) p1<-ggplot(data=data.frame(R2,K), aes(x=R2,y=K))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title="K, varying accuracy", x="R-squared", y="") p1<-p1+geom_line(data=data.frame(R2,K1), aes(x=R2,y=K1), linetype=2) p1<-p1+geom_line(data=data.frame(R2,K2), aes(x=R2,y=K2), linetype=3) p1<-p1+geom_line(data=data.frame(R2,K3), aes(x=R2,y=K3), linetype=4) p1<-p1+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize)) p1<-p1 p2<-ggplot(data=data.frame(R2,Kn1), aes(x=R2,y=K))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title="K, varying n", x="R-squared", y="") p2<-p2+geom_line(data=data.frame(R2,Kn10), aes(x=R2,y=Kn10), linetype=2) p2<-p2+geom_line(data=data.frame(R2,Kn100), aes(x=R2,y=Kn100), linetype=3) p2<-p2+geom_line(data=data.frame(R2,Kn1000), aes(x=R2,y=Kn1000), linetype=4) p2<-p2+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize)) p3<-ggplot(data=data.frame(d,K), aes(x=d,y=K))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title="K, varying accuracy", x="Cohen's d (absolute value)", y="") p3<-p3+geom_line(data=data.frame(d,K1), aes(x=d,y=K1), linetype=2) p3<-p3+geom_line(data=data.frame(d,K2), aes(x=d,y=K2), linetype=3) p3<-p3+geom_line(data=data.frame(d,K3), aes(x=d,y=K3), linetype=4) p3<-p3+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize)) p4<-ggplot(data=data.frame(d,Kn1), aes(x=d,y=Kn1))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title="K, varying n", x="Cohen's d (absolute value)", y="") p4<-p4+geom_line(data=data.frame(d,Kn10), aes(x=d,y=Kn10), linetype=2) p4<-p4+geom_line(data=data.frame(d,Kn100), aes(x=d,y=Kn100), linetype=3) p4<-p4+geom_line(data=data.frame(d,Kn1000), aes(x=d,y=Kn1000), linetype=4) p4<-p4+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=labSize), axis.title.x = element_text(size=subtitSize)) p5<-ggplot(data=data.frame(chi20,K20), aes(x=chi20,y=K20))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title=expression("K, varying n, l("*tau*")"==1), xlim=c(0,max(chi100)), x="Chi-squared", y="") p5<-p5+geom_line(data=data.frame(chi40,K40), aes(x=chi40,y=K40), linetype=2) p5<-p5+geom_line(data=data.frame(chi80,K80), aes(x=chi80,y=K80), linetype=3) p5<-p5+geom_line(data=data.frame(chi100,K100), aes(x=chi100,y=K100), linetype=4) p5<-p5+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize)) p6<-ggplot(data=data.frame(chi20t100,K20t100), aes(x=chi20t100,y=K20t100))+geom_line(linetype=1)+scale_y_continuous(limits=c(0,ylimR2))+labs(title=expression("K, varying n, l("*tau*")"==100), xlim=c(0,max(chi100)), x="Chi-squared", y="") p6<-p6+geom_line(data=data.frame(chi40t100,K40t100), aes(x=chi40t100,y=K40t100), linetype=2) p6<-p6+geom_line(data=data.frame(chi80t100,K80t100), aes(x=chi80t100,y=K80t100), linetype=3) p6<-p6+geom_line(data=data.frame(chi100t100,K100t100), aes(x=chi100t100,y=K100t100), linetype=4) p6<-p6+theme(plot.title = element_text(size=subtitSize, face="plain"), axis.text = element_text(size=axisLabSize), axis.title.x = element_text(size=subtitSize)) graphs<-arrangeGrob(textGrob("a", gp=gpar(fontsize=subtitSize), x=0.03), textGrob("b", gp=gpar(fontsize=subtitSize), x=0.03), p1,p2,textGrob("c", gp=gpar(fontsize=subtitSize), x=0.03), textGrob("d", gp=gpar(fontsize=subtitSize), x=0.03), p3,p4,textGrob("e", gp=gpar(fontsize=subtitSize), x=0.03), textGrob("f", gp=gpar(fontsize=subtitSize), x=0.03), p5,p6, nrow=6, heights=c(0.1,1,0.1,1,0.1,1)) titGrob<-textGrob("relation between K and statistical measures", gp=gpar(fontsize=titSize)) png(filename="kR2examples.png", width=1.5*480, height=1.5*480) grid.arrange(titGrob,graphs, nrow=2, heights=c(1,10)) dev.off() ##############################--------S1 Figure: Pressure Demon with m measurements m<-seq(0,10, 1) e<-seq(1,0.4,-0.05) t<-c(1,0.1,0.01) k<-function(m,e,t){(1-e^m)/(1+m+t)} k1<-k(m,0.99,0) png(filename="maxDemMeasurementsEx.png", width=480, height=480) plot(m,k(m,e[1],0), ylim=c(0,0.3), type="l", ylab="K", xlab="measurements", main="Mawell's Demon's K, varying error", cex.lab=1.5, cex.main=1.7) for(i in 2:length(e)) lines(m,k(m,e[i],0), lty=1) dev.off() ##############################--------Figure 5 and S6 Figures A and B: effects of resolution on K rm(list-ls()) library(ggplot2) library(gridExtra) library(grid) library(entropy) set.seed(666) n<-10000 Umax<-100 Umin<--100 UmaxEr<-10 UminEr<--10 sdN<-10 range<-c(Umin, Umax) Method1<-"ML" Method2<-"shrink" x<-round(runif(n,Umin, Umax),0) y<-x+round(runif(n,UminEr, UmaxEr),0)# linear case y2<-x+x^2+round(runif(n,UminEr, UmaxEr),0)#parabolic case df<-data.frame(x,y) df2<-data.frame(x,y2) txtSize<-15 titSize<-txtSize*1.5 #----first simulation, on linear case p0<-ggplot(df, aes(x,y))+geom_point(alpha=0.025)+theme_bw()+ scale_fill_gradientn(colours=c("white","grey", "black"), guide=FALSE)+ggtitle(substitute(Y*'='*X*'+'*e[1]*', '*e[1]*'~'*U*'('*UMIN*','*UMAX*')', list(UMIN= UminEr, UMAX= UmaxEr)))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize)) acc<-seq(1,10,1) plotList<-list() Hx<-c() Hy<-c() Hyx<-c() Hy.x<-c() k<-c() k2<-c() for(a in acc){ binNum<-2^a #Entropy and K based on simple cell counts hx<-entropy(y=discretize(x, numBins=binNum, r=range), unit="log2", method= Method1) hy<-entropy(y=discretize(y, numBins=binNum, r=range), unit="log2", method= Method1) hyx<-entropy(y=discretize2d(x1=x,x2=y, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method1) hy.x<-entropy(y=discretize2d(x1=x,x2=y, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method1)-hx K<-round((hy-hy.x)/(hy+hx+0),2) #Entropy and K based on estimator hx<-entropy(y=discretize(x, numBins=binNum, r=range), unit="log2", method= Method2) hy<-entropy(y=discretize(y, numBins=binNum, r=range), unit="log2", method= Method2) hyx<-entropy(y=discretize2d(x1=x,x2=y, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method2) hy.x<-entropy(y=discretize2d(x1=x,x2=y, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method2)-hx K2<-round((hy-hy.x)/(hy+hx+0),2) print(a) print(list(c(hx,hy,hy.x))) print(K) print("----") Hx<-c(Hx,hx) Hy<-c(Hy,hy) Hyx<-c(Hyx,hyx) Hy.x<-c(Hy.x,hy.x) k<-c(k,K) k2<-c(k2,K2) p<-ggplot(df, aes(x,y))+geom_bin2d(bins= binNum-1, show.legend=FALSE, drop=TRUE)+theme_bw()+ scale_fill_gradientn(colours=c("white","grey", "black"), guide=FALSE)+theme(axis.title.y=element_text(size=txtSize), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize, hjust=0.5), plot.subtitle=element_text(size= titSize, hjust=0.5))+labs(title=substitute('a='*ACC*', '*H(Y)*'='*HY, list(ACC=binNum, HY=round(hy,2))), subtitle=substitute('K('*Y*';'*X*','*tau*')'*'='*VAL, list(VAL=K, ACC=binNum))) plotList<-c(plotList, list(p)) } pk<-ggplot(data.frame(acc,Hy,k,k2), aes(acc,k))+geom_line()+geom_line(aes(x=acc, y=k2), col="red")+theme_bw()+labs(x="log(a)")+ggtitle(expression(K*'('*Y*';'*X*','*tau*')'))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize*1.5), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize)) pky<-ggplot(data.frame(acc,Hy,k,k2), aes(acc,Hy*k))+geom_line()+geom_line(aes(x=acc, y=Hy*k2), col="red")+theme_bw()+labs(x="log(a)")+ggtitle(expression(K*'('*Y*';'*X*','*tau*') '%*%' H('*Y*')'))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize*1.5), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize)) #----second simulation, on parabolic case p02<-ggplot(df2, aes(x,y2))+geom_point(alpha=0.0025)+theme_bw()+ scale_fill_gradientn(colours=c("white","grey", "black"), guide=FALSE)+ggtitle(substitute(Y*'='*X*'+'*X^2*'+'*e[1]*', '*e[1]*'~'*U*'('*UMIN*','*UMAX*')', list(UMIN= UminEr, UMAX= UmaxEr)))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize)) acc<-seq(1,10,1) plotList2<-list() Hx<-c() Hy<-c() Hyx<-c() Hy.x<-c() k<-c() k2<-c() for(a in acc){ binNum<-2^a #Entropy and K based on simple cell counts hx<-entropy(y=discretize(x, numBins=binNum, r=range), unit="log2", method= Method1) hy<-entropy(y=discretize(y2, numBins=binNum, r=range), unit="log2", method= Method1) hyx<-entropy(y=discretize2d(x1=x,x2=y2, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method1) hy.x<-entropy(y=discretize2d(x1=x,x2=y2, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method1)-hx K<-round((hy-hy.x)/(hy+hx+0),2) #Entropy and K based on estimator hx<-entropy(y=discretize(x, numBins=binNum, r=range), unit="log2", method= Method2) hy<-entropy(y=discretize(y2, numBins=binNum, r=range), unit="log2", method= Method2) hyx<-entropy(y=discretize2d(x1=x,x2=y2, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method2) hy.x<-entropy(y=discretize2d(x1=x,x2=y2, numBins1=binNum, numBins2=binNum, r1=range, r2=range), unit="log2", method= Method2)-hx K2<-round((hy-hy.x)/(hy+hx+0),2) print(a) print(list(c(hx,hy,hy.x))) print(K) print("----") Hx<-c(Hx,hx) Hy<-c(Hy,hy) Hyx<-c(Hyx,hyx) Hy.x<-c(Hy.x,hy.x) k<-c(k,K) k2<-c(k2,K2) p2<-ggplot(df2, aes(x,y2))+geom_bin2d(bins= binNum-1, show.legend=FALSE, drop=TRUE)+theme_bw()+ scale_fill_gradientn(colours=c("white","grey", "black"), guide=FALSE)+theme(axis.title.y=element_text(size=txtSize), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize, hjust=0.5), plot.subtitle=element_text(size= titSize, hjust=0.5))+labs(title=substitute('a='*ACC*', '*H(Y)*'='*HY, list(ACC=binNum, HY=round(hy,2))), subtitle=substitute('K('*Y*';'*X*','*tau*')'*'='*VAL, list(VAL=K, ACC=binNum))) plotList2<-c(plotList2, list(p2)) } pk2<-ggplot(data.frame(acc,Hy,k,k2), aes(acc,k))+geom_line()+geom_line(aes(x=acc, y=k2), col="red")+theme_bw()+labs(x="log(a)")+ggtitle(expression(K*'('*Y*';'*X*','*tau*')'))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize*1.5), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize)) pky2<-ggplot(data.frame(acc,Hy,k,k2), aes(acc,Hy*k))+geom_line()+geom_line(aes(x=acc, y=Hy*k2), col="red")+theme_bw()+labs(x="log(a)")+ggtitle(expression(K*'('*Y*';'*X*','*tau*') '%*%' H('*Y*')'))+theme(axis.title.y=element_blank(), axis.text.y=element_text(size=txtSize), axis.title.x=element_text(size=txtSize*1.5), axis.text.x=element_text(size=txtSize), plot.title=element_text(size= titSize)) #prepare the graphical objects g1<-arrangeGrob(p0, plotList[[1]], plotList[[2]], plotList[[3]], plotList[[4]], plotList[[5]], plotList[[6]], plotList[[7]], plotList[[8]], plotList[[9]], pk, pky, nrow=2, ncol=6) g2<-arrangeGrob(p02, plotList2[[1]], plotList2[[2]], plotList2[[3]], plotList2[[4]], plotList2[[5]], plotList2[[6]], plotList2[[7]], plotList2[[8]], plotList2[[9]], pk2, pky2, nrow=2, ncol=6) png(filename="KaccuracyEx.png", width=4*480, height=3*480, units="px") grid.arrange(textGrob("relation between resolution and K", gp=gpar(fontsize=60)), textGrob("a", x=0.03, gp=gpar(fontsize=40)), g1, textGrob("b", x=0.03, gp=gpar(fontsize=40)), g2, nrow=5, ncol=1, heights=c(0.5, 0.5, 5, 0.5 ,5)) dev.off() ##############################--------Figure 6: Gender differences in personality rm(list=ls()) library(ggplot2) library(gridExtra) library(grid) library(cowplot) library(entropy) library(MASS) library(plyr) #------Data from Del Giudice, with mean and variance, or separate covariance matrices for males and females. We thus assume that actual populations of values are (potentially) bimodal normal along any given dimension. dfMV<-read.table("genMeanVar.txt", sep="\t", header=TRUE, dec=",", stringsAsFactors=TRUE) dfmal<-read.table("gendMatMal.txt", sep="\t", header=TRUE, dec=",", stringsAsFactors=TRUE) dffem<-read.table("gendMatFem.txt", sep="\t", header=TRUE, dec=",", stringsAsFactors=TRUE) #K function kFun<-function(y,yx,x,t){(y-yx)/(y+x+t)} #functions to calculate av-d and av-D avd<-function(m1,m2,s1,s2){0.125*((m1-m2)^2)*(s1^2+s2^2)/((s1^2)*(s2^2))} avD<-function(d,S1,S2,D){0.5*(-d+( 0.5*(sum(diag(solve(S1) %*% S2))+sum(diag(solve(S2) %*% S1))) +D^2))} #create var covar matrices dfmalMatrix<-as.matrix(dfmal[1:15,2:16]) dffemMatrix<-as.matrix(dffem[1:15,2:16]) #sample size, entropy estimation method, and bin multiplier N<-5*10^6 entMethod<-"shrink" #for each variable, simulate data set.seed(666) yM<-mvrnorm(n=N, mu=dfMV$meanM, Sigma= dfmalMatrix) yF<-mvrnorm(n=N, mu=dfMV$meanF, Sigma= dffemMatrix) yT<-rbind(yM, yF) write.table(cbind(yT, c(rep("m", N), rep("f", N))), file=paste("gendDat", N, ".txt", sep=""), sep="\t") #define bin number for entropy of individual parameters i<-2 dfKeach<-data.frame(acc=numeric(0), varName=character(0), Hm=numeric(0), Hf=numeric(0), Htot=numeric(0), K=numeric(0), KY=numeric(0)) for(v in 1:length(yT[1,])) { minv<-min(yT[,v]) maxv<-max(yT[,v]) midv<-median(yT[,v]) ht<-entropy(table(cut(yT[,v], breaks=c(minv, midv, maxv))), unit="log2") hm<-entropy(table(cut(yM[,v], breaks=c(minv, midv, maxv))), unit="log2") hf<-entropy(table(cut(yF[,v], breaks=c(minv, midv, maxv))), unit="log2") #calculate K values for each parameter k<-kFun(ht, 0.5*(hm+hf), 1,0) ky<-k*ht dfKeach<-rbind(dfKeach, data.frame(acc=i, varName=dfMV$vars[v], Hm= hm, Hf= hf, Htot= ht, K=k, KY=ky)) print(v) } #calculate the naive k by summing across all parameters HtotSum<-sum(dfKeach$Htot) HmSum<-sum(dfKeach$Hm) HfSum<-sum(dfKeach$Hf) Ksum<-with(dfKeach, kFun(sum(Htot), sum(0.5*(Hm+Hf)), 1, 0)) KsumY<-Ksum*sum(dfKeach$Htot) dfKeach<-rbind(dfKeach, data.frame(acc=i, varName= "Kmd", Hm= HmSum, Hf= HfSum, Htot= HtotSum, K=Ksum, KY=KsumY)) dfKeach<-rbind(dfKeach, data.frame(acc=i, varName= "average K", Hm= mean(dfKeach$Hm[1:15]), Hf= mean(dfKeach$Hf[1:15]), Htot= mean(dfKeach$Htot[1:15]), K=mean(dfKeach$K[1:15]), KY=mean(dfKeach$KY[1:15]))) dfKeach$k<-with(dfKeach, (Htot-0.5*(Hm+Hf))/Htot) #save data write.table(dfKeach, file="gendDiffRes.txt", append=FALSE, dec=",", sep="\t", row.names=FALSE) #dfKeach<-read.table("gendDiffRes.txt", sep="\t", header=TRUE, dec=",") dfK<-data.frame(dfKeach, varK=c("Warmth", "Emotional Stability", "Dominance", "Liveliness", "Rule-Consciousness", "Social Boldness", "Sensitivity", "Vigilance", "Abstractness", "Privateness", "Apprehension", "Openness to Change", "Self-Reliance", "Perfectionism", "Tension", "Kmd", "average K")) dfK$varK<-factor(dfK$varK, levels=c("Liveliness", "Vigilance", "Perfectionism", "Abstractness", "Social Boldness", "Self-Reliance", "Rule-Consciousness", "Privateness", "Tension", "Dominance", "Emotional Stability", "Openness to Change", "Warmth", "Apprehension", "Sensitivity", "average K", "Kmd")) dfD<-data.frame(d=c(dfMV$d.score, mean(abs(dfMV$d.score), na.rm=TRUE), 1.49), varD=c("Warmth", "Emotional Stability", "Dominance", "Liveliness", "Rule-Consciousness", "Social Boldness", "Sensitivity", "Vigilance", "Abstractness", "Privateness", "Apprehension", "Openness to Change", "Self-Reliance", "Perfectionism", "Tension", "average d", "Mahalanobis D")) dfD$varD<-factor(dfD$varD, levels=c("Liveliness", "Vigilance", "Perfectionism", "Abstractness", "Social Boldness", "Self-Reliance", "Rule-Consciousness", "Privateness", "Tension", "Dominance", "Emotional Stability", "Openness to Change", "Warmth", "Apprehension", "Sensitivity", "average d", "Mahalanobis D")) #prepare the plots titSize<-17 subtitSize<-titSize*0.8 labSize<-subtitSize labPosX<-0.03 pd<-ggplot(data= dfD, aes(x=varD, y=abs(d)))+geom_bar(stat="identity", fill=c(rep("black",15), "orange", "red"))+ theme(axis.text.x = element_text(angle = 45, hjust = 1, size=12))+labs(x="", y="abs(d)")+ggtitle(expression("Cohen's d, absolute values"))+theme(plot.title=element_text(face="bold", size=titSize)) pk<-ggplot(data= dfK, aes(x=varK, y=K))+geom_bar(stat="identity", fill=c(rep("black",15), "orange", "blue"))+ theme(axis.text.x = element_text(angle = 45, hjust = 1, size=12))+labs(x="", y="")+ggtitle(expression('K'*'('*'Y'*';'*'gender,'*tau*')'))+theme(plot.title=element_text(face="bold", size=titSize))+annotate("text", x=17, y=0.027, label=round(dfK$KY[16],2), colour=c("blue")) titleGrob<-textGrob("gender differences in personality factors", gp=gpar(fontsize=titSize*1.3)) png(filename="sexDiffEx.png", width=480, height=1.2*480) grid.arrange(titleGrob,textGrob("a", x=labPosX, gp=gpar(fontsize=labSize)), pd, textGrob("b", x=labPosX, gp=gpar(fontsize=labSize)), pk, nrow=5, heights=c(0.8,0.2,5,0.2,5)) dev.off() ##############################--------Figure 7: Cumulative K versus meta-analysis rm(list=ls()) library(ggplot2) library(gridExtra) library(grid) library(cowplot) Kcum<-function(df, exdum, exans, tau, grouping, nDig, method) { require(entropy)#load package with key functions require(plyr)#package to calculate values by group Kfun<-function(hy,hyx,hx,t){ (hy-hyx)/(hy+hx+t) } #center the explanandum, and scale significant digits up to have delta=1 df$y<-round(scale(df[,exdum], scale=FALSE)*10^nDig,0) maxVal<-max(abs(df$y))#create a range that captures all values of y range<-c(-maxVal, maxVal) bins<-2*maxVal+1#the number of bins is then the number of measurement units in the total range KvalTable<-data.frame(group=character(0), Hy=numeric(0), mHyx=numeric(0), mdy=numeric(0), mdyx=numeric(0), K=numeric(0), Kc=numeric(0)) resTable<-data.frame(group=character(0), Ngroup=numeric(0), treatment=character(0), Ntreatment=numeric(0), Hy=numeric(0), dy=numeric(0), WG=numeric(0), Hyx=numeric(0), dyx=numeric(0), WGx=numeric(0))#produce table of goup data #calculate counts and entropy for total yCount<- discretize(df$y, numBins=bins, r=range)#count frequencies average for group-level y yEntTot<- entropy(yCount, method=method, unit="log2")#entropy of average for group-level y totN<-length(df$y)#total sample size #calculate counts and entropy for total by explanans df$scaleVar<-paste(df[,exans], df[,grouping]) ySc<-ddply(.data=df, .variables= "scaleVar", summarize, ySc=scale(y, scale=FALSE)) df$ySc<-ySc$ySc yxCount<- discretize(df$ySc, numBins=bins, r=range) yxEntTot<-entropy(yxCount, method=method, unit="log2") #calculate counts and entropy for total by study, save results in a data frame groupNames<-levels(as.factor(df[,grouping])) for(g in groupNames){ dfg<-df[df[,grouping]==g,] exansNames<-levels(as.factor(dfg[,exans])) for(j in exansNames){ dfgj<-dfg[dfg[,exans]==j,] yCountG<- discretize(dfg$y,numBins=bins, r=range)#count frequencies for group-level y yEntTotG<-entropy(yCountG, method=method, unit="log2")#entropy of group-level y yDistTotG<-KL.empirical(y1=yCountG, y2=yCount, unit="log2")#distance of group-level y from averge of group-level y NG<-length(dfg$y)#group sample size WG<-NG/totN #relative group weight yxCountG<- discretize(scale(dfgj$y, scale=FALSE),numBins=bins, r=range)#count frequencies for treatment-level within group yxEntTotG<-entropy(yxCountG, method=method, unit="log2")#entropy of treatment-level within group yxDistTotG<-KL.empirical(y1=yxCountG, y2=yxCount, unit="log2")#distance for each treatment-level within group from total across groups NGJ<-length(dfgj$y)#treatment-level sample size Wx<-NGJ/totN#weight of treatment-level in total sample resTable<-rbind(resTable, data.frame(group=g, Ngroup= NG, treatment=j, Ntreatment= NGJ, Hy=yEntTotG, dy=yDistTotG, WG=WG, Hyx=yxEntTotG, dyx=yxDistTotG, tau=tau, WGx=Wx)) } avHyxG<-with(resTable[resTable$group==g,], sum(Hyx*WG)) avdyxG<-with(resTable[resTable$group==g,], sum(dyx*WG)) Kg<-Kfun(yEntTotG, avHyxG,1,tau) KcG<-Kfun(yEntTotG, avHyxG+avdyxG,1,tau) KvalTable<-rbind(KvalTable, data.frame(group=g, Hy=yEntTotG, mHyx= avHyxG, mdy= yDistTotG , mdyx= avdyxG, K=Kg, Kc=KcG)) } #calculate cumulative K dfT<-data.frame(resTable) avHy<-with(dfT[!duplicated(dfT$group),], sum(Hy*WG)) avdy<-with(dfT[!duplicated(dfT$group),], sum(dy*WG)) avHyx<-with(dfT, sum(Hyx*WGx)) avdyx<-with(dfT, sum(dyx*WGx)) Km<-Kfun(avHy, avHyx,1, tau) Kc<-Kfun(avHy+avdy, avHyx+avdyx,1, tau) KvalTable<-rbind(KvalTable, data.frame(group="Kc", Hy= avHy, mHyx= avHyx, mdy= avdy , mdyx= avdyx, K=Km, Kc=Kc)) return(list(Analysis=resTable, Kvalues=KvalTable)) } #Figure A, with same variance, SAME population values, same effect size #Figure B, with same variance, different population values, same effect size #Figure C, with different variance, different population values, same effect size #Figure D, with different variance, different population values, null effect size sd1List<-c(1,1,1,1,1) sd2List<-c(1,2,1,1,1) meanX1List<-c(1,1,1,1,1) meanX2List<-c(3,3,3,3,1) meanX3List<-c(1,1,1,4,4) meanX4List<-c(3,3,3,6,4) cumTauList<-c(3,3,4.75,3,3) #commands to save plots and results plotList<-list() sink(file="CumKres.txt", append=TRUE) for(i in 1:length(sd1List)) { N<-1000 sd1<-sd1List[i] sd2<-sd2List[i] meanX1<-meanX1List[i] meanX2<-meanX2List[i] meanX3<-meanX3List[i] meanX4<-meanX4List[i] cumTau<-cumTauList[i] set.seed(666) x1<-rnorm(N, mean=meanX1, sd=sd1) x2<-rnorm(N, mean=meanX2, sd=sd1) x3<-rnorm(N, mean=meanX3, sd=sd2) x4<-rnorm(N, mean=meanX4, sd=sd2) s1resc<-scale(c(x1,x2))+mean(c(x1,x2,x3,x4)) s2resc<-scale(c(x3,x4))+mean(c(x1,x2,x3,x4)) meangroups1<-mean(c(s1resc[1:N],s2resc[1:N])) meangroups2<-mean(c(s1resc[(N+1):(2*N)], s2resc[(N+1):(2*N)])) ES1<-(mean(x2)-mean(x1))/(sd(x1)+sd(x2))*0.5 SE1<-sd(x1)/sqrt(N) ES2<-(mean(x4)-mean(x3))/(sd(x3)+sd(x4))*0.5 SE2<-sd(x3)/sqrt(N) Wtot<-(1/SE1^2)+(1/SE2^2) W1<-(1/SE1^2)/Wtot W2<-(1/SE2^2)/Wtot ESpool<-W1*ES1+W2*ES2 SEpool<-sqrt(1/((N*sd(x1)^(-2))+(N*sd(x3)^(-2)))) #generate data sets for plots dfMet<-data.frame(res=c(x1,x2,x3,x4), expl=c(rep("x1", N), rep("x2", N), rep("x3", N), rep("x4", N)), treat=c(rep("cont", N), rep("treat", N), rep("cont", N), rep("treat", N)), study=c(rep("s1", 2*N), rep("s2", 2*N)), MEAN=c(rep(mean(x1), N), rep(mean(x2), N), rep(mean(x3), N), rep(mean(x4), N)), SDV=c(rep(sd(x1), N), rep(sd(x2), N), rep(sd(x3), N), rep(sd(x4), N)), groups=c(rep("g1", N), rep("g2", N), rep("g1", N), rep("g2", N)), MEANgroups=c(rep(mean(c(x1,x3)), N), rep(mean(c(x2,x4)), N), rep(mean(c(x1,x3)), N), rep(mean(c(x2,x4)), N)), SDVgroups=c(rep(sd(c(x1,x3)), N), rep(sd(c(x2,x4)), N), rep(sd(c(x1,x3)), N), rep(sd(c(x2,x4)), N)), avres=c(s1resc, s2resc), MEANavres=c(rep(meangroups1, N), rep(meangroups2, N), rep(meangroups1, N), rep(meangroups2, N)), study=c(rep("study1", 2*N), rep("study2", 2*N))) dfMet$resScExpl<-dfMet$res-dfMet$MEAN dfMetaAn<-data.frame(study=c("st.1", "st.2", "pooled"), ES=c(ES1,ES2, ESpool), SE=c(SE1, SE2, SEpool)) dfMetaAn$study<-factor(dfMetaAn$study, levels=c("pooled", "st.2", "st.1")) #calculate values of entropy and K Kres<-Kcum(df=dfMet, exdum ="res", exans ="treat", tau= cumTau, grouping ="study", nDig=1, method="ML" ) sigD<-3#significant digits to show on plots txtSize<-13#plot titles size p1<-ggplot(data=subset(dfMet, is.element(expl, c("x1", "x2"))), aes(x=expl, y=res))+geom_point( show.legend=FALSE, col="blue")+geom_errorbar(aes(ymin= MEAN, ymax= MEAN), show.legend=FALSE, col="blue")+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_y_continuous(limits=c(-3,8))+ggtitle(substitute(list(K==K1), list(K1=round(Kres$Kvalues$K[1], sigD)))) pmarX1<-ggplot(subset(dfMet, is.element(expl, c("x1", "x2"))), aes(x=res, group=expl))+geom_density(show.legend=FALSE, col="blue")+coord_flip()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-3,8))+ggtitle(substitute(H(Y[1]~"|"~X)==Y1X, list(Y1X=round(Kres$Kvalues$mHyx[1],sigD)))) pmarY1<-ggplot(subset(dfMet, is.element(expl, c("x1", "x2"))), aes(x=res), col="blue")+geom_density(show.legend=FALSE, col="blue")+coord_flip()+scale_y_reverse()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-3,8))+ggtitle(substitute(H(Y[1])==Y1, list(Y1=round(Kres$Kvalues$Hy[1], sigD)))) p2<-ggplot(subset(dfMet, is.element(expl, c("x3", "x4"))), aes(x=expl, y=res))+geom_point( show.legend=FALSE, col="goldenrod")+geom_errorbar(aes(ymin= MEAN, ymax= MEAN), show.legend=FALSE, col="goldenrod")+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_y_continuous(limits=c(-3,8))+ggtitle(substitute(list(K==K2), list(K2=round(Kres$Kvalues$K[2], sigD)))) pmarX2<-ggplot(subset(dfMet, is.element(expl, c("x3", "x4"))), aes(x=res, group=expl))+geom_density(show.legend=FALSE, col="goldenrod")+coord_flip()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-3,8))+ggtitle(substitute(H(Y[2]~"|"~X)==Y2X, list(Y2X=round(Kres$Kvalues$mHyx[2],sigD)))) pmarY2<-ggplot(subset(dfMet, is.element(expl, c("x3", "x4"))), aes(x=res), col="goldenrod")+geom_density(show.legend=FALSE, col="goldenrod")+coord_flip()+scale_y_reverse()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-3,8))+ggtitle(substitute(H(Y[2])==Y1, list(Y1=round(Kres$Kvalues$Hy[2], sigD)))) #tags for meta-analysis plot ESpool<-round(dfMetaAn$ES[dfMetaAn$study=="pooled"], sigD) SEpool<-round(dfMetaAn$SE[dfMetaAn$study=="pooled"], sigD) p3<-ggplot(dfMetaAn, aes(x=study, y=ES))+geom_point(show.legend=FALSE, col="black", shape=c(1,1,18), size=c(5,5,5))+geom_errorbar(aes(ymin=ES-1.96*SE, ymax=ES+1.96*SE), show.legend=FALSE, col="black")+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), plot.title=element_text(size= txtSize))+scale_y_continuous(limits=c(-0.2,0.6))+coord_flip()+ggtitle(substitute(ES==ESPOOL %+-% SEPOOL, list(ESPOOL=ESpool, SEPOOL=SEpool))) p4<-ggplot(dfMet, aes(x=groups, y=scale(res, scale=FALSE)))+geom_point(show.legend=FALSE, aes(col=expl, alpha=0.1))+scale_colour_manual(values=c("blue", "blue", "goldenrod", "goldenrod"))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_y_continuous(limits=c(-6,6))+ggtitle(substitute(list(bar(tau)*'+'*bar(d)==TAU, K[cum]==K3C), list(TAU=cumTau, K3C=round(Kres$Kvalues$Kc[3], sigD)))) pmarX4<-ggplot()+geom_density(inherit.aes=FALSE, data=dfMet, aes(x=resScExpl, col=expl), show.legend=FALSE, lty=2)+scale_colour_manual(values=c("blue", "blue", "goldenrod", "goldenrod"))+coord_flip()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-6,6))+geom_density(inherit.aes=FALSE, data=dfMet, aes(x=scale(resScExpl, scale=FALSE)), show.legend=FALSE, col="black")+ggtitle(substitute(bar(H(Y~"|"~X)) + bar(d)==YXD, list(YXD=round(Kres$Kvalues$mHyx[3]+Kres$Kvalues$mdyx[3],sigD)))) pmarY4<-ggplot()+geom_density(inherit.aes=FALSE, data=dfMet, aes(x=scale(res, scale=FALSE), col=study), show.legend=FALSE, lty=2)+scale_colour_manual(values=c("blue", "goldenrod"))+coord_flip()+scale_y_reverse()+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), plot.title=element_text(size= txtSize))+scale_x_continuous(limits=c(-6,6))+geom_density(inherit.aes=FALSE, data=dfMet, aes(x=scale(res, scale=FALSE)), show.legend=FALSE, col="black")+ggtitle(substitute(bar(H(Y))+bar(d)==YD, list(YD=round(Kres$Kvalues$Hy[3]+Kres$Kvalues$mdy[3],sigD)))) pEmpty <- ggplot(mtcars, aes(x = wt, y = mpg)) +geom_blank() + theme(axis.text = element_blank(), axis.title = element_blank(), line = element_blank(), panel.background = element_blank()) g<-arrangeGrob(layout_matrix=rbind(c(1,2,3, 4,5,6, 7, 8,9,10)), pmarY1, p1, pmarX1, pmarY2, p2, pmarX2, p3, pmarY4, p4, pmarX4, nrow=1) plotList<-c(plotList, list(g)) print(Kres) } titSize<-35 labelSize<-20 CombPlot<-ggdraw()+draw_plot(plotList[[1]], x=0, y=0.8, height=0.17)+draw_plot(plotList[[2]], x=0, y=0.6, height= 0.17)+draw_plot(plotList[[3]], x=0, y=0.4, height=0.17)+draw_plot(plotList[[4]], x=0, y=0.2, height=0.17)+draw_plot(plotList[[5]], x=0, y=0.0, height=0.17)+draw_plot_label(c("a) same effect size, same variance, same methods, same range", "b) same effect size, different variance, same methods, same range", "c) same effect size, same variance, different methods, same range", "d) same effect size, same variance, same methods, different range", "e) null effect size, same variance, same methods, different range"), x=c(0,0,0,0,0), y=c(0.995,0.795, 0.595,0.395,0.195), hjust=-0.7, size=labelSize) png(filename="cumulativeKexMulti.png", width=3*480, height=2*480) grid.arrange(textGrob(expression("comparison of meta-analysis and "*K[cum]), gp=gpar(fontsize=titSize)), CombPlot, nrow=2, heights=c(0.7,10)) dev.off() sink() sink() ##############################--------Figure 8: Simulation of reproducibility results rm(list=ls()) library(ggplot2) library(grid) library(gridExtra) #fundamental equation Kr<-K*A^(-l*d) A<-2 sdy<-100 sdyx<-50 x<-1 tau<-50 K<-(log(sdy)-log(sdyx))/(log(sdy)+x+tau) R2<-(sdy^2-sdyx^2)/sdy^2 r<-sqrt(R2) Cd<-sqrt((4*r^2)/(1-r^2)) #this is the decline rate, first digit in the variables names l1<-0.01 l2<-0.5 l3<-2 l4<-10 dist<-seq(0,30, 1) lambda.a=1 avLambda.a=1 set.seed(666) dp.a<-rpois(100000,lambda.a) d.a<-dp.a/avLambda.a Kr1.a<-K*A^(-l1*d.a) Kr2.a<-K*A^(-l2*d.a) Kr3.a<-K*A^(-l3*d.a) Kr4.a<-K*A^(-l4*d.a) R21.a<-(sdy^2-sdyx^2)/(sdy^2*(A^(l1*d.a))) R22.a<-(sdy^2-sdyx^2)/(sdy^2*(A^(l2*d.a))) R23.a<-(sdy^2-sdyx^2)/(sdy^2*(A^(l3*d.a))) R24.a<-(sdy^2-sdyx^2)/(sdy^2*(A^(l4*d.a))) r1.a<-sqrt(R21.a) r2.a<-sqrt(R22.a) r3.a<-sqrt(R23.a) r4.a<-sqrt(R24.a) Cd1.a<-sqrt((4*R21.a)/(1-R21.a)) Cd2.a<-sqrt((4*R22.a)/(1-R22.a)) Cd3.a<-sqrt((4*R23.a)/(1-R23.a)) Cd4.a<-sqrt((4*R24.a)/(1-R24.a)) lambda.b=4 avLambda.b=4 dp.b<-rpois(100000,lambda.b) d.b<-dp.b/avLambda.b Kr1.b<-K*A^(-l1*d.b) Kr2.b<-K*A^(-l2*d.b) Kr3.b<-K*A^(-l3*d.b) Kr4.b<-K*A^(-l4*d.b) R21.b<-(sdy^2-sdyx^2)/(sdy^2*(A^(l1*d.b))) R22.b<-(sdy^2-sdyx^2)/(sdy^2*(A^(l2*d.b))) R23.b<-(sdy^2-sdyx^2)/(sdy^2*(A^(l3*d.b))) R24.b<-(sdy^2-sdyx^2)/(sdy^2*(A^(l4*d.b))) r1.b<-sqrt(R21.b) r2.b<-sqrt(R22.b) r3.b<-sqrt(R23.b) r4.b<-sqrt(R24.b) Cd1.b<-sqrt((4*R21.b)/(1-R21.b)) Cd2.b<-sqrt((4*R22.b)/(1-R22.b)) Cd3.b<-sqrt((4*R23.b)/(1-R23.b)) Cd4.b<-sqrt((4*R24.b)/(1-R24.b)) lambda.c=20 avLambda.c=20 dp.c<-rpois(100000,lambda.c) d.c<-dp.c/avLambda.c Kr1.c<-K*A^(-l1*d.c) Kr2.c<-K*A^(-l2*d.c) Kr3.c<-K*A^(-l3*d.c) Kr4.c<-K*A^(-l4*d.c) R21.c<-(sdy^2-sdyx^2)/(sdy^2*(A^(l1*d.c))) R22.c<-(sdy^2-sdyx^2)/(sdy^2*(A^(l2*d.c))) R23.c<-(sdy^2-sdyx^2)/(sdy^2*(A^(l3*d.c))) R24.c<-(sdy^2-sdyx^2)/(sdy^2*(A^(l4*d.c))) r1.c<-sqrt(R21.c) r2.c<-sqrt(R22.c) r3.c<-sqrt(R23.c) r4.c<-sqrt(R24.c) #-------------plot with r titSize<-32 subtitSize<-titSize*0.6 labSize<-subtitSize legendSize<-titSize*0.55 axisTitSize<-subtitSize*0.8 labPosX<-0.03 #set up data set for lines dfKlines<-data.frame(distance=rep(dist,4), lambda=as.factor(c(rep(as.character(l1), length(dist)), rep(as.character(l2), length(dist)), rep(as.character(l3), length(dist)), rep(as.character(l4), length(dist)))), Kvalue=c(K*A^(-l1*dist), K*A^(-l2*dist), K*A^(-l3*dist), K*A^(-l4*dist))) dfKlines$lambda<-factor(dfKlines$lambda, levels=c(as.character(l1), as.character(l2), as.character(l3), as.character(l4))) pKlines<-ggplot(data=dfKlines, aes(x=distance, y=Kvalue, col=lambda))+scale_colour_manual(values=c("royalblue", "green", "orange", "red"))+geom_line()+labs(y="K")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"), axis.text.x=element_text(size=labSize), legend.text=element_text(size= legendSize))+ guides(col=guide_legend(title=expression(lambda))) pDist.a<-ggplot()+geom_line(aes(x=dist, y=dpois(dist,lambda.a)/avLambda.a))+labs(x="distance", y="", title="distribution of distances")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"), axis.text.x=element_text(size=labSize)) pDist.b<-ggplot()+geom_line(aes(x=dist, y=dpois(dist,lambda.b)/avLambda.b))+labs(x="distance", y="", title="distribution of distances")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"), axis.text.x=element_text(size=labSize)) pDist.c<-ggplot()+geom_line(aes(x=dist, y=dpois(dist,lambda.c)/avLambda.c))+labs(x="distance", y="", title="distribution of distances")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain"), axis.text.x=element_text(size=labSize)) #set up data set for histograms dfr<-data.frame(r1.a, r1.b, r1.c, r2.a, r2.b, r2.c, r3.a, r3.b, r3.c, r4.a, r4.b, r4.c) pr1.a<-ggplot(data=dfr, aes(x=r1.a))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="royalblue", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r1.a), col="black", lwd=1.5) pr1.b<-ggplot(data=dfr, aes(x=r1.b))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="royalblue", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r1.b), col="black", lwd=1.5) pr1.c<-ggplot(data=dfr, aes(x=r1.c))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="royalblue", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r1.c), col="black", lwd=1.5) pr2.a<-ggplot(data=dfr, aes(x=r2.a))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="green", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r2.a), col="black", lwd=1.5) pr2.b<-ggplot(data=dfr, aes(x=r2.b))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="green", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r2.b), col="black", lwd=1.5) pr2.c<-ggplot(data=dfr, aes(x=r2.c))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="green", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r2.c), col="black", lwd=1.5) pr3.a<-ggplot(data=dfr, aes(x=r3.a))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="orange", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r3.a), col="black", lwd=1.5) pr3.b<-ggplot(data=dfr, aes(x=r3.b))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="orange", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r3.b), col="black", lwd=1.5) pr3.c<-ggplot(data=dfr, aes(x=r3.c))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="orange", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r3.c), col="black", lwd=1.5) pr4.a<-ggplot(data=dfr, aes(x=r4.a))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="red", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r4.a), col="black", lwd=1.5) pr4.b<-ggplot(data=dfr, aes(x=r4.b))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="red", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r4.b), col="black", lwd=1.5) pr4.c<-ggplot(data=dfr, aes(x=r4.c))+theme(axis.title.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.text.x =element_text(size=labSize))+geom_histogram(fill="red", bins=20)+geom_vline(xintercept=r, col="black", lty=2, lwd=1.5)+geom_vline(xintercept=mean(r4.c), col="black", lwd=1.5) titleGrob<-textGrob("reproducibility by informational distance", gp=gpar(fontsize=titSize)) layMat<-rbind(c(1,1,1), c(2,2,2), c(3,4,5), c(6,7,8), c(9,10,11), c(12,13,14), c(15,16,17)) png(filename="ReprEx-r.png", width=2*480, height=2*480) grid.arrange(layout_matrix=layMat, grobs=list(titleGrob, pKlines, pDist.a, pDist.b, pDist.c, pr1.a, pr1.b, pr1.c, pr2.a, pr2.b, pr2.c, pr3.a, pr3.b, pr3.c, pr4.a, pr4.b, pr4.c), nrow=7, heights=c(1.5,3,3,3,3,3,3)) dev.off() ##############################--------Figure 9: Density plots of data from the Reproducibility Initiative in Psychology rm(list=ls()) library(ggplot2) library(grid) library(gridExtra) library(e1071) df<-read.table("RIPdf.txt", sep="\t", header=TRUE, dec=",", stringsAsFactors=TRUE) # Calculate difference between effect sizes, re-scaled on original effect size df$rDiff<-with(df, (T.r.R-T.r.O)) #df$rDiff<-with(df, T.r.R/T.r.O) # we re-scale extreme values (due to original r=0) and count as zero any negative value #df<-subset(df, round(T.r.O,2)>0) #---attempt at histogram with ggplot #generate a dataset with the two correlation values dft<-data.frame(Orig=c(rep("Original", length(df$T.r.O)), rep("Replication", length(df$T.r.R))), r=c(df$T.r.O, df$T.r.R), expertise=as.factor(c(as.character(df$Methodology.expertise.required.O), as.character(df$Methodology.expertise.required.O)))) dft12<-subset(dft, is.element(expertise, c("No expertise required","Slight expertise required"))) dft345<-subset(dft, !is.element(expertise, c("No expertise required","Slight expertise required"))) #The numbers of each category are: #No expertise required =32 #Slight expertise required =33 #Moderate expertise required =13 #Strong expertise required =12 #Extreme expertise required=7 #test for Kurtosis kurt12o<-with(subset(dft12, Orig=="Original"), round(kurtosis(r),2)) kurt345o<-with(subset(dft345, Orig=="Original"), round(kurtosis(r),2)) kurt12r<-with(subset(dft12, Orig=="Replication"), round(kurtosis(r),2)) kurt345r<-with(subset(dft345, Orig=="Replication"), round(kurtosis(r),2)) titSize<-28 subtitSize<-titSize*0.8 labSize<<-subtitSize axisTitSize<-subtitSize*0.9 labPosX<-0.03 p12<-ggplot(dft12, aes(x=r))+geom_histogram(data=subset(dft12, Orig=="Original"), fill="blue", alpha=0.2, binwidth=0.05)+geom_density(data=subset(dft12, Orig=="Original"), aes(y=..density..*0.1*65), col="blue") p12<-p12+geom_histogram(data=subset(dft12, Orig=="Replication"), fill="red", alpha=0.2, binwidth=0.05)+geom_density(data=subset(dft12, Orig=="Replication"), col="red", aes(y=..density..*0.1*65)) p12<-p12+scale_x_continuous(limits=c(-0.5,1))+annotate("text", label=kurt12o, col="blue", size=12, x=0.75, y=10)+annotate("text", label=kurt12r, col="red", size=12, x=-0.25, y=10) p12<-p12+labs(title="no/slight expertise required", y="")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain")) p345<-ggplot(dft345, aes(x=r))+geom_histogram(data=subset(dft345, Orig=="Original"), fill="blue", alpha=0.2, binwidth=0.05)+geom_density(data=subset(dft345, Orig=="Original"), aes(y=..density..*0.1*32), col="blue") p345<-p345+geom_histogram(data=subset(dft345, Orig=="Replication"), fill="red", alpha=0.2, binwidth=0.05)+geom_density(data=subset(dft345, Orig=="Replication"), col="red", aes(y=..density..*0.1*32)) p345<-p345+scale_x_continuous(limits=c(-0.5,1))+annotate("text", label=kurt345o, col="blue", size=12, x=0.75, y=6)+annotate("text", label=kurt345r, col="red", size=12, x=-0.25, y=6) p345<-p345+labs(title="moderate/strong/extreme, exp. req.", y="")+theme(axis.title=element_text(size= axisTitSize), plot.title=element_text(size= subtitSize, hjust=0.5, face="plain")) png(filename="RipEx-rGrouped.png", width=1.5*480, height=1.5*480) grid.arrange(textGrob("reproducibility in psychology, by expertise required", gp=gpar(fontsize=titSize)),textGrob("a", x=labPosX, gp=gpar(fontsize=labSize)), p12,textGrob("b", x=labPosX, gp=gpar(fontsize=labSize)), p345, ncol=1, nrow=5, heights=c(0.75,0.1,4.5,0.1,4.5)) dev.off() ##############################----Figure 10: how K_{null} declines with |T| rm(list=ls()) library(ggplot2) library(grid) titSize<-30 axisTitSize<-titSize*0.8 labSize<<-axisTitSize*0.8 T<-2:100 y<-log2(T/(T-1)) p<-ggplot(mapping=aes(x=T, y=y))+geom_line()+scale_x_discrete(lim=c(2,10,50,99)) p<-p+theme(axis.text.y=element_text(size=labSize), axis.text.x=element_text(size=labSize), axis.title=element_text(size=axisTitSize), plot.title = element_text(size=titSize, hjust=0.3, face="plain"))+labs(x="|T|", y=expression(paste("log", frac("|T|", "|T|-1")))) png(filename="logTplot.png", width=480*1.25, height=480*0.75) grid.arrange(textGrob("information value of hypothesis falsification", gp=gpar(fontsize=titSize)), p, nrow=2,heights=c(1,10)) dev.off() ##############################--------Figure 10: simulation of informativeness of null findings rm(list=ls()) library(lattice) library(entropy) Kfun<-function(hy,hyx,hx,t){ (hy-hyx)/(hy+hx+t) } set.seed(666) x<-rnorm(1000) y1<-x+rnorm(1000,0,0.1) y2<-rnorm(1000) y3<-rnorm(1000) y4<-rnorm(1000) #calculating entropies Hx<-round(log2(100),0) Hy.1<-round(log2(sqrt(100^2+10^2)),0) Hy.i<-round(log2(100),0) Hytot<-round(log2(0.25*sqrt(100^2+10^2)+0.75*sqrt(100^2)),0) Hxy.1<-Hx+round(log2(10),0)#joint distribution xy1 Hxy.i<-Hx+Hy.i#joint distribution xy2,3,4 HT<-log2(4) Hyx.T<-0.25*(Hxy.1+3*Hxy.i) HT.yx<-HT-(Hytot-(Hyx.T-Hx)) #calculate entropies using counts range<-c(-10,10) binNum<-20 xcount<-discretize(x,numBins=binNum, r=range) y1count<-discretize(y1,numBins=binNum, r=range) y2count<-discretize(y2,numBins=binNum, r=range) y3count<-discretize(y3,numBins=binNum, r=range) y4count<-discretize(y4,numBins=binNum, r=range) xy1count<-discretize2d(x1=y1, x2=x, numBins1=binNum, r1=range, numBins2=binNum, r2=range) xy2count<-discretize2d(x1=y2, x2=x, numBins1=binNum, r1=range, numBins2=binNum, r2=range) xy3count<-discretize2d(x1=y3, x2=x, numBins1=binNum, r1=range, numBins2=binNum, r2=range) xy4count<-discretize2d(x1=y4, x2=x, numBins1=binNum, r1=range, numBins2=binNum, r2=range) #calculating entropies Hx<-entropy(xcount, method="ML") Hy.1<-entropy(y1count, method="ML") Hy.2<-entropy(y2count, method="ML") Hy.3<-entropy(y3count, method="ML") Hy.4<-entropy(y4count, method="ML") Hytot<-entropy(discretize(c(y1, y2, y3, y4),numBins=binNum, r=range), method="ML") Hytot2<-entropy(discretize(c(y1, y2),numBins=binNum, r=range), method="ML") Hxy.1<-entropy(xy1count, method="ML") Hxy.2<-entropy(xy2count, method="ML") Hxy.3<-entropy(xy3count, method="ML") Hxy.4<-entropy(xy4count, method="ML") #-----plot png(filename="KTyxEx.png", width=480*2, height=480*3, units="px") cexTxt<-3 cexTit<-7 cexSubTit<-cexTit*0.7 minx<- 5 miny<- -2.2 sep<-0.65 ylim<-c(-5,4) xlim<-c(-4,5) par(mfrow=c(3,2), mar=c(0.5,0.5,0.5,0.5)) m<-rbind(c(9,9),c(7,7),c(1,2),c(1,2),c(1,2),c(1,2),c(1,2),c(8,8),c(3,4),c(3,4),c(3,4),c(3,4),c(3,4),c(5,6),c(5,6),c(5,6),c(5,6),c(5,6)) layout(mat=m) par(mar=c(0.5,0.5,0.5,0.5)) #panel A, with only two HT<-log2(2) Hyx.T<-0.5*(Hxy.1+Hxy.2) HT.yx<-HT-(Hytot2-(Hyx.T-Hx)) KT.yx<-Kfun(hy=HT, hyx=HT.yx, hx=Hx+Hytot, t=0) plot(x,y2, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="") laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.2,2))) text(laby, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0)) labyx<-substitute(H*'('*Y*'|'*X*','*h[0]*','*tau[b]*')'==VAL, list(VAL=round(Hxy.2-Hx,2))) text(labyx, x=minx, y=miny-4*sep, cex= cexTxt, col="black", adj = c(1,0)) labht<-expression(h[0]) text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0)) plot(x,y1, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="") laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.1,2))) text(laby, x=minx, y=miny, cex= cexTxt, col="black", adj = c(1,0)) labyx<-substitute(H*'('*Y*'|'*X*','*h[1]*','*tau[b]*')'==VAL, list(VAL=round(Hxy.1-Hx,2))) text(labyx, x=minx, y=miny-sep, cex= cexTxt, col="black", adj = c(1,0)) labT<-substitute(H*'('*T*')'==VAL, list(VAL=round(HT,2))) text(labT, x=minx, y=miny-2*sep, cex= cexTxt, col="black", adj = c(1,0)) labTyx<-substitute(H*'('*T[h*','*tau]*'|'*X*','*Y*')'==VAL, list(VAL=round(HT.yx,2))) text(labTyx, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0)) labKT<-substitute(K*'('*T[h*','*tau]*';'*X*','*Y*')'==VAL, list(VAL=round(KT.yx,3))) text(labKT, x=minx, y=miny-4.2*sep, cex= cexTxt*1.2, col="red", adj = c(1,0)) labht<-expression(h[1]) text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0)) #panel B, with all four Ts #par(mfrow=c(2,2), mar=c(1,1,1,1)) HT<-log2(4) Hyx.T<-0.25*(Hxy.1+Hxy.2+Hxy.3+Hxy.4) HT.yx<-HT-(Hytot-(Hyx.T-Hx)) KT.yx<-Kfun(hy=HT, hyx=HT.yx, hx=Hx+Hytot, t=0) plot(x,y2, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="") laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.2,2))) text(laby, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0)) labyx<-substitute(H*'('*Y*'|'*X*','*h[0]*','*tau[a]*')'==VAL, list(VAL=round(Hxy.2-Hx,2))) text(labyx, x=minx, y=miny-4*sep, cex= cexTxt, col="black", adj = c(1,0)) labht<-expression(h[0]*','*tau[a]) text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0)) plot(x,y3, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="") laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.3,2))) text(laby, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0)) labyx<-substitute(H*'('*Y*'|'*X*','*h[1]*','*tau[a]*')'==VAL, list(VAL=round(Hxy.3-Hx,2))) text(labyx, x=minx, y=miny-4*sep, cex= cexTxt, col="black", adj = c(1,0)) labht<-expression(h[1]*','*tau[a]) text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0)) plot(x,y4, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="") laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.3,2))) text(laby, x=minx, y=miny-3*sep, cex= cexTxt, col="black", adj = c(1,0)) labyx<-substitute(H*'('*Y*'|'*X*','*h[0]*','*tau[b]*')'==VAL, list(VAL=round(Hxy.3-Hx,2))) text(labyx, x=minx, y=miny-4*sep, cex= cexTxt, col="black", adj = c(1,0)) labht<-expression(h[0]*','*tau[b]) text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0)) plot(x,y1, pch=20, xlim= xlim, ylim= ylim, tick=FALSE, xaxt="n", yaxt="n", xlab="", ylab="") laby<-substitute(H*'('*Y*')'==VAL, list(VAL=round(Hy.1,2))) text(laby, x=minx, y=miny+sep, cex= cexTxt, col="black", adj = c(1,0)) labyx<-substitute(H*'('*Y*'|'*X*','*h[1]*','*tau[b]*')'==VAL, list(VAL=round(Hxy.1-Hx,2))) text(labyx, x=minx, y=miny, cex= cexTxt, col="black", adj = c(1,0)) labT<-substitute(H*'('*T*')'==VAL, list(VAL=round(HT,2))) text(labT, x=minx, y=miny-sep, cex= cexTxt, col="black", adj = c(1,0)) labTyx<-substitute(H*'('*T[h*','*tau]*'|'*X*','*Y*')'==VAL, list(VAL=round(HT.yx,2))) text(labTyx, x=minx, y=miny-2*sep, cex= cexTxt, col="black", adj = c(1,0)) labKT<-substitute(K*'('*T[h*','*tau]*';'*X*','*Y*')'==VAL, list(VAL=round(KT.yx,3))) text(labKT, x=minx, y=miny-3.2*sep, cex= cexTxt*1.2, col="red", adj = c(1,0)) labKYT<-substitute(H*'('*T*')'*K*'('*T[h*','*tau]*';'*X*','*Y*')'==VAL, list(VAL=round(KT.yx*HT,3))) text(labKYT, x=minx, y=miny-4.2*sep, cex= cexTxt*1.2, col="red", adj = c(1,0)) labht<-expression(h[1]*','*tau[b]) text(labht, x=-3.5, y=3, cex= cexTxt*2, col="black", adj = c(0,0)) plot.new() text(0.5,0.5,"a) conclusive test of single hypothesis", cex=cexSubTit) plot.new() text(0.5,0.5,"b) test of single hypothesis, plus auxiliary condition", cex=cexSubTit) plot.new() text(0.5,0.5,"knowledge value of empirical tests", cex=cexTit) dev.off()