#==================== #====Front Matter==== #==================== # # R analysis script for inferential performance of participants # trained to select the earlier item in pairs of photographic stimuli # that were of adjacent rank in implied ordered lists. This was followed by # testing on new lists consisting of one item from each training list, with # new image ranks that were strictly derived from the order during training # ("Derived") or were reordered to varying degrees at test ("Scrambled"). # # Copyright Greg Jensen, 2017 # # Analysis associated with the manuscript: # # "Absolute and relative knowledge of ordinal position" # by Tina Kao, Greg Jensen, Charlotte Michaelcheck, Vincent Ferrera, and Herbert Terrace #======================== #====Libraries & Data==== #======================== library(rethinking) HDLD <- read.csv("HumanDerivedLists.csv") HSLD <- read.csv("HumanScrambledLists.csv") #============================ #====Stan Logistic Models==== #============================ # # Models implemented multilevel logistic regressions, with population-level # parameters allowed to covary according to a multivariate normal distribution. # Training performance depended only on trial number, and testing performance # depended on trial, symbolic distance, and the trial-distance interaction. human_training_model_string <- " data{ int N; // Number of observations int P; // Number of participants int partic[N]; // Participant ID vector[N] trials; // Trial number int response[N]; // Behavior (0=Incorrect, 1=Correct) } transformed data { vector[P] u; for (p in 1:P) { u[p] = 1; } } parameters{ matrix[2, P] z; cholesky_factor_corr[2] L_Omega; // prior correlation vector[2] tau_unif; row_vector[2] gamma; } transformed parameters { matrix[P,2] beta; // individual coefficients vector[2] tau; // prior scale for ( k in 1:2 ) { tau[k] = 2.5 * tan(tau_unif[k]); // Equivalent to a cauchy(0,2.5) prior } for ( p in 1:P ) { beta = u * gamma + (diag_pre_multiply(tau,L_Omega) * z)'; } } model{ real mu; to_vector(z) ~ normal(0,1); L_Omega ~ lkj_corr_cholesky(2); tau_unif ~ uniform(0,pi()/2); to_vector(gamma) ~ normal(0,5); for ( n in 1:N ) { mu = beta[partic[n],1] + beta[partic[n],2]*trials[n]; response[n] ~ bernoulli_logit(mu); } } " human_derived_model_string <- " data{ int N; // Number of observations int P; // Number of participants int partic[N]; // Participant ID vector[N] trials; // Trial number vector[N] dist; // Symbolic distance (centered on 2.5) vector[N] td; // Trial-by-dist interaction int response[N]; // Behavior (0=Incorrect, 1=Correct) } transformed data { vector[P] u; for (p in 1:P) { u[p] = 1; } } parameters{ matrix[4, P] z; cholesky_factor_corr[4] L_Omega; // prior correlation vector[4] tau_unif; row_vector[4] gamma; } transformed parameters { matrix[P,4] beta; // individual coefficients vector[4] tau; // prior scale for ( k in 1:4 ) { tau[k] = 2.5 * tan(tau_unif[k]); // Equivalent to a cauchy(0,2.5) prior } for ( p in 1:P ) { beta = u * gamma + (diag_pre_multiply(tau,L_Omega) * z)'; } } model{ real mu; to_vector(z) ~ normal(0,1); L_Omega ~ lkj_corr_cholesky(2); tau_unif ~ uniform(0,pi()/2); to_vector(gamma) ~ normal(0,5); for ( n in 1:N ) { mu = beta[partic[n],1] + beta[partic[n],2]*dist[n] + beta[partic[n],3]*trials[n] + beta[partic[n],4]*td[n]; response[n] ~ bernoulli_logit(mu); } } " human_scrambled_model_string <- " data{ int N; // Number of observations int P; // Number of participants int partic[N]; // Participant ID int pert[N]; // List type (0, 4, 8, 12, 16) vector[N] trials; // Trial number vector[N] dist; // Symbolic distance (centered on 2.5) vector[N] td; // Trial-by-dist interaction int response[N]; // Behavior (0=Incorrect, 1=Correct) } transformed data { vector[P] u; for (p in 1:P) { u[p] = 1; } } parameters{ matrix[20, P] z; cholesky_factor_corr[20] L_Omega; // prior correlation vector[20] tau_unif; row_vector[20] gamma; } transformed parameters { matrix[P,20] beta; // individual coefficients vector[20] tau; // prior scale for ( k in 1:20 ) { tau[k] = 2.5 * tan(tau_unif[k]); // Equivalent to a cauchy(0,2.5) prior } for ( p in 1:P ) { beta = u * gamma + (diag_pre_multiply(tau,L_Omega) * z)'; } } model{ real mu; to_vector(z) ~ normal(0,1); L_Omega ~ lkj_corr_cholesky(2); tau_unif ~ uniform(0,pi()/2); to_vector(gamma) ~ normal(0,5); for ( n in 1:N ) { mu = beta[partic[n],pert[n]+1] + beta[partic[n],pert[n]+2]*dist[n] + beta[partic[n],pert[n]+3]*trials[n] + beta[partic[n],pert[n]+4]*td[n]; response[n] ~ bernoulli_logit(mu); } } " human_pair_model_string <- " data{ int N; // Number of observations int P; // Number of participants int partic[N]; // Participant ID vector[N] trials; // Trial number int pairID[N]; // Pair ID int response[N]; // Behavior (0=Incorrect, 1=Correct) } transformed data { vector[P] u; for (p in 1:P) { u[p] = 1; } } parameters{ matrix[20, P] z; cholesky_factor_corr[20] L_Omega; // prior correlation vector[20] tau_unif; row_vector[20] gamma; } transformed parameters { matrix[P,20] beta; // individual coefficients vector[20] tau; // prior scale for ( k in 1:20 ) { tau[k] = 2.5 * tan(tau_unif[k]); // Equivalent to a cauchy(0,2.5) prior } for ( p in 1:P ) { beta = u * gamma + (diag_pre_multiply(tau,L_Omega) * z)'; } } model{ real mu; to_vector(z) ~ normal(0,1); L_Omega ~ lkj_corr_cholesky(2); tau_unif ~ uniform(0,pi()/2); to_vector(gamma) ~ normal(0,5); for ( n in 1:N ) { mu = beta[partic[n],pairID[n]] + beta[partic[n],pairID[n]+10]*trials[n]; response[n] ~ bernoulli_logit(mu); } } " pair_machine_t <- stan_model(model_code=human_training_model_string) pair_machine_d <- stan_model(model_code=human_derived_model_string) pair_machine_s <- stan_model(model_code=human_scrambled_model_string) pair_machine_p <- stan_model(model_code=human_pair_model_string) #========================================== #====Analysis of Training (Both Phases)==== #========================================== # # Analysis on training data was performed session-wise, first treating the Derived and # Scrambled groups as separate, then pooling them. In principle, because all participants # received identical training, the pooled analysis should offer the best estimate of # performance during training. Each participant was briefly trained on each list twice, so # the first bout of training is kept separate from the second bout. dat1 <- list(N=length(HDLD$Trial[HDLD$Phase==1]), P=length(unique(HDLD$SubjectID)), partic=HDLD$SubjectID[HDLD$Phase==1], trials=HDLD$SubTrial[HDLD$Phase==1]-1, response=HDLD$Correct[HDLD$Phase==1]) dat2 <- list(N=length(HDLD$Trial[HDLD$Phase==2]), P=length(unique(HDLD$SubjectID)), partic=HDLD$SubjectID[HDLD$Phase==2], trials=HDLD$SubTrial[HDLD$Phase==2]-1, response=HDLD$Correct[HDLD$Phase==2]) dat4 <- list(N=length(HSLD$Trial[HSLD$Phase==1]), P=length(unique(HSLD$SubjectID)), partic=HSLD$SubjectID[HSLD$Phase==1], trials=HSLD$SubTrial[HSLD$Phase==1]-1, response=HSLD$Correct[HSLD$Phase==1]) dat5 <- list(N=length(HSLD$Trial[HSLD$Phase==2]), P=length(unique(HSLD$SubjectID)), partic=HSLD$SubjectID[HSLD$Phase==2], trials=HSLD$SubTrial[HSLD$Phase==2]-1, response=HSLD$Correct[HSLD$Phase==2]) output1 <- sampling(pair_machine_t, data=dat1, iter=5000, warmup=2000, chains=4, cores=4) output2 <- sampling(pair_machine_t, data=dat2, iter=5000, warmup=2000, chains=4, cores=4) output4 <- sampling(pair_machine_t, data=dat4, iter=5000, warmup=2000, chains=4, cores=4) output5 <- sampling(pair_machine_t, data=dat5, iter=5000, warmup=2000, chains=4, cores=4) precis(output1,pars=('gamma'),depth=2) precis(output2,pars=('gamma'),depth=2) precis(output4,pars=('gamma'),depth=2) precis(output5,pars=('gamma'),depth=2) dat14 <- list(N=dat1$N+dat4$N, P=dat1$P+dat4$P, partic=c(dat1$partic,35+dat4$partic), trials=c(dat1$trials,dat4$trials), response=c(dat1$response,dat4$response)) dat25 <- list(N=dat2$N+dat5$N, P=dat2$P+dat5$P, partic=c(dat2$partic,35+dat5$partic), trials=c(dat2$trials,dat5$trials), response=c(dat2$response,dat5$response)) output14 <- sampling(pair_machine_t, data=dat14, iter=5000, warmup=2000, chains=4, cores=4) output25 <- sampling(pair_machine_t, data=dat25, iter=5000, warmup=2000, chains=4, cores=4) precis(output14,pars=('gamma'),depth=2) precis(output25,pars=('gamma'),depth=2) #============================================================ #====Analysis of Derived Lists Performance (Experiment 1)==== #============================================================ # # Estimates were obtained for performance across the five derived lists, # pooled into a single estimation paradigm. dat3 <- list(N=length(HDLD$Trial[HDLD$Phase==3]), P=length(unique(HDLD$SubjectID)), partic=HDLD$SubjectID[HDLD$Phase==3], trials=HDLD$SubTrial[HDLD$Phase==3]-1, dist=HDLD$Dist[HDLD$Phase==3]-2.5, td=(HDLD$SubTrial[HDLD$Phase==3]-1)*(HDLD$Dist[HDLD$Phase==3]-2.5), response=HDLD$Correct[HDLD$Phase==3]) output3 <- sampling(pair_machine_d, data=dat3, iter=5000, warmup=2000, chains=4, cores=4) precis(output3,pars=('gamma'),depth=2) #=================== #====Description==== #=================== pars1 <- extract.samples(output1) pars2 <- extract.samples(output2) pars3 <- extract.samples(output3) C1 <- matrix(0,32,dat1$P+5) for (t in 1:32) { for (p in 1:35) { C1[t,p] <- mean(inv_logit(pars1$beta[,p,1] + pars1$beta[,p,2]*(t-1))) } C1[t,dat1$P+1] <- mean(inv_logit(pars1$gamma[,1] + pars1$gamma[,2]*(t-1))) C1[t,(dat1$P+2):(dat1$P+5)] <- quantile(inv_logit(pars1$gamma[,1] + pars1$gamma[,2]*(t-1)),probs = c(.005,.995,0.1,0.9)) } C2 <- matrix(0,32,dat2$P+5) for (t in 1:32) { for (p in 1:35) { C2[t,p] <- mean(inv_logit(pars2$beta[,p,1] + pars2$beta[,p,2]*(t-1))) } C2[t,dat2$P+1] <- mean(inv_logit(pars2$gamma[,1] + pars2$gamma[,2]*(t-1))) C2[t,(dat2$P+2):(dat2$P+5)] <- quantile(inv_logit(pars2$gamma[,1] + pars2$gamma[,2]*(t-1)),probs = c(.005,.995,0.1,0.9)) } C3pop <- matrix(0,40,20) for (t in 1:40) { for (d in 1:4) { q <- (d-1)*5 + 1 C3pop[t,q] <- mean(inv_logit(pars3$gamma[,1] + pars3$gamma[,2]*(d-2.5) + pars3$gamma[,3]*(t-1) + pars3$gamma[,4]*(t-1)*(d-2.5))) C3pop[t,(q+1):(q+4)] <- quantile(inv_logit(pars3$gamma[,1] + pars3$gamma[,2]*(d-2.5) + pars3$gamma[,3]*(t-1) + pars3$gamma[,4]*(t-1)*(d-2.5)),probs = c(.005,.995,0.1,0.9)) } } C3sub <- matrix(0,40,dat3$P*4) for (t in 1:40) { for (d in 1:4) { q <- (d-1)*35 for (p in 1:35) { C3sub[t,p+q] <- mean(inv_logit(pars2$beta[,p,1] + pars2$beta[,p,2]*(t-1))) } } } pars14 <- extract.samples(output14) pars25 <- extract.samples(output25) C14 <- matrix(0,32,dat14$P+5) for (t in 1:32) { for (p in 1:112) { C14[t,p] <- mean(inv_logit(pars14$beta[,p,1] + pars14$beta[,p,2]*(t-1))) } C14[t,dat14$P+1] <- mean(inv_logit(pars14$gamma[,1] + pars14$gamma[,2]*(t-1))) C14[t,(dat14$P+2):(dat14$P+5)] <- quantile(inv_logit(pars14$gamma[,1] + pars14$gamma[,2]*(t-1)),probs = c(.005,.995,0.1,0.9)) } C25 <- matrix(0,32,dat25$P+5) for (t in 1:32) { for (p in 1:112) { C25[t,p] <- mean(inv_logit(pars25$beta[,p,1] + pars25$beta[,p,2]*(t-1))) } C25[t,dat25$P+1] <- mean(inv_logit(pars25$gamma[,1] + pars25$gamma[,2]*(t-1))) C25[t,(dat25$P+2):(dat25$P+5)] <- quantile(inv_logit(pars25$gamma[,1] + pars25$gamma[,2]*(t-1)),probs = c(.005,.995,0.1,0.9)) } write.csv(C1,file = "HDLD_C1.csv") write.csv(C2,file = "HDLD_C2.csv") write.csv(C3pop,file = "HDLD_C3pop.csv") write.csv(C3sub,file = "HDLD_C3sub.csv") write.csv(C14,file = "HDLD_C14.csv") write.csv(C25,file = "HDLD_C25.csv") plot(c(-1,-1),c(-1,-1),xlim=c(0,36),ylim=c(min(pars3$beta[,,1]),max(pars3$beta[,,1])),type="n",xlab="Participant",ylab="beta (intercept)") lines(c(-1,37),c(0,0),lty=2) for (i in 1:35) { points(i,mean(pars3$beta[,i,1])) lines(c(i,i),c(quantile(pars3$beta[,i,1],.005),quantile(pars3$beta[,i,1],.995))) } plot(c(-1,-1),c(-1,-1),xlim=c(0,36),ylim=c(min(pars3$beta[,,2]),max(pars3$beta[,,2])),type="n",xlab="Participant",ylab="beta (dist. effect)") lines(c(-1,37),c(0,0),lty=2) for (i in 1:35) { points(i,mean(pars3$beta[,i,2])) lines(c(i,i),c(quantile(pars3$beta[,i,2],.005),quantile(pars3$beta[,i,2],.995))) } plot(c(-1,-1),c(-1,-1),xlim=c(0,36),ylim=c(min(pars3$beta[,,3]),max(pars3$beta[,,3])),type="n",xlab="Participant",ylab="beta (slope)") lines(c(-1,37),c(0,0),lty=2) for (i in 1:35) { points(i,mean(pars3$beta[,i,3])) lines(c(i,i),c(quantile(pars3$beta[,i,3],.005),quantile(pars3$beta[,i,3],.995))) } plot(c(-1,-1),c(-1,-1),xlim=c(0,36),ylim=c(min(pars3$beta[,,4]),max(pars3$beta[,,4])),type="n",xlab="Participant",ylab="beta (dist. slope)") lines(c(-1,37),c(0,0),lty=2) for (i in 1:35) { points(i,mean(pars3$beta[,i,4])) lines(c(i,i),c(quantile(pars3$beta[,i,4],.005),quantile(pars3$beta[,i,4],.995))) } #============================================================== #====Analysis of Scrambled Lists Performance (Experiment 2)==== #============================================================== # # Estimates were obtained for performance for each of the scrambled lists, # kept separate because they varied with respect to their degree of transposition. # These are encoded as 0, 4, 8, etc. because each phase consists of four parameters. Thus, # parameters 1-4 refer to the first scrambled list, parameters 5-8 refer to the second, # and so forth. dat6 <- list(N=length(HSLD$Trial[HSLD$Phase>2]), P=length(unique(HSLD$SubjectID)), partic=HSLD$SubjectID[HSLD$Phase>2], trials=HSLD$SubTrial[HSLD$Phase>2]-1, dist=HSLD$Dist[HSLD$Phase>2]-2.5, pert=HSLD$PhaseType[HSLD$Phase>2], td=(HSLD$SubTrial[HSLD$Phase>2]-1)*(HSLD$Dist[HSLD$Phase>2]-2.5), response=HSLD$Correct[HSLD$Phase>2]) dat6$pert[dat6$pert==3] <- 0 dat6$pert[dat6$pert==4] <- 4 dat6$pert[dat6$pert==5] <- 8 dat6$pert[dat6$pert==6] <- 12 dat6$pert[dat6$pert==7] <- 16 output6 <- sampling(pair_machine_s, data=dat6, iter=5000, warmup=2000, chains=4, cores=4) precis(output6,pars=('gamma'),depth=2) #=================== #====Description==== #=================== pars4 <- extract.samples(output4) pars5 <- extract.samples(output5) pars6 <- extract.samples(output6) C4 <- matrix(0,32,dat4$P+5) for (t in 1:32) { for (p in 1:77) { C4[t,p] <- mean(inv_logit(pars4$beta[,p,1] + pars4$beta[,p,2]*(t-1))) } C4[t,dat4$P+1] <- mean(inv_logit(pars4$gamma[,1] + pars4$gamma[,2]*(t-1))) C4[t,(dat4$P+2):(dat4$P+5)] <- quantile(inv_logit(pars4$gamma[,1] + pars4$gamma[,2]*(t-1)),probs = c(.005,.995,0.1,0.9)) } C5 <- matrix(0,32,dat5$P+5) for (t in 1:32) { for (p in 1:77) { C5[t,p] <- mean(inv_logit(pars5$beta[,p,1] + pars5$beta[,p,2]*(t-1))) } C5[t,dat5$P+1] <- mean(inv_logit(pars5$gamma[,1] + pars5$gamma[,2]*(t-1))) C5[t,(dat5$P+2):(dat5$P+5)] <- quantile(inv_logit(pars5$gamma[,1] + pars5$gamma[,2]*(t-1)),probs = c(.005,.995,0.1,0.9)) } perts <- c(0, 4, 8, 12, 16) C6pop <- matrix(0,80,100) for (t in 1:80) { for (d in 1:4) { for (p in 1:5) { q <- (d-1)*5 + (p-1)*20 + 1 qq <- inv_logit(pars6$gamma[,1+perts[p]] + pars6$gamma[,2+perts[p]]*(d-2.5) + pars6$gamma[,3+perts[p]]*(t-1) + pars6$gamma[,4+perts[p]]*(t-1)*(d-2.5)) C6pop[t,q] <- mean(qq) C6pop[t,(q+1):(q+4)] <- quantile(qq,probs = c(.005,.995,0.1,0.9)) } } } write.csv(C6pop,file = "HDLD_C6pop.csv") q <- density(pars6$gamma[,20],n=200) q$y <- (q$y/(max(q$y)))*0.3 q$y <- c(q$y,rev(q$y)*(-1)) + 4.5 q$x <- c(q$x,rev(q$x)) plot(q$y,q$x) clip <- pipe("pbcopy", "w") write.table(q$y, file=clip, row.names = FALSE) close(clip) clip <- pipe("pbcopy", "w") write.table(q$x, file=clip, row.names = FALSE) close(clip) listorder <- matrix(1,77,7) j <- 1 for (i in 2:length(HSLD$SubjectID)) { if (HSLD$SubjectID[i] != HSLD$SubjectID[i-1]) { j <- 0 } if (HSLD$PhaseType[i] != HSLD$PhaseType[i-1]) { j <- j+1 listorder[HSLD$SubjectID[i],j] <- HSLD$PhaseType[i] } } #============================================ #====Pairwise Analysis of Scrambled Lists==== #============================================ # # To assess why performance changed in the scrambled lists, an analysis was also # performed treating each of the 10 possible item pairs as distinct. Under this # analysis, only a slope and intercept were used to fit each pair, resulting in # 20 parameters in total. HSLD$PairID <- rep(0,length(HSLD$SubjectID)) q <- HSLD$PairID for (i in 1:length(HSLD$SubjectID)) { q[i] <- min(HSLD$S1[i],HSLD$S2[i])*10 + max(HSLD$S1[i],HSLD$S2[i]) } q <- sort(unique(q)) for (i in 1:length(HSLD$SubjectID)) { HSLD$PairID[i] <- match(min(HSLD$S1[i],HSLD$S2[i])*10 + max(HSLD$S1[i],HSLD$S2[i]),q) } dat61 <- list(N=length(HSLD$Trial[HSLD$PhaseType==3]), P=length(unique(HSLD$SubjectID)), partic=HSLD$SubjectID[HSLD$PhaseType==3], trials=HSLD$SubTrial[HSLD$PhaseType==3]-1, pairID=HSLD$PairID[HSLD$PhaseType==3], response=HSLD$Correct[HSLD$PhaseType==3]) dat62 <- list(N=length(HSLD$Trial[HSLD$PhaseType==4]), P=length(unique(HSLD$SubjectID)), partic=HSLD$SubjectID[HSLD$PhaseType==4], trials=HSLD$SubTrial[HSLD$PhaseType==4]-1, pairID=HSLD$PairID[HSLD$PhaseType==4], response=HSLD$Correct[HSLD$PhaseType==4]) dat63 <- list(N=length(HSLD$Trial[HSLD$PhaseType==5]), P=length(unique(HSLD$SubjectID)), partic=HSLD$SubjectID[HSLD$PhaseType==5], trials=HSLD$SubTrial[HSLD$PhaseType==5]-1, pairID=HSLD$PairID[HSLD$PhaseType==5], response=HSLD$Correct[HSLD$PhaseType==5]) dat64 <- list(N=length(HSLD$Trial[HSLD$PhaseType==6]), P=length(unique(HSLD$SubjectID)), partic=HSLD$SubjectID[HSLD$PhaseType==6], trials=HSLD$SubTrial[HSLD$PhaseType==6]-1, pairID=HSLD$PairID[HSLD$PhaseType==6], response=HSLD$Correct[HSLD$PhaseType==6]) dat65 <- list(N=length(HSLD$Trial[HSLD$PhaseType==7]), P=length(unique(HSLD$SubjectID)), partic=HSLD$SubjectID[HSLD$PhaseType==7], trials=HSLD$SubTrial[HSLD$PhaseType==7]-1, pairID=HSLD$PairID[HSLD$PhaseType==7], response=HSLD$Correct[HSLD$PhaseType==7]) output61 <- sampling(pair_machine_p, data=dat61, iter=5000, warmup=2000, chains=4, cores=4) output62 <- sampling(pair_machine_p, data=dat62, iter=5000, warmup=2000, chains=4, cores=4) output63 <- sampling(pair_machine_p, data=dat63, iter=5000, warmup=2000, chains=4, cores=4) output64 <- sampling(pair_machine_p, data=dat64, iter=5000, warmup=2000, chains=4, cores=4) output65 <- sampling(pair_machine_p, data=dat65, iter=5000, warmup=2000, chains=4, cores=4) pars61 <- extract.samples(output61) pars62 <- extract.samples(output62) pars63 <- extract.samples(output63) pars64 <- extract.samples(output64) pars65 <- extract.samples(output65)