#######################################
# Students vs. representative samples #
#######################################
# R code for "Do student samples provide an accurate estimate of the general public?" by Hanel & Vione
# This R code allows everyone to reconstruct our analyses.
# Download the WVS data set from http://www.worldvaluessurvey.org, 6th round
# We have computed the DVs in SPSS with the syntax provided at the end of this file

library(foreign)
library(BayesFactor)
library(effsize)
library(car)
library(lme4)

# Import the data into R.
# df: individual-level WVS wave 6 data; ni: the authors' country-level file.
# Adjust both paths to wherever the files are stored on your machine.
df <- read.spss(
  "C:\\Users\\hanel\\Desktop\\WV6_Data_spss_v_2015_04_18.sav",
  use.missings = TRUE,
  use.value.labels = FALSE,
  to.data.frame = TRUE
)
ni <- read.spss(
  "C:\\Users\\hanel\\Desktop\\PhD\\Hanel_Vione data Are students an accurate estimate of the general public.sav",
  use.missings = TRUE,
  use.value.labels = FALSE,
  to.data.frame = TRUE
)


# IMPORTANT: The column positions in your dataset may differ from those in ours. For example, in our dataset, extraversion is in column 438. If this is different in yours, simply replace 438 with the number of the column you are using. Further, we have added a column (column 2) to our SPSS data file, recoding the country codes into numbers from 1 to 60 (as there are 60 countries in the dataset). This was done to facilitate for-loops.

# Add the country-level scores to each participant.
# Each participant is matched to a row of ni via the rank of their country code
# among the sorted codes in df[, 2]. Unlike rep(..., table(...)), this does not
# require df to be sorted by country.
# Assumes ni holds one row per country, in ascending order of the codes in
# df[, 2] -- TODO confirm for your ni file.
country_rank <- as.integer(factor(df[, 2]))
stopifnot(nlevels(factor(df[, 2])) == nrow(ni)) # one ni row per country in df

df$embedded <- ni$embedded[country_rank]
cor(tapply(df$embedded, df$V2B, mean, na.rm = TRUE), ni$embedded, use = "pairwise.complete.obs") # For controlling, should be 1

df$intel.auton <- ni$intel.auton[country_rank]
cor(tapply(df$intel.auton, df$V2B, mean, na.rm = TRUE), ni$intel.auton, use = "pairwise.complete.obs") # For controlling, should be 1

##################
# Hypothesis 2
# Cohen's d (students vs. everyone else) within each country, for one dependent variable.
# Set dv to the column index of the variable of interest and re-run.
dv <- 436
n <- length(table(df[, 2])) # number of countries
is_student <- df[, 299] == 6 # column 299 == 6 marks students
y <- matrix(NA, nrow = n, ncol = 1)
for (i in seq_len(n)) {
  in_country <- df[, 2] == i
  d_i <- cohen.d(
    df[, dv][is_student & in_country],
    df[, dv][!is_student & in_country],
    na.rm = TRUE
  )
  # Third element of the returned object -- presumably the effect-size estimate; confirm for your effsize version.
  y[i, ] <- as.numeric(d_i[[3]])
}

# Bayes factor (students vs. everyone else) within each country.
# Only countries with a non-missing Cohen's d in y are processed.
dv <- 436 # dependent variable
n <- length(table(df[, 2]))
x <- matrix(NA, nrow = n, ncol = 1)
is_student <- df[, 299] == 6
for (i in which(!is.na(y))) {
  in_country <- df[, 2] == i
  students_i <- na.omit(df[, dv][is_student & in_country])
  others_i <- na.omit(df[, dv][!is_student & in_country])
  bf_i <- ttestBF(students_i, others_i)
  # The bf column is exponentiated before it is stored.
  x[i, ] <- exp(bf_i@bayesFactor$bf)
}
elder <- x # keep a copy under the name of the variable just analysed
round(sort(x), 3)


# Write the Bayes factors of all 12 variables to one table.
# NOTE(review): of these inputs only `elder` is assigned above; ex, ag, co, ne and op
# are assigned further down with different content, and vio, ill, sex, trust, demo
# and confi are not assigned anywhere in this script. Presumably each is a copy of x
# saved after re-running the Bayes factor loop with another dv -- confirm before running.
bf_all <- cbind(ex, ag, co, ne, op, vio, ill, sex, trust, demo, confi, elder)
write.table(bf_all, "BF for all 12 variables.txt")

# Attach country names (listed in the order of the country codes in df[, 2]) to the
# values in x, sort by country name, summarise, and save.
country_names <- c(
  "Algeria", "Azerbaijan", "Argentina", "Australia", "Bahrain", "Armenia",
  "Brazil", "Belarus", "Chile", "China", "Taiwan", "Colombia", "Cyprus",
  "Ecuador", "Estonia", "Georgia", "Palestine", "Germany", "Ghana", "Hongkong",
  "India", "Iraq", "Japan", "Kazakhstan", "Jordan", "Republic of Korea",
  "Kuwait", "Kyrgyz Republic", "Lebanon", "Libya", "Malaysia", "Mexico",
  "Morocco", "Netherlands", "New Zealand", "Nigeria", "Pakistan", "Peru",
  "Philippines", "Poland", "Qatar", "Romania", "Russia", "Rwanda", "Singapore",
  "Slovenia", "South Africa", "Zimbabwe", "Spain", "Sweden", "Thailand",
  "Trinidad and Tobago", "Tunisia", "Turkey", "Ukraine", "Egypt",
  "United States of America", "Uruguay", "Uzbekistan", "Yemen"
)
# deparse.level = 0 keeps the result free of column names.
adj <- cbind(country_names, x, deparse.level = 0)
alphabetical <- order(adj[, 1])
adj <- adj[alphabetical, , drop = FALSE] # Order adj alphabetically (country)
adj_values <- as.numeric(adj[, 2]) # cbind() with names turned the numbers into text
mean(adj_values, na.rm = TRUE)
sd(adj_values, na.rm = TRUE)
write.table(adj, "respects_eldery.txt")

# Relate the per-country values in adj[, 2] to country-level indicators in ni:
# a correlation matrix, a single correlation test, and a multiple regression.
# NOTE(review): adj was re-ordered alphabetically by country name above, whereas ni is
# not re-ordered here -- confirm both are in the same country order before
# interpreting these statistics.
# NOTE(review): cor() and cor.test() index ni columns 196, 200, 35 and 347, while lm()
# and the trailing comment refer to columns 191, 195, 30 and 342 (an offset of 5) --
# confirm which set matches your ni file.
cor(cbind(as.numeric(adj[,2]), ni[,c(196, 200, 35, 347)]), use='pairwise.complete.obs') # ni[,30]: HDI, 191: embeddedness (Schwartz CVO), 195: intellectual autonomy, 342:Democracy Index, 
cor.test(as.numeric(adj[,2]), ni[,196])
summary(lm(as.numeric(adj[,2])~ni[,191] + ni[,195] + ni[,30] + ni[,342]))


# simple ANOVA: country (V2B) x student status (STUDENT) on MORAL_PERSO
library(ez)
# ezANOVA() needs a participant identifier for `wid`. Create it here so that this
# call does not depend on the multilevel set-up further down the script.
df$partno <- seq_len(nrow(df))
ezANOVA(df, dv = MORAL_PERSO, wid = .(partno), between = .(V2B, STUDENT), detailed = TRUE, type = 2)


##############
# Multilevel #
##############
# Visual inspection: one panel per country, points plus a fitted regression line.
library(lattice)
xyplot(
  MORAL_PERSO ~ STUDENT | V2B,
  data = df,
  type = c("p", "r")
) # Both intercepts and slopes vary across groups

library(languageR)
library(lmerTest)
# Prepare df for the multilevel models.
# Exclude Argentina (code 3 in column 2) because of missing values for profession.
df <- as.data.frame(df[df[, 2] != 3, ])
df$partno <- seq_len(nrow(df)) # running participant number
df$V2B <- as.factor(df$V2B) # country as grouping factor
df$STUDENT <- factor(
  df$STUDENT,
  levels = c(1, 2),
  labels = c("Students", "General public")
)



## Hypotheses 2 (assumption): Are slopes random? If so, this would indicate that students differ unsystematically across countries.
# For each dependent variable, a random-intercept model (fixed slope) is compared
# with a random-intercept-and-slope model via a likelihood ratio test.
# Order: extraversion, agreeableness, conscientiousness, neuroticism, openness,
# personal-sexual issues, domestic violence, dishonest-illegal issues, trust in
# strangers, understanding of democracy, confidence in political institutions,
# respect towards elderly.
# NOTE(review): the SPSS syntax at the end of this file computes CONFI_POL and ELDERLY,
# not CONF_POL and AGEISM -- confirm the variable names in your data file.
h2_outcomes <- c(
  "EXTRA", "AGREEA", "CONSCI", "NEURO", "OPEN",
  "MORAL_PERSO", "MORAL_VIOL", "MORAL_ILLEG",
  "TRUST_ST", "DEMOCR", "CONF_POL", "AGEISM"
)
h2_show_summary <- c("EXTRA", "MORAL_PERSO") # full model summary is printed for these two only

for (h2_outcome in h2_outcomes) {
  h2_dv <- as.name(h2_outcome)
  # bquote() splices the variable name into the formula, so each call is
  # the same as if it had been typed out by hand.
  h2.null <- eval(bquote(
    lmer(.(h2_dv) ~ STUDENT + (1 | V2B), data = df, na.action = na.omit)
  )) # Random intercept, fixed slope
  h2 <- eval(bquote(
    lmer(.(h2_dv) ~ STUDENT + (STUDENT | V2B), data = df, na.action = na.omit)
  )) # Random intercept and random slope
  if (h2_outcome %in% h2_show_summary) {
    print(summary(h2))
  }
  print(anova(h2.null, h2)) # Likelihood ratio test
}

# Correlation between Cohen's ds and fixed effect from MLM
# Both vectors hold one value per dependent variable, entered by hand.
d <- c(0.10, -0.08, -0.25, 0.03, -0.02, 0.06, 0.18, 0.21, -0.04, -0.03, -0.03, -0.02)
fe_raw <- c(-.09, .05, .22, -.04, .01, -.065, -.23, -.365, .03, .05, .02, .05)
fe <- -fe_raw # flip the sign of the fixed effects
cor(d, fe)
mean(d) - mean(fe)


# Testing if embeddedness explains variation. However, this test is not relevant for our hypothesis 2 because it only explores whether embeddedness adds anything to the model, not if it explains the variations between students and general public.
# Both models are fitted to the rows with a non-missing embeddedness score, which
# avoids the error message "models were not all fitted to the same size of dataset".
df1 <- subset(df, !is.na(embedded))
h2 <- lmer(
  MORAL_PERSO ~ STUDENT + (STUDENT | V2B),
  data = df1, na.action = na.omit
)
h2b <- lmer(
  MORAL_PERSO ~ embedded + STUDENT + (STUDENT | V2B),
  data = df1, na.action = na.omit
)
anova(h2, h2b)







#####
# Hypothesis 1a: Which medians show greater variability: Those of the students or of the general sample?
library(car) # for leveneTest() function

# Country medians of column dv_col, for students (students = TRUE) or for everyone else.
# Returns a data frame with columns Group.1 (country code) and x (median).
h1a_big5_medians <- function(dv_col, students) {
  is_student <- df[, 299] == 6
  keep <- if (students) is_student else !is_student
  aggregate(df[, dv_col][keep], list(df[, 2][keep]), median, na.rm = TRUE)
}

# Prints the SD of the student and general medians, their correlation, and a
# Levene test on the two sets of medians. Returns (invisibly) the stacked matrix
# used for the Levene test: column 1 = group (1 students, 2 general), column 2 = median.
h1a_big5_compare <- function(stud, gen) {
  print(sd(stud$x, na.rm = TRUE))
  print(sd(gen$x, na.rm = TRUE))
  # Pair the medians by country code rather than by position, so that a country
  # missing in only one of the two groups cannot shift the pairing.
  paired <- merge(stud, gen, by = "Group.1", suffixes = c(".s", ".g"))
  paired <- paired[complete.cases(paired), ]
  print(cor(paired$x.s, paired$x.g))
  # Group sizes come from the data instead of a hard-coded 25.
  stud_medians <- as.numeric(na.omit(stud[, 2]))
  gen_medians <- as.numeric(na.omit(gen[, 2]))
  stacked <- cbind(
    rep(c(1, 2), times = c(length(stud_medians), length(gen_medians))),
    c(stud_medians, gen_medians)
  )
  print(leveneTest(stacked[, 2], as.factor(stacked[, 1])))
  invisible(stacked)
}

# Extraversion (data only available for 25 countries)
ev.s <- h1a_big5_medians(438, students = TRUE) # students
ev.g <- h1a_big5_medians(438, students = FALSE) # general population
ev <- h1a_big5_compare(ev.s, ev.g)

# Agreeableness (data only available for 25 countries)
ag.s <- h1a_big5_medians(439, students = TRUE) # students
ag.g <- h1a_big5_medians(439, students = FALSE) # general population
ag <- h1a_big5_compare(ag.s, ag.g)

# Conscientiousness (data only available for 25 countries)
co.s <- h1a_big5_medians(440, students = TRUE) # students
co.g <- h1a_big5_medians(440, students = FALSE) # general population
co <- h1a_big5_compare(co.s, co.g)

# Neuroticism (data only available for 25 countries)
ne.s <- h1a_big5_medians(441, students = TRUE) # students
ne.g <- h1a_big5_medians(441, students = FALSE) # general population
ne <- h1a_big5_compare(ne.s, ne.g)

# Openness (data only available for 25 countries)
op.s <- h1a_big5_medians(442, students = TRUE) # students
op.g <- h1a_big5_medians(442, students = FALSE) # general population
op <- h1a_big5_compare(op.s, op.g)


# Hypothesis 1a for the remaining dependent variables (no data for Argentina available).

# Country medians of column dv_col, for students (students = TRUE) or for everyone else.
# Returns a data frame with columns Group.1 (country code) and x (median).
h1a_medians <- function(dv_col, students) {
  is_student <- df[, 299] == 6
  keep <- if (students) is_student else !is_student
  aggregate(df[, dv_col][keep], list(df[, 2][keep]), median, na.rm = TRUE)
}

# Prints the correlation between student and general medians, draws a boxplot of
# both sets of medians, and prints a Levene test. Returns (invisibly) the stacked
# matrix: column 1 = group (1 students, 2 general), column 2 = median.
#   main, fill, ylab: title, box colour and y-axis label of the boxplot
#   cor_use:          passed to cor() as `use`
h1a_plot_and_test <- function(stud, gen, main, fill, ylab = "", cor_use = "everything") {
  # Pair the medians by country code rather than by position.
  paired <- merge(stud, gen, by = "Group.1", suffixes = c(".s", ".g"))
  print(cor(paired$x.s, paired$x.g, use = cor_use))
  # Group sizes come from the data instead of a hard-coded 59.
  stacked <- cbind(
    rep(c(1, 2), times = c(nrow(stud), nrow(gen))),
    c(as.numeric(stud[, 2]), as.numeric(gen[, 2]))
  )
  # The x-axis title is left blank; the custom axis below labels the two groups.
  boxplot(stacked[, 2] ~ stacked[, 1], col = fill, horizontal = FALSE, xlab = "", ylab = ylab, main = main, border = "orange", axes = FALSE)
  axis(1, labels = c("Students", "General public"), at = c(1, 2))
  axis(2)
  # The group is passed as a factor in every call, so no coercion warning is raised.
  print(leveneTest(stacked[, 2], as.factor(stacked[, 1])))
  invisible(stacked)
}

# Domestic violence:
# NOTE(review): `dv` is also used as the column-index variable in the Hypothesis 2
# loops; it is overwritten with a matrix here, so reset it before re-running those loops.
dv.s <- h1a_medians(447, students = TRUE) # students
dv.g <- h1a_medians(447, students = FALSE) # general population
dv <- h1a_plot_and_test(dv.s, dv.g, main = "Attitudes towards domestic violence", fill = "blue", ylab = "Never justifiable (1) -- Always justifiable (10)")

# Dishonest-illegal issues:
di.s <- h1a_medians(446, students = TRUE) # students
di.g <- h1a_medians(446, students = FALSE) # general population
di <- h1a_plot_and_test(di.s, di.g, main = "Dishonest-illegal issues", fill = "darkgreen", ylab = "Never justifiable (1) -- Always justifiable (10)")

# Personal-sexual issues:
ps.s <- h1a_medians(445, students = TRUE) # students
ps.g <- h1a_medians(445, students = FALSE) # general population
ps <- h1a_plot_and_test(ps.s, ps.g, main = "Personal-sexual issues", fill = "darkred", ylab = "Never justifiable (1) -- Always justifiable (10)")

# Trust in strangers:
ts.s <- h1a_medians(433, students = TRUE) # students
ts.g <- h1a_medians(433, students = FALSE) # general population
ts <- h1a_plot_and_test(ts.s, ts.g, main = "Trust in strangers", fill = "darkred", cor_use = "pairwise.complete.obs")

# Understanding of democracy:
ud.s <- h1a_medians(434, students = TRUE) # students
ud.g <- h1a_medians(434, students = FALSE) # general population
ud <- h1a_plot_and_test(ud.s, ud.g, main = "Understanding of democracy", fill = "darkred", cor_use = "pairwise.complete.obs")

# Confidence in political institutions:
cp.s <- h1a_medians(435, students = TRUE) # students
cp.g <- h1a_medians(435, students = FALSE) # general population
cp <- h1a_plot_and_test(cp.s, cp.g, main = "Confidence in political institutions", fill = "darkred", cor_use = "pairwise.complete.obs")

# Respects towards elderly:
re.s <- h1a_medians(436, students = TRUE) # students
re.g <- h1a_medians(436, students = FALSE) # general population
re <- h1a_plot_and_test(re.s, re.g, main = "Respects towards elderly", fill = "darkred", cor_use = "pairwise.complete.obs")



####
# Hypothesis 1b: Is the variance of students within one country smaller compared to the general public? Test the overall SDs of both groups across 59 countries
# For every dependent variable, the within-country SD is computed separately for
# students and for everyone else; the two sets of SDs are then compared with a
# t-test, a Wilcoxon test and Cohen's d.
h1b_columns <- c(
  "Extraversion" = 438,
  "Agreeableness" = 439,
  "Conscientiousness" = 440,
  "Neuroticism" = 441,
  "Openness" = 442,
  "MA towards personal-sexual behavior" = 445,
  "MA towards dishonest-illegal behavior" = 446,
  "MA towards violence" = 447,
  "Trust in strangers" = 433,
  "Understanding of democracy" = 434,
  "Confidence in political institutions" = 435,
  "Respect towards elderly" = 436
)
h1b_is_student <- df[, 299] == 6

for (h1b_label in names(h1b_columns)) {
  h1b_col <- h1b_columns[[h1b_label]]
  stud <- aggregate(df[, h1b_col][h1b_is_student], list(df[, 2][h1b_is_student]), sd, na.rm = TRUE)
  gen <- aggregate(df[, h1b_col][!h1b_is_student], list(df[, 2][!h1b_is_student]), sd, na.rm = TRUE)
  cat("\n", h1b_label, "\n", sep = "") # header so the printed tests can be told apart
  print(t.test(stud[, 2], gen[, 2]))
  print(wilcox.test(stud[, 2], gen[, 2]))
  # na.rm = TRUE is passed for every variable, so missing country SDs are
  # handled the same way throughout.
  print(cohen.d(stud[, 2], gen[, 2], na.rm = TRUE))
}

# 2) Conducting a Levene-Test in each of the 59 countries
# leveneTest() expects the outcome plus ONE grouping factor, so within each country
# the outcome is tested against student status (students vs. general public).
DV <- 436 # column index of the dependent variable; change to test another variable
levene_country <- as.character(df[, 2])
levene_codes <- unique(levene_country[!is.na(levene_country)])
levene_by_country <- t(vapply(levene_codes, function(code) {
  rows <- which(levene_country == code & !is.na(df[, 299]) & !is.na(df[, DV]))
  status <- factor(
    df[rows, 299] == 6,
    levels = c(TRUE, FALSE),
    labels = c("Students", "General public")
  )
  # The test needs at least two observations in each group.
  if (min(table(status)) < 2) {
    return(c(F = NA_real_, p = NA_real_))
  }
  res <- leveneTest(df[rows, DV], status)
  c(F = res[1, "F value"], p = res[1, "Pr(>F)"])
}, numeric(2)))
round(levene_by_country, 3) # one row per country code: F statistic and p-value



################
# Hypotheses 3 #
################
# Correlate two country-level indicators (ni columns 34 and 38) with the
# within-country SDs of the students' Big Five scores.
ni <- read.spss(
  "C:\\Users\\johaddon\\Desktop\\PhD\\Cross cultural study\\Students vs. representative sample\\Hanel_Vione data Are students an accurate estimate of the general public.sav",
  use.missings = TRUE,
  use.value.labels = FALSE,
  to.data.frame = TRUE
) # Make sure the variable WVSorder is ordered ascending.

# SD of column dv_col among students, per country.
h3_student_sd <- function(dv_col) {
  is_student <- df[, 299] == 6
  aggregate(df[, dv_col][is_student], list(df[, 2][is_student]), sd, na.rm = TRUE)
}
ex <- h3_student_sd(438)
ag <- h3_student_sd(439)
co <- h3_student_sd(440)
ne <- h3_student_sd(441)
op <- h3_student_sd(442)

h3_rows <- c(1:2, 4:60) # all rows of ni except row 3
round(
  cor(
    cbind(ni[h3_rows, 34], ni[h3_rows, 38], ex$x, ag$x, co$x, ne$x, op$x),
    use = "pairwise.complete.obs"
  ),
  2
)



#################################
# Figure for selected countries #
#################################
# Figure 1
# Grouped bar chart of Cohen's d for conscientiousness and openness, saved as Fig1.png.
wc <- read.spss(
  "C:\\Users\\hanel\\Desktop\\Western countries2.sav",
  use.missings = TRUE,
  use.value.labels = FALSE,
  to.data.frame = TRUE
) # This is a subset of the uploaded dataset which contains only the six countries used in Figure 1, including d scores for conscientiousness and openness. Create this file manually based on the results of the loop from above (Hypothesis 2) or contact us for the data (paulhanel@gmail.com)
colnames(wc) <- c("Country", "Conscientiousness", "Openness")
wc1 <- as.matrix(wc[, 2:3])
rownames(wc1) <- wc[, 1]

fig1_colours <- c("blue", "red", "gold", "black", "orange", "green4")
png("Fig1.png", width = 10.89, height = 5.61, units = "cm", res = 400, pointsize = 4)
barplot(
  wc1,
  beside = TRUE,
  legend.text = TRUE,
  axes = FALSE,
  ylab = "Cohen's d: Students - representative sample",
  args.legend = list(x = "topright"),
  xpd = TRUE,
  col = fig1_colours
)
axis(2, las = 1)
# Adding significance stars: Significance differences of students to general public
# xc/yc are the x/y positions of the labels; the NA in yc marks the gap between the two bar groups.
xc <- seq(1.5, 13.5, 1)
yc <- c(.02, .02, -.02, .02, .02, -.02, NA, -.02, -.02, .02, -.02, -.02, .02)
star <- c("***", "***", "***", "***", "", "**", "", "*", "***", "", "**", "", "***")
text(xc, yc, star)
dev.off()
# p-values behind the stars above in "star" vector:
# Welch t-test of students vs. general public within each selected country.
DV <- 442
cou <- c(Brazil = 7, China = 10, Colombia = 12, Germany = 18, India = 21, Pakistan = 37) # country codes as used in df[, 2]
# All p-values are collected in one named vector, so they can be read off per country.
fig1_p <- vapply(cou, function(i) {
  in_country <- df[, 2] == i
  t.test(
    df[, DV][df[, 299] == 6 & in_country],
    df[, DV][df[, 299] != 6 & in_country]
  )$p.value
}, numeric(1))
print(round(fig1_p, 5))



# Figure 2
# Grouped bar chart of Cohen's d for the three moral-attitude variables, saved as Fig2.png.
wc <- read.spss(
  "C:\\Users\\Paul\\Desktop\\Western countries.sav",
  use.missings = TRUE,
  use.value.labels = FALSE,
  to.data.frame = TRUE
) # This is a subset of the original dataset
colnames(wc) <- c("Country", "MA: Violence", "MA: Dishonesty", "MA: Sexuality", "Trust in Strangers", "Democracy", "Confidence", "Respect elderly")
wc1 <- as.matrix(wc[, 2:4])
rownames(wc1) <- wc[, 1]

fig2_colours <- c("gold3", "blue", "red", "black", "orange", "indianred3", "green", "white")
png("Fig2.png", width = 12.89, height = 5.61, units = "cm", res = 400, pointsize = 4.5)
barplot(
  wc1,
  beside = TRUE,
  legend.text = TRUE,
  axes = FALSE,
  ylab = "Cohen's d: Students - representative sample",
  col = fig2_colours
)
axis(2, las = 1)
# Adding significance stars: Significance differences of students to general public
# xc/yc are the x/y positions of the labels; NA entries in yc mark the gaps between bar groups.
xc <- seq(1.5, 26.5, 1)
yc <- c(-.02, .02, .02, -.02, .02, -.02, -.02, -.02, NA, -.02, -.02, -.02, -.02, .02, -.02, -.02, -.02, NA, -.02, -.02, -.02, -.02, .02, -.02, -.02, -.02)
star <- c("","","","","*","","","","","***","","","***","*","","**","**","","","**","***","*","**","","","*")
text(xc, yc, star)
dev.off()
# p-values behind the stars above in "star" vector:
# Welch t-test of students vs. general public within each selected country.
DV <- 447
cou <- c(Australia = 36, Brazil = 76, China = 156, Germany = 276, India = 356, Japan = 392, "New Zealand" = 554, USA = 840) # country codes as used in df[, 3]
# All p-values are collected in one named vector, so they can be read off per country.
fig2_p <- vapply(cou, function(i) {
  in_country <- df[, 3] == i
  t.test(
    df[, DV][df[, 299] == 6 & in_country],
    df[, DV][df[, 299] != 6 & in_country]
  )$p.value
}, numeric(1))
print(fig2_p)


# Figure 3
# Grouped bar chart of Cohen's d for columns 5 to 8 of wc (read in for Figure 2), saved as Fig3.png.
wc2 <- as.matrix(wc[, 5:8])
rownames(wc2) <- wc[, 1]

fig3_colours <- c("gold3", "blue", "red", "black", "orange", "indianred3", "green", "white")
png("Fig3.png", width = 13.89, height = 5.11, units = "cm", res = 400, pointsize = 4.5)
barplot(
  wc2,
  beside = TRUE,
  legend.text = TRUE,
  axes = FALSE,
  ylab = "Cohen's d: Students - representative sample",
  args.legend = list(x = "bottomleft"),
  xpd = TRUE,
  col = fig3_colours
)
axis(2, las = 1)
# Adding significance stars: Significance differences of students to general public
# xc/yc are the x/y positions of the labels; NA entries in yc mark the gaps between bar groups.
# NOTE(review): three markers in `star` are a mis-encoded character (shown as "�");
# presumably a symbol for marginal significance -- restore the intended character.
xc <- seq(1.5, 35.5, 1)
yc <- c(-.02, .02, .02, .02, .02, -.02, .02, -.02, NA, -.02, -.02, .02, .02, .02, .02, .02, -.02, NA, .02, -.02, -.02, .02, -.02, -.02, -.02, .02, NA, .02, .02, -.02, .02, .02, -.02, -.02, .02)
star <- c("","", "", "", "**","", "","","","", "","","","", "","","","", "","","*","�","","","","","","�","","","","","","�","**")
text(xc, yc, star)
dev.off()
# p-values behind the stars above in "star" vector:
# Welch t-test of students vs. general public within each selected country.
DV <- 446
cou <- c(Australia = 36, Brazil = 76, China = 156, Germany = 276, India = 356, Japan = 392, "New Zealand" = 554, USA = 840) # country codes as used in df[, 3]
# The second sample is the general public of the SAME country (df[, 299] != 6),
# matching the Figure 1 and Figure 2 loops.
fig3_p <- vapply(cou, function(i) {
  in_country <- df[, 3] == i
  t.test(
    df[, DV][df[, 299] == 6 & in_country],
    df[, DV][df[, 299] != 6 & in_country]
  )$p.value
}, numeric(1))
print(fig3_p)


# SPSS-Syntax for computing the DVs
# The syntax below is SPSS, not R. It is kept as comments so that this file can be
# parsed and sourced as an R script; paste it (without the leading "# ") into SPSS.
# NOTE(review): the R code above uses the variable names CONF_POL and AGEISM, whereas
# this syntax creates CONFI_POL and ELDERLY -- confirm which names your data file uses.
#
# RECODE V160A (1=5) (2=4) (4=2) (5=1).
# RECODE V160C (1=5) (2=4) (4=2) (5=1).
# RECODE V160D (1=5) (2=4) (4=2) (5=1).
# RECODE V160E (1=5) (2=4) (4=2) (5=1).
# RECODE V160G (1=5) (2=4) (4=2) (5=1).
#
# COMPUTE EXTRA = MEAN(V160A, V160F).
# COMPUTE AGREEA = MEAN(V160B, V160G).
# COMPUTE CONSCI = MEAN(V160C, V160H).
# COMPUTE NEURO = MEAN(V160D, V160I).
# COMPUTE OPEN = MEAN(V160E, V160J).
# EXECUTE.
# COMPUTE MORAL_PERSO = MEAN(V203 to V207A).
# COMPUTE MORAL_ILLEG = MEAN(V198 to V202).
# COMPUTE MORAL_VIOL = MEAN(V208 to V210).
# COMPUTE DEMOCR = MEAN(V131, V133, V134, V136, V137, V139).
# COMPUTE TRUST_ST = MEAN(V105 to V107).
# COMPUTE ELDERLY = MEAN(V161 to V163).
# COMPUTE CONFI_POL = MEAN(V113 to V118).
# EXECUTE.