import numpy as np
import pandas as pd
import os
import fnmatch
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from myBasics import *
# Load the response table; the first three CSV columns become the row
# MultiIndex (used further down as group / participant / face gender).
bigDf = pd.read_csv(
    '../outputs/genderTable.csv',
    index_col=[0, 1, 2],
)
bigDf.head()
def makeGenderSpaghetti(bigDf,cond,count):
    """Plot the raw response curve of every participant for one face gender.

    Draws into subplot `count` of a 1x2 grid on the current figure. One
    semi-transparent line is drawn per participant, coloured by group, and
    each group contributes exactly one legend entry.

    Parameters
    ----------
    bigDf : pandas.DataFrame
        Rows indexed by (group, participant, face gender). The last column
        holds the group membership and is not plotted.
    cond : str
        Face gender to plot, 'M' or 'F'.
    count : int
        1-based position of the subplot in the 1x2 grid.

    Notes
    -----
    Reads `myPal`, `labelCoding` and `myGLabels` from module scope
    (presumably provided by `from myBasics import *` -- verify).
    """
    ax = plt.subplot(1, 2, count)
    # to have only one legend entry per group, keep track of the labels
    # that were already handed to matplotlib
    legTrack = []
    # looping through the groups
    for group in bigDf.index.levels[0]:
        groupDf = bigDf.loc[group]
        # The remaining index is (participant, face gender), i.e. every
        # participant is listed once per gender. Iterate over the unique
        # participants so that each curve is drawn once; iterating over the
        # raw index drew every line twice and doubled the effective alpha.
        for participant in groupDf.index.get_level_values(0).unique():
            groupLetter = participant[0]
            if groupLetter not in group:
                continue
            # color and legend setting for that group
            thisCol = myPal[labelCoding[groupLetter]]
            thisLeg = myGLabels[groupLetter]
            # last entry of the row is the group membership; get rid of that
            thisList = list(groupDf.loc[(participant, cond)])[:-1]
            lineKws = dict(c=thisCol, alpha=0.5, linewidth=4)
            # only the first line of a group carries the legend label
            if thisLeg not in legTrack:
                lineKws['label'] = thisLeg
                legTrack.append(thisLeg)
            ax.plot(thisList, **lineKws)
    # plot formatting
    pctLabels = [str(a)+'%' for a in np.arange(0,101,20)]
    ax.set_xlabel('Morphing Grade (Fear --> Anger)')
    ax.set_ylabel('% Anger Responses')
    plt.xticks(np.arange(0,10.1,2), pctLabels)
    plt.yticks(np.arange(0,1.01,0.2), pctLabels)
    #plt.ylim(.0,1.); plt.xlim(-0.1,10.2)
    if cond == 'M':
        ax.set_title('Male Faces')
    elif cond == 'F':
        ax.set_title('Female Faces')
        ax.set_ylabel('')
        ax.set_yticks([])
        # one legend for both plots
        # NOTE(review): the nesting of this call was lost in the flat export;
        # it is kept in the 'F' branch so that only one legend is drawn.
        plt.legend(loc='best',bbox_to_anchor=[1.1, 1])
# Raw response curves of all participants: male faces in the left panel,
# female faces in the right panel.
plt.figure(figsize=(12, 4))
for panel, fgender in enumerate(['M', 'F'], start=1):
    makeGenderSpaghetti(bigDf, fgender, panel)
sns.despine()
#plt.savefig('../figures/rawSpaghettiAll.png',dpi=300,bbox_inches="tight")
plt.show()
The same plot as above, but as an interactive plot
def _drawSpaghettiPanel(ax, curveDf, groups, i):
    """Draw all rows of `curveDf` as thin lines and row `i` as a thick line.

    `groups[k]` is the group code of row k and selects the colour from the
    module-level `myPal`.
    """
    for pos in range(curveDf.shape[0]):
        if pos == i:
            continue
        # Colour is looked up by the row's own position. The previous code
        # indexed the group list with positions of the array that had row i
        # removed, so every row after i got its predecessor's colour.
        try:
            ax.plot(np.array(curveDf.iloc[pos]), alpha=0.5, color=myPal[groups[pos]])
        except Exception:
            # best effort: a curve that cannot be drawn is flagged, not fatal
            print("!")
    # highlighted participant is drawn last so that it lies on top
    ax.plot(curveDf.iloc[i], linewidth=10, color=myPal[groups[i]])


def interactiveSpaghetti(i):
    """Return a figure highlighting participant number `i` among all others.

    The left panel shows responses to male faces, the right panel responses
    to female faces. `i` is a 0-based row position within each gender's table.

    Reads `bigDf` and `myPal` from module scope.
    """
    # NOTE(review): the stride slicing assumes rows strictly alternate
    # female, male for each participant -- confirm against the CSV ordering.
    mmDf = bigDf[1::2]
    mDf = mmDf[mmDf.columns[:-1]]  # all columns except the trailing 'group'
    mY = list(mmDf['group'])
    ffDf = bigDf[0::2]
    fDf = ffDf[ffDf.columns[:-1]]
    fY = list(ffDf['group'])

    fig = plt.figure(figsize=(12,4))
    ### male
    _drawSpaghettiPanel(plt.subplot(1,2,1), mDf, mY, i)
    ### female
    _drawSpaghettiPanel(plt.subplot(1,2,2), fDf, fY, i)
    sns.despine()
    return fig
# Interactive plots for static html notebooks; using ipywidgets by Jake Vanderplas
# https://github.com/jakevdp/ipywidgets-static
from ipywidgets_static import StaticInteract, RangeWidget, RadioWidget
# Render interactiveSpaghetti behind a slider for i (range 0 to 60, step 1).
# NOTE(review): the upper bound 60 is hard-coded -- presumably tied to the
# number of participants; verify when the data set changes.
participantSlider = RangeWidget(0, 60, 1)
StaticInteract(interactiveSpaghetti, i=participantSlider)
Here, participants whose response functions indicate guessing or non-compliance are removed.
We also visualise the data of the excluded participants, to make clear why they had to be excluded.
# Participants to exclude; the first character of each id is the group key.
eliminated = ['G011','G013','G016','G025','A004']

# One column per excluded participant: top row male faces, bottom row female faces.
fig = plt.figure(figsize=(16,8))
nElim = len(eliminated)
for row, faceGender in enumerate(['M', 'F']):
    for e, elim in enumerate(eliminated):
        ax = plt.subplot(2, nElim, row*nElim + e + 1)
        # label-based lookup (group, participant, face gender); the trailing
        # group-membership entry of the row is not plotted
        ax.plot(bigDf.loc[(elim[0], elim, faceGender)].iloc[:-1])
        ax.set_title(elim + ' ' + faceGender)
        ax.set_ylim(0,1)
sns.despine()
plt.tight_layout()
plt.show()
# Remove all excluded participants (index level 1) from the table in one call.
bigDf.drop(eliminated, level=1, inplace=True)
# Same raw-curve figure as before, now without the excluded participants.
plt.figure(figsize=(12, 4))
for panel, fgender in enumerate(['M', 'F'], start=1):
    makeGenderSpaghetti(bigDf, fgender, panel)
sns.despine()
#plt.savefig('../figures/rawSpaghetti.png',dpi=300,bbox_inches="tight")
plt.show()
Restructure the DataFrame so that it is easy to average over groups
# Move group and participant into the columns and back again, so that the
# row index ends up ordered as (face gender, group, participant).
genderDf = (
    bigDf
    .unstack(0)
    .unstack(0)
    .stack(1)
    .stack(1)
)
genderDf.head()
# the last column is the group column; keep everything before it
genderDf = genderDf.iloc[:, :-1]
genderDf.head()
def makePlot(genderDf,groups):
    """Plot group mean response curves with 95% confidence intervals.

    Creates a new 12x4 figure with one panel per face gender. For every
    group in `groups` the column-wise mean is drawn with error bars of
    1.96 standard errors of the mean.

    Parameters
    ----------
    genderDf : pandas.DataFrame
        Rows indexed by (face gender, group, participant); one column per
        morph step.
    groups : sequence of str
        Group keys to include, e.g. ['G', 'A'].

    Notes
    -----
    Reads `myPal`, `labelCoding` and `myGLabels` from module scope
    (presumably provided by `from myBasics import *` -- verify).
    """
    plt.figure(figsize=(12,4))
    pctLabels = [str(a)+'%' for a in np.arange(0,101,20)]
    # reversed level order, so that 'M' is drawn before 'F'
    for count,gender in enumerate( genderDf.index.levels[0][::-1] ):
        ax = plt.subplot(1,2,count+1)
        for j,group in enumerate( groups ):
            # select the group's rows once and reuse them for mean, SD and n
            groupDf = genderDf.loc[gender].loc[group]
            thisMean = groupDf.mean()
            thisStd = groupDf.std(ddof=1)
            thisCol = myPal[labelCoding[group]]
            thisLeg = myGLabels[group]
            ax.errorbar( np.linspace(0,10,len(thisMean))+j/15., # apply small shift so CI are better visible
                         thisMean,
                         yerr=thisStd/np.sqrt(len(groupDf))*1.96,
                         label=thisLeg,
                         color=thisCol,
                         linewidth=3,
                         alpha=0.7
                         )
        # plot formatting
        ax.set_xlabel('Morphing Grade (Fear --> Anger)')
        ax.set_ylabel('% Anger Responses')
        plt.xticks(np.arange(0,10.1,2), pctLabels)
        plt.yticks(np.arange(0,1.01,0.2), pctLabels)
        plt.ylim(-0.02,1.02); plt.xlim(-0.1,10.2)
        if gender == 'M':
            ax.set_title('Male Faces')
        elif gender == 'F':
            ax.set_title('Female Faces')
            ax.set_ylabel('')
            ax.set_yticks([])
            # one legend for both plots
            # NOTE(review): nesting was lost in the flat export; the legend
            # is attached to the 'F' panel, which is the last one drawn.
            plt.legend(loc='best',bbox_to_anchor=[1, 1])
    #plt.show()
# Mean curves for two pairings of groups: 'G' with 'A', then 'G' with 'K'.
for comparisonGroups in (['G', 'A'], ['G', 'K']):
    makePlot(genderDf, comparisonGroups)
    sns.despine()
    # To export a figure, call plt.savefig(<path>, dpi=300, bbox_inches="tight")
    # here. File names used previously:
    #   '../figures/meanGenderPlotViolenceGeneral.png'   for ['G','A']
    #   '../figures/meanGenderPlotViolenceMolesters.png' for ['G','K']
    plt.show()
# One row per (group, participant); columns are (face gender, morph step).
genderStacked = genderDf.unstack(0).stack(0).unstack(2)

# Build the numeric group code for every participant that is still present.
# The index levels can still list participants that were dropped earlier,
# so membership is checked explicitly instead of provoking a lookup error
# and swallowing it with a bare except.
groupCol = []
for entry in genderStacked.index.levels[1]:
    # first character of the participant id is the group key
    if (entry[0], entry) in genderStacked.index:
        groupCol.append(labelCoding[entry[0]])
    else:
        print(entry) # previously eliminated cases are excluded
genderStacked['group'] = groupCol
genderStacked.head()
# Box plots with the individual participants overlaid: responses in the '05'
# column per group, one panel per face gender.
fig = plt.figure(figsize=(12,6))
for i,fGender in enumerate(['M','F']):
    # integer grid specification; building the position from a string such as
    # '121' is not accepted by current matplotlib
    ax = fig.add_subplot(1, 2, i+1)
    sns.boxplot(x='group',y=(fGender,'05'),data=genderStacked,
                width=0.4,linewidth=1,color='white',whis=True,notch=True,fliersize=0,ax=ax)
    sns.stripplot(x='group',y=(fGender,'05'),data=genderStacked,
                  jitter=True, edgecolor='white',palette=myPal,size=9,linewidth=1,ax=ax)
    if fGender == 'M':
        ax.set_title('Male Faces')
    else :
        ax.set_title('Female Faces')
    ax.set_ylim(0,1)
    # NOTE(review): the label order assumes the group codes sort as
    # violence offenders, child molesters, general population -- verify
    # against labelCoding.
    ax.set_xticklabels(['Violence\nOffenders','Child\nMolesters','General\nPopulation'],fontsize=15)
    ax.set_xlabel('')
    if i==0:
        ax.set_ylabel('Anger Responses')
    else:
        ax.set_ylabel('')
    # NOTE(review): the flat export does not show whether the percent ticks
    # were set per panel or only on the last one; they are set explicitly on
    # every axes here so that both panels read the same.
    ax.set_yticks(np.arange(0,1.01,0.1))
    ax.set_yticklabels([str(a)+'%' for a in np.arange(0,101,10)])
sns.despine()
#plt.suptitle('Threshold of fitted function',fontsize=20, position=(0.51,1.1))
plt.tight_layout()
#plt.savefig('../figures/fifty_fifty.png',dpi=300)
plt.show()
from scipy import stats
def makeMannUTests(df):
    """Run pairwise Mann-Whitney U tests between groups for every column.

    The comparisons are fixed to G vs K, G vs A and K vs A.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows indexed by (group, participant); the first index level must
        contain the keys 'G', 'K' and 'A'. Every column is tested separately.

    Returns
    -------
    pandas.DataFrame
        One row per column of `df`. The columns are a two-level index of
        (comparison label, statistic), where the statistic is 'U' (rounded
        to 2 decimals), 'p' (rounded to 3 decimals) or 'sig' ('*' if
        p < 0.05, otherwise 'n.s.').

    Notes
    -----
    `stats.mannwhitneyu` is called with its default settings, so whether the
    p-value is one- or two-sided depends on the installed SciPy version.
    """
    statRows = ['U', 'p', 'sig']
    frames = []
    for comp in [('G','K'),('G','A'),('K','A')]:
        # label-based selection of each group's rows, done once per comparison
        firstGroup = df.loc[comp[0]]
        secondGroup = df.loc[comp[1]]
        d = {}
        for variable in df.columns:
            U, p = stats.mannwhitneyu(firstGroup[variable], secondGroup[variable])
            thisSig = '*' if p < 0.05 else 'n.s.'
            d[variable] = {'U': round(U, 2), 'p': round(p, 3), 'sig': thisSig}
        thisDf = pd.DataFrame(d).reindex(statRows)
        # prefix the statistic names with the comparison label
        thisDf.index = pd.MultiIndex.from_product([[comp[0]+' > '+comp[1]], statRows])
        frames.append(thisDf)
    # a single concat at the end instead of growing an initially empty frame
    return pd.concat(frames).T
# Run the tests once and show every second row of the result:
# odd positions first, then even positions.
mannUTable = makeMannUTests(genderDf.unstack(0))
mannUTable[1::2]
mannUTable[::2]
# Wide table for JASP: one row per (group, participant) and one flat column
# per combination of face gender and morph step.
jaspGender = genderDf.unstack(0)
jaspGender.columns = jaspGender.columns.swaplevel(0, 1)
# sort_index replaces the removed DataFrame.sortlevel
jaspGender = jaspGender.sort_index(axis=1, level=0)
# Integer position of every column's value within its level. This is what
# MultiIndex.labels (later renamed to .codes) used to return, computed in a
# way that does not depend on the pandas version.
jaspCols = jaspGender.columns
genderCodes = jaspCols.levels[0].get_indexer(jaspCols.get_level_values(0))
morphCodes = jaspCols.levels[1].get_indexer(jaspCols.get_level_values(1))
# 'fm'[code] turns gender code 0 into 'f' and 1 into 'm'
jaspGender.columns = ['fm'[g]+'_'+str(m) for g, m in zip(genderCodes, morphCodes)]
jaspGender.head()
jaspGender.to_csv('../outputs/genderMorphsJASP.csv')
Get the AFAS data
# The first two CSV columns become the row MultiIndex.
afasDf = pd.read_csv(
    '../outputs/meanAFAS.csv',
    index_col=[0, 1],
)
Get rid of the participants that were excluded from the morphing task
# Drop the same participants (index level 1) that were excluded above.
afasDf.drop(eliminated, level=1, inplace=True)
afasDf.head()
Merge together:
# Both tables must list the same participants in the same order before
# their columns are put side by side.
assert ( afasDf.index == genderStacked.index ).all()
mergeRows = [ afasDf['Overall'], genderStacked['M']['05'], afasDf['group'] ]
mergeDf = pd.DataFrame(mergeRows).transpose()

# Overall AFAS score against the '05' responses to male faces, all
# participants pooled, with a single regression line.
sns.jointplot(x="Overall", y="05", data=mergeDf, kind="reg",
              xlim=(0, 4), ylim=(0, 1), stat_func=None,
              scatter_kws=dict(s=70, edgecolor="white", linewidth=1))
#plt.savefig('../figures/afasFiftyCorrelationAll.png',dpi=300)

# Same relation with a separate regression line per group.
sns.lmplot(x="Overall", y="05", data=mergeDf, hue="group",
           ci=None, truncate=True, palette=myPal,
           scatter_kws=dict(s=70, edgecolor="white", linewidth=1))
plt.ylim(0,1)
plt.xlim(0,4)
#plt.savefig('../figures/afasFiftyCorrelationGroups.png',dpi=300)
# Correlate every AFAS scale (all columns except the trailing one) with the
# '05' responses to male faces, rank-based and linear.
maleFifty = genderStacked['M']['05']  # looked up once, reused for every scale
for scale in afasDf.columns[:-1]:
    # %-formatting yields the same text as the former print statements and
    # is valid under both Python 2 and Python 3
    print("\n%s" % (scale,))
    r, p = stats.spearmanr(afasDf[scale], maleFifty)
    print("Spearman r: %s p: %s" % (r, p))
    r, p = stats.pearsonr(afasDf[scale], maleFifty)
    print("Pearson r: %s p: %s" % (r, p))