This is an analysis of the AFAS, PPI-R and SCID-II scores of the participants.
import numpy as np
from scipy import stats
import pandas as pd
import os
import fnmatch
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from myBasics import *
# List the unicode string literals found in the web form definition
# (presumably the questionnaire item texts -- confirm against forms.py).
# The context manager closes the file handle, which was previously leaked.
with open('../experiment/quest/app/forms.py', 'r') as formsFile:
    for entry in formsFile:
        if 'u"' in entry:
            # keep what follows the opening u" and cut the last three
            # characters of the line (closing quote etc. plus the newline)
            print(entry[entry.find('u"')+2:-3])
def get_logfile(whichfolder, whichexperiment):
    """Collect the paths of all files in a folder whose name matches a pattern.

    Parameters
    ----------
    whichfolder : str
        Directory to scan (not recursive). May be given with or without a
        trailing path separator.
    whichexperiment : str
        Shell-style wildcard pattern (``fnmatch`` syntax) that is matched
        against the bare file names.

    Returns
    -------
    list of str
        ``whichfolder`` joined with every matching file name, in the order
        reported by ``os.listdir`` (i.e. not sorted).
    """
    # os.path.join inserts the separator only when it is missing, so a folder
    # passed without a trailing slash no longer yields glued-together paths.
    return [os.path.join(whichfolder, fileName)
            for fileName in os.listdir(whichfolder)
            if fnmatch.fnmatch(fileName, whichexperiment)]
# Gather every questionnaire log file and keep the paths in sorted order.
loglist = sorted(get_logfile('../experiment/quest/app/static/logfiles/', 'log*.txt'))
Example:
# Show the first five paths as a quick sanity check.
loglist[:5]
def makeThis(logfile):
    """Read one participant's log file into a single-row, wide DataFrame.

    The participant code is taken from the file name: the part between the
    first underscore and the extension, i.e. one leading character followed
    by a number that is zero-padded to three digits
    (``log_G7.txt`` -> ``G007``).

    Parameters
    ----------
    logfile : str
        Path to a CSV log file. The first two lines are skipped, the next
        line is read as the header, the first column holds the item labels
        and there must be exactly one further column holding the answers.

    Returns
    -------
    pandas.DataFrame
        One row, indexed by the participant code (index name ``id``), and
        one column per item, renamed to ``f001``, ``f002``, ... and sorted.
    """
    # Parse the code from the file name only, so that an underscore in a
    # directory name cannot shift the slice.
    fileName = os.path.basename(logfile)
    pName = fileName[fileName.find('_')+1:fileName.rfind('.')]
    # zfill pads short numbers without cutting digits off longer ones
    pName = pName[0] + pName[1:].zfill(3)

    fullDf = pd.read_csv(logfile,
                         skiprows=2,
                         index_col=[0]
                         )
    fullDf.index = pd.MultiIndex.from_arrays(
        [[pName]*len(fullDf.index), fullDf.index],
        names=['id', 'f'])
    fullDf.columns = ['ans']

    # move the item level into the columns -> one row per participant
    withinDf = fullDf.unstack(1)

    # Zero-pad the item numbers so that alphabetical order equals numerical
    # order. The labels are read from the actual columns (not from .levels)
    # so each new name is guaranteed to belong to its own column.
    withinDf.columns = ['f' + str(label)[1:].zfill(3)
                        for label in withinDf.columns.get_level_values(1)]
    # reindex(columns=...) replaces reindex_axis, which newer pandas removed
    withinDf = withinDf.reindex(columns=sorted(withinDf.columns))
    return withinDf
Example:
# Preview the frame built from the last log file in the list.
makeThis(loglist[-1])
def makeAll(loglist):
    """Combine the log files of all participants into one DataFrame.

    Parameters
    ----------
    loglist : list of str
        Paths of the log files; each one is parsed with ``makeThis``.

    Returns
    -------
    pandas.DataFrame
        One row per log file. The index has two levels: the first character
        of the participant code, and the full participant code. Rows are
        sorted by the second level.

    Raises
    ------
    ValueError
        If ``loglist`` is empty (raised by ``pd.concat``).
    """
    # Parse everything first and concatenate once. Growing the frame inside
    # a try / bare-except would silently discard all earlier participants
    # whenever a later concat fails, and is quadratic in the number of files.
    bigDf = pd.concat([makeThis(p) for p in loglist])
    # first character of the participant code = group letter
    group = [a[0] for a in bigDf.index]
    bigDf.index = [group, bigDf.index]
    bigDf = bigDf.sort_index(axis=0, level=1)
    return bigDf
# Build the item-level table for all participants.
bigDf = makeAll(loglist)
The final DataFrame:
bigDf.head()
# Write the item-level answers to disk so they can be reloaded later.
bigDf.to_csv('../outputs/fullAFAS.csv')
Example of re-importing:
# index_col=[0,1] turns the first two CSV columns back into a two-level row index.
pd.read_csv('../outputs/fullAFAS.csv',
index_col =[0,1]).head()
Facilitative Aggression: Items 1,3,5,8,10,12,14,17,19,21,23,25,27,29,30
Appetitive Aggression: Items 2,4,6,7,9,11,13,15,16,18,20,22,24,26,28
# Zero-based column positions of the items that belong to each AFAS subscale
# (questionnaire items are numbered from 1, DataFrame columns from 0).
fAgr = np.array([1,3,5,8,10,12,14,17,19,21,23,25,27,29,30]) - 1  # facilitative
aAgr = np.array([2,4,6,7,9,11,13,15,16,18,20,22,24,26,28]) - 1  # appetitive
# print() with a single argument behaves the same on Python 2 and Python 3
print(fAgr)
print(aAgr)
Example: facilitative aggression
# Positional lookup: select the columns at the facilitative item positions.
bigDf[bigDf.columns[fAgr]].head()
def makeMean(df,fAgr=fAgr,aAgr=aAgr,labelCoding=labelCoding):
    """Compute per-participant mean scores on both subscales and overall.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per participant and one column per item. The second index
        level must hold the participant code, whose first character is a
        key of ``labelCoding``.
    fAgr, aAgr : array of int
        Column positions of the facilitative / appetitive items.
    labelCoding : mapping
        Maps the first character of a participant code to a group label.

    Returns
    -------
    pandas.DataFrame
        Columns ``Facilitative``, ``Appetitive``, ``Overall`` and ``group``,
        sorted by index.

    Raises
    ------
    AssertionError
        If, for a participant without missing answers, the overall mean is
        not the average of the two subscale means.
    """
    # get the mean on both scales
    meanF = df[df.columns[fAgr]].mean(axis=1)
    meanA = df[df.columns[aAgr]].mean(axis=1)
    # get the overall mean
    meanBig = df.mean(axis=1)

    # Sanity check on the actual values. Comparing `x.all() == y.all()` only
    # compares two booleans and therefore never detects a mismatch.
    # Rows with missing answers are skipped because their subscale means can
    # be based on different numbers of items.
    complete = df.notnull().all(axis=1).values
    expected = ((meanA + meanF) / 2.).values
    if not np.allclose(meanBig.values[complete], expected[complete]):
        raise AssertionError('mean values corrupted; please check')

    # put them in one df
    meanDf = pd.DataFrame({'Facilitative': meanF,
                           'Appetitive': meanA,
                           'Overall': meanBig},
                          columns=['Facilitative', 'Appetitive', 'Overall'])
    # get_level_values gives one code per row in row order; .levels would
    # only give the sorted unique codes, which can misalign with the rows
    meanDf['group'] = [labelCoding[x[0]]
                       for x in meanDf.index.get_level_values(1)]
    meanDf = meanDf.sort_index()
    return meanDf
# Per-participant AFAS means (both subscales and overall), saved to disk.
meanDf = makeMean(bigDf)
meanDf.head()
meanDf.to_csv('../outputs/meanAFAS.csv')
# Display the colour palette (myPal is not defined in this file; presumably
# it comes from `from myBasics import *`).
sns.palplot(myPal)
# Row order by group label; this sorted copy is the data source for the plots.
sortDf = meanDf.sort_values('group')
sortDf.tail()
# AFAS figure: one panel per score column (everything except the trailing
# 'group' column), each showing a notched box plot with the individual
# observations drawn on top.
fig = plt.figure(figsize=(15,6))
for i, metric in enumerate(meanDf.columns[:-1]):
    # 1 row x 3 columns, panels numbered from 1
    ax = fig.add_subplot(1, 3, i+1)
    sns.boxplot(x='group', y=metric, data=sortDf,
                width=0.4, linewidth=1, color='white',
                whis=True, notch=True, fliersize=0, ax=ax)
    sns.stripplot(x='group', y=metric, data=sortDf,
                  jitter=True, edgecolor='white', palette=myPal,
                  size=9, linewidth=1, ax=ax)
    # reference line at the lower end of the scale
    ax.axhline(0, color='k', linewidth=1)
    ax.set_title(metric)
    ax.set_xticklabels(['Violence\nOffenders','Child\nMolesters','General\nPopulation'],
                       fontsize=15)
    ax.set_xlabel('')
    # only the left-most panel carries the y-axis label
    ax.set_ylabel('Mean Score (0-4 scale)' if i == 0 else '')
    ax.set_ylim(-0.2, 4)
# remove the top and right spines of all panels
sns.despine()
plt.suptitle('AFAS Scores', fontsize=20, position=(0.51,1.1))
# write the figure to disk, then display it in the notebook
plt.savefig('../figures/afasFig.png', dpi=300)
plt.show()
def makeUTests(df):
    """Run pairwise Mann-Whitney U tests between the groups for each measure.

    Parameters
    ----------
    df : pandas.DataFrame
        The first index level must contain the group codes ``'G'``, ``'K'``
        and ``'A'``. Every column except the last one is tested.

    Returns
    -------
    pandas.DataFrame
        One row per tested column. The columns form a two-level index of
        comparison label (``'G > K'``, ``'G > A'``, ``'K > A'``) and
        statistic: ``U`` (rounded to 2 decimals), ``p`` (rounded to 3
        decimals) and ``sig`` (``'*'`` if p < 0.05, otherwise ``'n.s.'``).
    """
    variables = df.columns[:-1]
    frames = []
    for first, second in [('G','K'),('G','A'),('K','A')]:
        # .loc replaces the .ix indexer, which newer pandas no longer has;
        # selecting each group once per comparison avoids repeated lookups
        sample1 = df.loc[first]
        sample2 = df.loc[second]
        d = {}  # fresh per comparison so results can never carry over
        for variable in variables:
            # NOTE(review): the default `alternative` of mannwhitneyu differs
            # between SciPy releases -- confirm the intended sidedness.
            U, p = stats.mannwhitneyu(sample1[variable], sample2[variable])
            thisSig = '*' if p < 0.05 else 'n.s.'
            d[variable] = {'U': round(U, 2), 'p': round(p, 3), 'sig': thisSig}
        # reindex replaces reindex_axis (removed from newer pandas) and
        # fixes the row order of the three statistics
        thisDf = pd.DataFrame(d).reindex(['U', 'p', 'sig'])
        label = first + ' > ' + second
        thisDf.index = pd.MultiIndex.from_arrays(
            [[label]*len(thisDf.index), thisDf.index])
        frames.append(thisDf)
    return pd.concat(frames).T
makeUTests(meanDf)
# Scatter plot of facilitative vs. appetitive means with a regression fit per group
sns.lmplot(x="Facilitative", y="Appetitive", hue="group", data=sortDf,palette=myPal);
# NOTE(review): positions 1,2 / 9,10 / 17,18 presumably pick mean and std for each
# of three groups (8 describe() statistics per group); this depends on the layout
# describe() returns in the installed pandas version -- confirm.
meanDf.groupby("group").describe().T[[1,2,9,10,17,18]]
# Load the PPI-R scores; the first two CSV columns become the row index.
ppiDf = pd.read_csv('../experiment/ppi_r.csv',
                    index_col=[0,1])
# Derive the group label from the first character of each row's second-level
# index value. get_level_values yields one entry per row in row order, whereas
# .levels only holds the sorted unique values and can misalign with the rows
# (or differ in length) when the file is unsorted or contains repeated codes.
ppiDf['group'] = [labelCoding[x[0]] for x in ppiDf.index.get_level_values(1)]
# -99 is treated as a missing value; rows containing one are dropped entirely
ppiDf = ppiDf.replace(-99,np.nan)
ppiDf = ppiDf.dropna()
ppiDf.head()
ppiDf.to_csv('../outputs/ppiDf.csv')
def makeBoxPlot(df):
    """Draw one box-and-strip plot per measure, arranged in a two-row grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``group`` column, which is used for sorting and as the
        x-axis. Every column except the last one (expected to be ``group``)
        gets its own panel.

    The figure is displayed with ``plt.show()``; nothing is returned or
    written to disk.
    """
    fig = plt.figure(figsize=(15,8))
    sortDf = df.sort_values('group')
    myColumns = df.columns[:-1]
    numColumns = len(myColumns)
    # Panels per row must be an integer for add_subplot; round up so that an
    # odd number of measures still fits into the two rows.
    perRow = (numColumns + 1) // 2
    for i, metric in enumerate(myColumns):
        ax = fig.add_subplot(2, perRow, i+1)
        sns.boxplot(x='group', y=metric, data=sortDf,
                    width=0.4, linewidth=1, color='white',
                    whis=True, notch=True, fliersize=0, ax=ax)
        sns.stripplot(x='group', y=metric, data=sortDf,
                      jitter=True, edgecolor='white', palette=myPal, ax=ax)
        ax.set_title(metric, fontsize=15)
        ax.set_xticklabels(['Violence\nOffenders','Child\nMolesters','General\nPopulation'],
                           fontsize=9)
        ax.set_xlabel('')
        # Label the y-axis on the left-most panel of each row only. This
        # follows the grid width instead of assuming five panels per row.
        if i % perRow == 0:
            ax.set_ylabel('Sum Score', fontsize=15)
        else:
            ax.set_ylabel('')
    # remove the top and right spines of all panels
    sns.despine()
    plt.suptitle('PPI-R Scores', fontsize=20, position=(0.51,1.02))
    # show in notebook
    plt.tight_layout()
    plt.show()
# PPI-R: plots, per-group descriptives and pairwise group tests
makeBoxPlot(ppiDf)
# NOTE(review): the integer positions presumably select mean and std per group;
# this relies on the layout describe() returns in the installed pandas -- confirm.
ppiDf.groupby("group").describe().T[[1,2,9,10,17,18]]
makeUTests(ppiDf)
# Load the SCID-II table (first two CSV columns form the row index), treat
# -99 as missing, drop rows with missing values and finally leave out the
# last remaining row.
scidDf = (pd.read_csv('../experiment/scid_ii.csv', index_col=[0,1])
          .replace(-99, np.nan)
          .dropna()
          .iloc[:-1])
scidDf.to_csv('../outputs/scidDf.csv')
scidDf.tail()
def makeScidPlot(df):
    """Draw one box-and-strip plot per measure in a grid four panels wide.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``group`` column, which is used for sorting and as the
        x-axis. Every column except the last one (expected to be ``group``)
        gets its own panel.

    The figure is displayed with ``plt.show()``; nothing is returned or
    written to disk.
    """
    fig = plt.figure(figsize=(15,9))
    sns.set_style('ticks')
    sortDf = df.sort_values('group')
    myColumns = df.columns[:-1]
    numColumns = len(myColumns)
    perRow = 4
    # Keep the 3 x 4 layout for up to twelve measures and add rows beyond
    # that, so that a thirteenth panel does not make add_subplot raise.
    numRows = max(3, (numColumns + perRow - 1) // perRow)
    for i, metric in enumerate(myColumns):
        ax = fig.add_subplot(numRows, perRow, i+1)
        sns.boxplot(x='group', y=metric, data=sortDf,
                    width=0.4, linewidth=1, color='white',
                    whis=True, notch=True, fliersize=0, ax=ax)
        sns.stripplot(x='group', y=metric, data=sortDf,
                      jitter=True, edgecolor='white', palette=myPal, ax=ax)
        ax.set_title(metric)
        ax.set_xticklabels(['Violence\nOffenders','Child\nMolesters','General\nPopulation'],
                           fontsize=9)
        ax.set_xlabel('')
        # label the y-axis on the left-most panel of each row only
        if i % perRow == 0:
            ax.set_ylabel('Sum Score', fontsize=15)
        else:
            ax.set_ylabel('')
    # remove the top and right spines of all panels
    sns.despine()
    plt.suptitle('SCID-II', fontsize=20, position=(0.51,1.02))
    # show in notebook
    plt.tight_layout()
    plt.show()
# SCID-II: plots, per-group descriptives and pairwise group tests
makeScidPlot(scidDf)
# NOTE(review): the integer positions presumably select mean and std per group;
# this relies on the layout describe() returns in the installed pandas -- confirm.
scidDf.groupby("group").describe().T[[1,2,9,10,17,18]]
makeUTests(scidDf)