import numpy as np
from scipy import stats
from scipy.special import erf
from scipy.optimize import curve_fit
from scipy.optimize import leastsq
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import pandas as pd
pd.set_option('max_columns', 100)
pd.set_option('max_rows', 1000)
import os
import fnmatch
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from myBasics import *
Here, we define a logistic psychometric function and first show some examples of what it looks like. Later, we fit the function to single-participant data and extract the parameters (threshold, slope, guess, lapse) that best describe it. These parameters can be used to reconstruct the function for each participant, and they can be used for other computations, e.g. comparing thresholds between groups.
Dummy x-scale for plotting:
# Evenly spaced grid of 1000 points on [0, 1], used as a dummy x-axis.
x = np.linspace(0., 1., num=1000)
$F_{L}(x;\alpha,\beta)=\dfrac{1}{1+\exp(-\beta(x-\alpha))}$; or with guess and lapse parameters: $F_{L}(x;\alpha,\beta,\gamma,\lambda)=\gamma + (1 - \gamma - \lambda)\cdot\dfrac{1}{1+\exp(-\beta(x-\alpha))}$, where $\alpha$ is the threshold, $\beta$ the slope, $\gamma$ the guess rate and $\lambda$ the lapse rate.
# adapted for Python from the Palamedes Toolbox by Nicolaas Prins and Frederick Kingdom (PAL_Logistic.m)
# Prins, N & Kingdom, F. A. A. (2009) Palamedes: Matlab routines for analyzing psychophysical data.
# http://www.palamedestoolbox.org
def logisticFunction(x,threshold,slope,guess,lapse):
    """Evaluate the four-parameter logistic psychometric function at x.

    threshold -- x-position at which the underlying sigmoid equals 0.5
    slope     -- multiplier of (x - threshold) inside the exponential
    guess     -- lower asymptote of the curve
    lapse     -- distance of the upper asymptote from 1

    x may be a scalar or a numpy array (the computation is element-wise).
    """
    sigmoid = 1./(1. + np.exp(-slope*(x - threshold)))
    # rescale the unit sigmoid into the interval [guess, 1 - lapse]
    amplitude = 1. - guess - lapse
    return guess + amplitude*sigmoid
# Example curve: threshold 0.5, slope 10, guess 0 and lapse 0.
exampleCurve = logisticFunction(x, 0.5, 10, 0, 0)
plt.plot(x, exampleCurve)
sns.despine()
# Interactive plots for static html notebooks; using ipywidgets by Jake Vanderplas
# https://github.com/jakevdp/ipywidgets-static
from ipywidgets_static import StaticInteract, RangeWidget, RadioWidget
def iPlot(thresh,slope,guess,lapse):
    """Plot one logistic curve for the given parameters and return the figure.

    The curve is evaluated on a fine grid over [0, 1]; the equation with the
    parameter values filled in is written next to the axes.
    """
    # fine-grained x-axis
    grid = np.linspace(0, 1, 1000)
    curve = logisticFunction(grid, thresh, slope, guess, lapse)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(grid, curve, lw=5, alpha=0.8)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)

    equationTemplate = '$F_{L}=%s + (1 - %s - %s)*\\frac{1}{1+exp(- {%s}(x-{%s}))}$'
    equationValues = (guess, guess, lapse, slope, thresh)
    # x = 1.1 is beyond the x-limit, so the text sits to the right of the axes
    ax.text(1.1, 0.5, equationTemplate % equationValues, fontsize=25)
    sns.despine()
    return fig
# One widget per iPlot parameter.
# NOTE(review): RangeWidget arguments are presumably (min, max, step) - confirm
# against ipywidgets_static.
logisticSliders = {
    'thresh': RangeWidget(.2, 0.8, 0.1),
    'slope': RangeWidget(10, 20, 2),
    'guess': RangeWidget(.0, .2, 0.1),
    'lapse': RangeWidget(.0, .2, 0.1),
}
StaticInteract(iPlot, **logisticSliders)
# taken from: Ariel Rokem;
# https://github.com/arokem/teach_optimization/blob/master/optimization.ipynb
def err_func(params, x, y, func):
    """Return the residuals y - func(x, *params).

    params -- sequence of parameter values, unpacked after x when calling func
    x, y   -- inputs and observed values
    func   -- model function with the signature func(x, *params)
    """
    predicted = func(x, *params)
    return y - predicted
def getFit(x,y,func,initalGuesses):
    """Least-squares fit of func to the points (x, y); returns the parameters.

    x, y          -- data to fit
    func          -- model function, called as func(x, *params)
    initalGuesses -- starting values for the parameters
    """
    fitResult = leastsq(
        err_func,
        initalGuesses,
        args=(x, y, func),
        maxfev=1000000000,  # a high number, for safety
    )
    # leastsq returns (solution, status flag); only the solution is used here
    return fitResult[0]
# Read the table; the first three CSV columns form the row MultiIndex.
genderTablePath = '../outputs/genderTable.csv'
df = pd.read_csv(genderTablePath, index_col=[0, 1, 2])
# keep every column except the last one
keptColumns = df.columns[:-1]
df = df[keptColumns]
df.tail()
The parameters need to be initialized. Here, we set them as follows:
# Starting values for the fit, in the order (threshold, slope, guess, lapse)
initalGuesses = 0.5,5,0,0
# Column-wise mean over all rows of group 'A'.
# .loc replaces the deprecated .ix indexer; the lookup is purely label-based.
y = np.array(df.loc['A'].mean())
# spread the data points evenly over [0, 1]
x = np.linspace(0, 1, len(y))
params = getFit(x, y, logisticFunction, initalGuesses)
params
plt.ylim(0, 1)
# raw data points
plt.plot(x, y, 'o')
# fitted curve
params = getFit(x, y, logisticFunction, initalGuesses)
fittedY = logisticFunction(x, *params)
plt.plot(x, fittedY)
# fitted parameters, rounded to two decimals, placed right of the axes
paramLabel = 'threshold: %s\nslope: %s\nguess: %s\nlapse: %s\n'
roundedParams = tuple(round(params[k], 2) for k in range(4))
plt.text(1.05, 0.5, paramLabel % roundedParams)
sns.despine()
plt.show()
def makeSinglePlot(groupName,participant):
    """Plot raw data and fitted logistic curve of one participant.

    groupName   -- "violence offenders", "child molesters" or
                   "general population"
    participant -- position of the participant within that group
                   (0 = first participant of the group in the index)

    Draws one panel per face gender ('M', 'F') and returns the figure. If a
    panel cannot be drawn (e.g. the position is out of range or the data are
    missing), that panel is labelled 'no data' instead.

    Raises ValueError for an unknown groupName.
    """
    groupCodes = {
        "violence offenders": 'G',
        "child molesters": 'K',
        "general population": 'A',
    }
    if groupName not in groupCodes:
        # previously this surfaced later as an unbound-variable error
        raise ValueError("unknown groupName: %r" % (groupName,))
    group = groupCodes[groupName]

    fig = plt.figure(figsize=(16, 6))
    myCol = myPal[labelCoding[group]]
    for i, fGender in enumerate(['M', 'F']):
        ax = plt.subplot(1, 2, i+1)
        try:
            # select a participant: positions of all IDs starting with the
            # group letter, then pick the requested one
            groupEntries = [n for n, pid in enumerate(df.index.levels[1])
                            if pid[0] == group]
            p = df.index.levels[1][groupEntries[participant]]
            # get the data and transform to numpy array
            y = np.array(df.loc[p[0]].loc[p].loc[fGender])
            x = np.linspace(0, 1, len(y))
            # plot the raw data
            ax.plot(x, y,
                    'o', markersize=12, markeredgecolor='grey',
                    markerfacecolor=myCol)
            # plot the fitted data
            params = getFit(x, y, logisticFunction, initalGuesses)
            ax.plot(x, logisticFunction(x, *params),
                    linewidth=8, alpha=0.6, color=myCol)
            ax.text(.6, .1,
                    'threshold: %s\nslope: %s\nguess: %s\nlapse: %s\n'
                    % (round(params[0], 2), round(params[1], 2),
                       round(params[2], 2), round(params[3], 2))
                    )
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.set_title(fGender)
            plt.suptitle(p, fontsize=20)
            sns.despine()
        except Exception:
            # deliberate best effort: any failure yields an empty panel, but
            # KeyboardInterrupt/SystemExit are no longer swallowed
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.set_title('no data')
            plt.suptitle('no data', fontsize=20)
            sns.despine()
    return fig
# Radio buttons choose the group, a range widget the participant position.
groupChoices = ['violence offenders','child molesters','general population']
StaticInteract(
    makeSinglePlot,
    groupName=RadioWidget(groupChoices),
    participant=RangeWidget(0, 29, 1)
)
def makeParamsDict(df):
    """Fit the logistic function to every participant and face gender.

    df -- table indexed by (group, participant, face gender); each row holds
          the responses of one participant for one face gender

    Returns a nested dict of the form
    d[participant][face gender] = {'threshold': .., 'slope': .., 'guess': ..,
    'lapse': ..}.
    """
    # dict to write to
    d = {}
    # loop through groups
    for group in df.index.levels[0]:
        groupDf = df.loc[group]
        # loop through participants; the level values are filtered by their
        # first letter so that only participants of this group are used
        for p in groupDf.index.levels[0]:
            if p[0] != group:
                continue
            participantDf = groupDf.loc[p]
            # loop through face gender
            for fgender in participantDf.index:
                # get the data
                y = np.array(participantDf.loc[fgender])
                x = np.linspace(0, 1, len(y))
                # get parameters from fitting
                threshold, slope, guess, lapse = getFit(
                    x, y, logisticFunction, initalGuesses)
                # create the participant's entry on first use, then fill it
                d.setdefault(p, {'F': {}, 'M': {}})[fgender] = {
                    'threshold': threshold,
                    'slope': slope,
                    'guess': guess,
                    'lapse': lapse,
                }
    # output is the dict
    return d
def makeParamsDf(d):
    """Turn the nested parameter dict into one table with a row per participant.

    d -- dict of the form d[participant][face gender][parameter name] = value

    Returns a DataFrame sorted by participant, indexed by
    (first character of the participant ID, participant ID), with
    (face gender, parameter name) as columns.
    """
    rows = []
    # loop through dict
    for entry in d:
        # make a table out of the data of one participant
        thisDf = pd.DataFrame(d[entry])
        thisDf.index = [[entry]*len(thisDf), thisDf.index]
        # reshape, so that results are all in one row
        rows.append(thisDf.unstack())
    # concatenate once instead of growing the table inside the loop
    paramsDf = pd.concat(rows) if rows else pd.DataFrame()
    # making index nice
    paramsDf = paramsDf.sort_index()
    paramsDf.index = [[name[0] for name in paramsDf.index], paramsDf.index]
    return paramsDf
We get a table with all four curve parameters for each participant and each face type (female,male). These data can be compared between groups or used to reconstruct the respective logistic function.
# Fit all curves (makeParamsDict) and arrange the parameters as a table
# (makeParamsDf); then show the first rows.
paramsDf = makeParamsDf(makeParamsDict(df))
paramsDf.head()
def makeGenderSpaghetti(paramsDf,cond,count):
    """Plot the fitted curves of all participants for one face gender.

    paramsDf -- parameter table indexed by (group, participant) with
                (face gender, parameter name) columns
    cond     -- face gender to plot, 'M' or 'F'
    count    -- position of the subplot in a 3 x 2 grid

    Each participant is drawn as one semi-transparent line in the colour of
    the participant's group; every group gets exactly one legend entry.
    """
    # resolution of fitted data
    x = np.linspace(0, 1, 1000)
    ax = plt.subplot(3, 2, count)
    # labels that already have a legend entry, so that each group is
    # labelled only once
    labelledGroups = set()
    # looping through the groups
    for group in paramsDf.index.levels[0]:
        groupDf = paramsDf.loc[group]
        # looping through the participants of each group
        for p in groupDf.index:
            if p[0] not in group:
                continue
            # color and legend setting for that group
            thisCol = myPal[labelCoding[p[0]]]
            thisLeg = myGLabels[p[0]]
            # get the data from the specified row
            theseParams = groupDf.loc[p].loc[cond]
            thisList = logisticFunction(x,
                                        theseParams['threshold'],
                                        theseParams['slope'],
                                        theseParams['guess'],
                                        theseParams['lapse'])
            lineStyle = dict(c=thisCol, alpha=0.4, linewidth=3)
            # only the first line of a group carries the legend label
            if thisLeg not in labelledGroups:
                lineStyle['label'] = thisLeg
                labelledGroups.add(thisLeg)
            # plotted against the sample index (0..999), hence the tick
            # positions below
            ax.plot(thisList, **lineStyle)
    # plot formatting
    ax.set_xlabel('Morphing Grade (Fear --> Anger)')
    ax.set_ylabel('% Anger Responses')
    percentLabels = [str(a)+'%' for a in np.arange(0, 101, 20)]
    plt.xticks(np.arange(0, 1001, 200), percentLabels)
    plt.yticks(np.arange(0, 1.01, 0.2), percentLabels)
    plt.ylim(-0.02, 1.02); plt.xlim(-0.1, 1001)
    if cond == 'M':
        ax.set_title('Male Faces')
    elif cond == 'F':
        ax.set_title('Female Faces')
    # one legend for both plots
    plt.legend(loc='best', bbox_to_anchor=[1, 1])
plt.figure(figsize=(12, 12))
# one panel per face gender, at subplot positions 1 and 2
for position, fgender in enumerate(['M', 'F'], 1):
    makeGenderSpaghetti(paramsDf, fgender, position)
sns.despine()
plt.tight_layout()
plt.show()
# participant IDs treated as eliminated in the cells below
eliminated = ['G002', 'G011', 'G013', 'G016', 'G025', 'K003', 'K004', 'A004']
def showEliminated(paramsDf,eliminated):
    """Show the fitted curves of the eliminated participants.

    paramsDf   -- parameter table indexed by (group, participant) with
                  (face gender, parameter name) columns
    eliminated -- list of participant IDs; the first character of an ID is
                  used as its group

    Draws a 2 x len(eliminated) grid: top row male faces, bottom row female
    faces. Nothing is returned; the figure is shown directly.
    """
    # own evaluation grid instead of whatever the global `x` currently holds
    x = np.linspace(0, 1, 1000)
    nElim = len(eliminated)
    fig = plt.figure(figsize=(16, 6))
    for row, fGender in enumerate(['M', 'F']):
        for e, elim in enumerate(eliminated):
            thisDf = paramsDf.loc[elim[0]].loc[elim].loc[fGender]
            curve = logisticFunction(x,
                                     thisDf.loc['threshold'],
                                     thisDf.loc['slope'],
                                     thisDf.loc['guess'],
                                     thisDf.loc['lapse'])
            # row 0 fills positions 1..n, row 1 positions n+1..2n
            ax = plt.subplot(2, nElim, row*nElim + e + 1)
            ax.plot(curve)
            ax.set_title(elim + ' ' + fGender)
            ax.set_ylim(0, 1)
    sns.despine()
    plt.tight_layout()
    plt.show()
showEliminated(paramsDf, eliminated)
# remove the eliminated participants (second index level) in place
for elim in eliminated:
    paramsDf.drop([elim], level=1, inplace=True)
# redraw the per-participant curves without the eliminated cases
plt.figure(figsize=(12, 12))
for position, fgender in enumerate(['M', 'F'], 1):
    makeGenderSpaghetti(paramsDf, fgender, position)
sns.despine()
plt.show()
paramsTablePath = '../outputs/paramsTable.csv'
paramsDf.to_csv(paramsTablePath)
def makeFittedValuesDf(paramsDf,fGender):
    """Evaluate every participant's fitted curve on a grid of 1001 points.

    paramsDf -- parameter table indexed by (group, participant) with
                (face gender, parameter name) columns
    fGender  -- face gender whose parameters are used, 'M' or 'F'

    Returns a DataFrame with one row per participant, indexed by
    (group, participant), holding the curve's y-values. Participants that
    are listed in the index levels but have no row are reported and skipped.
    """
    x = np.linspace(0, 1, 1001)
    frames = []
    for group in paramsDf.index.levels[0]:
        for p in paramsDf.index.levels[1]:
            # compare the first character, as the other functions do,
            # instead of a substring match
            if p[0] != group:
                continue
            # some participants might have been removed from the df but are
            # still in the non-updated index levels
            try:
                # choose the parameters of one participant
                thisDf = paramsDf.loc[p[0]].loc[p].loc[fGender]
            except KeyError:
                print("...participant %s excluded" % p)
                continue
            # get the data
            yFunc = logisticFunction(x,
                                     thisDf.loc['threshold'],
                                     thisDf.loc['slope'],
                                     thisDf.loc['guess'],
                                     thisDf.loc['lapse'])
            row = pd.DataFrame(yFunc).T
            row.index = [[group], [p]]
            frames.append(row)
    # concatenate once instead of growing the table inside the loop
    return pd.concat(frames) if frames else pd.DataFrame()
This is a large df, which stores not the raw data of the 11 original steps, but the y-values of the fitted logistic function at a resolution of 1/1000.
# Fitted curve values for the male faces; show the first rows.
funcDf = makeFittedValuesDf(paramsDf,'M')
funcDf.head()
Do this for both female and male faces, then concatenate.
def mergeFuncDf(paramsDf):
    """Build the fitted-values table for both face genders, side by side.

    The columns of the result get two levels: the face gender ('M' or 'F')
    and the original column label of makeFittedValuesDf's output.
    """
    outFunc = pd.DataFrame()
    for fGender in ('M', 'F'):
        genderFunc = makeFittedValuesDf(paramsDf, fGender)
        # prefix every column with the face gender
        genderTags = [fGender]*len(genderFunc.columns)
        genderFunc.columns = [genderTags, genderFunc.columns]
        outFunc = pd.concat([outFunc, genderFunc], axis=1)
    return outFunc
# Fitted curve values for both face genders; show the first rows.
logFuncDf = mergeFuncDf(paramsDf)
logFuncDf.head()
def plotSmoothFuncs(funcDf,myGroups,numSteps=1001,myMax=1.02,myPal=myPal,myLabels=labelCoding):
    """Plot the mean fitted curve of each group with a 95% confidence band.

    funcDf   -- fitted values with (face gender, step) columns and
                (group, participant) rows
    myGroups -- group codes to draw, e.g. ['G', 'K']
    numSteps -- number of x-positions; has to match the number of columns
                per face gender in funcDf
    myMax    -- upper limit of the y-axis
    myPal    -- colour lookup, indexed by myLabels[group]
    myLabels -- mapping from group code to the key used in myPal

    One panel is drawn per face gender ('M' left, 'F' right). The band is
    mean +/- 1.96 * standard error of the mean.
    """
    fig = plt.figure(figsize=(16, 6))
    xGrid = np.linspace(0, 1, numSteps)
    percentLabels = [str(a)+'%' for a in np.arange(0, 101, 20)]
    for j, fGender in enumerate(['M', 'F']):
        ax = plt.subplot(1, 2, j+1)
        genderDf = funcDf[fGender]
        # now loop through all requested groups
        for group in myGroups:
            groupDf = genderDf.loc[group]
            # get mean value of that group
            meanFunc = groupDf.mean()
            # get standard deviation of that group
            stdFunc = groupDf.std(ddof=1)
            # get group size
            n = len(groupDf)
            # compute 95% confidence interval
            ciFunc = (stdFunc/np.sqrt(n))*1.96
            # compute the lower and upper bounds of the CI
            upperBound = meanFunc + ciFunc
            lowerBound = meanFunc - ciFunc
            groupColor = myPal[myLabels[group]]
            # plot the mean value
            ax.plot(xGrid, meanFunc,
                    color=groupColor,
                    label=myGLabels[group])
            # plot the error/CI bounds
            ax.fill_between(xGrid, lowerBound, upperBound,
                            color=groupColor, alpha=0.3)
        ax.set_xlabel('Morphing Grade (Fear --> Anger)')
        ax.set_ylabel('% Anger Responses')
        plt.xticks(np.arange(0, 1.01, 0.2), percentLabels)
        plt.yticks(np.arange(0, 1.01, 0.2), percentLabels)
        plt.ylim(-0.02, myMax); plt.xlim(-0.01, 1.02)
        if fGender == 'M':
            ax.set_title('Male Faces')
        elif fGender == 'F':
            ax.set_title('Female Faces')
            # the right panel uses the y-axis of the left panel
            ax.set_ylabel('')
            ax.set_yticks([])
    plt.legend(loc='best')
    sns.despine()
# Mean curves of groups 'G' and 'K'
plotSmoothFuncs(logFuncDf,['G','K']);
#plt.savefig('../figures/fittedFuncViolenceChildM.png',dpi=300)
# Mean curves of groups 'G' and 'A'
plotSmoothFuncs(logFuncDf,['G','A'])
#plt.savefig('../figures/fittedFuncViolenceGenPop.png',dpi=300)
Get a new column indicating group membership
# One group label per remaining participant, in the order of the
# participant level of the index.
groupCol = []
for entry in paramsDf.index.levels[1]:
    try:
        # lookup only serves as an existence check: the index levels still
        # list participants whose rows were dropped
        paramsDf.loc[entry[0]].loc[entry]
    except KeyError:
        print(entry)  # previously eliminated cases are excluded
        continue
    groupCol.append(labelCoding[entry[0]])
paramsDf['group'] = groupCol
Table now has new column:
# Show the first rows, including the new 'group' column.
paramsDf.head()
Only the threshold value:
# Thresholds per group: box plot with the single participants on top,
# one panel per face gender.
fig = plt.figure(figsize=(12,8))
for i,fGender in enumerate(['M','F']):
    # explicit (rows, columns, position) instead of the string '121'/'122'
    ax = fig.add_subplot(1, 2, i+1)
    sns.boxplot(x='group',y=(fGender,'threshold'),data=paramsDf,
                width=0.4,linewidth=1,color='white',whis=True,notch=True,fliersize=0,ax=ax)
    sns.stripplot(x='group',y=(fGender,'threshold'),data=paramsDf,
                  jitter=True, edgecolor='white',palette=myPal,size=9,linewidth=1,ax=ax)
    if fGender == 'M':
        ax.set_title('Male Faces')
    else:
        ax.set_title('Female Faces')
    ax.set_ylim(0,1)
    ax.set_xticklabels(['Violence\nOffenders','Child\nMolesters','General\nPopulation'],fontsize=15)
    ax.set_xlabel('')
    # only the left panel carries the y-axis label
    if i==0:
        ax.set_ylabel('Anger Threshold')
    else:
        ax.set_ylabel('')
    sns.despine()
    plt.yticks(np.arange(0,1.01,0.1), [str(a)+'%' for a in np.arange(0,101,10)])
#plt.suptitle('Threshold of fitted function',fontsize=20, position=(0.51,1.1))
plt.tight_layout()
#plt.savefig('../figures/thresholdPic.png',dpi=300)
plt.show()
All curve parameters:
# 2 x 4 grid: one row per face gender, one column per curve parameter.
fig = plt.figure(figsize=(20, 12))
paramNames = ['threshold', 'slope', 'guess', 'lapse']
for rowIdx, fGender in enumerate(['M', 'F']):
    for colIdx, param in enumerate(paramNames):
        # panels are numbered row by row, starting at 1
        panel = rowIdx*len(paramNames) + colIdx + 1
        ax = fig.add_subplot(2, 4, panel)
        column = (fGender, param)
        sns.boxplot(x='group', y=column, data=paramsDf,
                    width=0.4, linewidth=1, color='white', whis=True,
                    notch=True, fliersize=0, ax=ax)
        sns.stripplot(x='group', y=column, data=paramsDf,
                      jitter=True, edgecolor='white', palette=myPal, ax=ax)
        sns.despine()
plt.tight_layout()
plt.show()
def makeMannUTests(df):
    """Run pairwise Mann-Whitney U tests between the groups G, K and A.

    df -- table whose first index level is the group code and whose columns
          are the variables to compare

    Returns a DataFrame with one row per variable and, for each comparison
    ('G > K', 'G > A', 'K > A'), the columns 'U' (rounded to 2 decimals),
    'p' (rounded to 3 decimals) and 'sig' ('*' if p < 0.05, else 'n.s.').
    """
    frames = []
    for comp in [('G', 'K'), ('G', 'A'), ('K', 'A')]:
        d = {}
        for variable in df.columns:
            group1 = df.loc[comp[0]][variable]
            group2 = df.loc[comp[1]][variable]
            U, p = stats.mannwhitneyu(group1, group2)
            if p < 0.05:
                thisSig = '*'
            else:
                thisSig = 'n.s.'
            d[variable] = {'U': round(U, 2), 'p': round(p, 3), 'sig': thisSig}
        # fixed row order U, p, sig (reindex replaces the removed reindex_axis)
        thisDf = pd.DataFrame(d).reindex(['U', 'p', 'sig'])
        thisDf.index = [[comp[0]+' > '+comp[1]]*len(thisDf.index), thisDf.index]
        frames.append(thisDf)
    # concatenate once, then put the variables into the rows
    return pd.concat(frames).T
# Group comparisons of all curve parameters, first for the male faces ...
makeMannUTests(paramsDf['M'])
# ... then for the female faces
makeMannUTests(paramsDf['F'])
Get the AFAS data, clean it up and merge it with the threshold data:
afasDf = pd.read_csv('../outputs/meanAFAS.csv',index_col=[0,1])
# remove the same participants that were removed from paramsDf
for elim in eliminated:
    afasDf.drop([elim],level=1, inplace=True)
# both tables have to list the same participants in the same order
assert ( afasDf.index == paramsDf.index ).all()
mergeDf = pd.DataFrame([ afasDf['Overall'], paramsDf['M']['threshold'], afasDf['group'] ]).T
# overall regression across all participants
sns.jointplot("Overall", "threshold", data=mergeDf, kind="reg",
              xlim=(0, 4), ylim=(0, 1),stat_func=None,
              scatter_kws={"s": 70,"edgecolor":"white","linewidth":1}
              )
#plt.savefig('../figures/afasThreshCorrelationAll.png',dpi=300)
# separate regression line per group
sns.lmplot(x="Overall", y="threshold", data=mergeDf,hue="group",ci=None,truncate=True,palette=myPal,
           scatter_kws={"s": 70,"edgecolor":"white","linewidth":1}
           )
plt.ylim(0,1);plt.xlim(0,4)
#plt.savefig('../figures/afasThreshCorrelationGroups.png',dpi=300)
# The print calls take one pre-formatted string, so they produce the same
# output under Python 2 and Python 3.
r, p = stats.spearmanr(afasDf['Overall'], paramsDf['M']['threshold'])
print("r: %s p: %s" % (r, p))
r, p = stats.pearsonr(afasDf['Overall'], paramsDf['M']['threshold'])
print("r: %s p: %s" % (r, p))
# correlations for every column except the last one
for scale in afasDf.columns[:-1]:
    print("\n%s" % scale)
    r, p = stats.spearmanr(afasDf[scale], paramsDf['M']['threshold'])
    print("Spearman r: %s p: %s" % (r, p))
    r, p = stats.pearsonr(afasDf[scale], paramsDf['M']['threshold'])
    print("Pearson r: %s p: %s" % (r, p))
# Threshold-only table (one column per face gender) plus a group column,
# written to CSV.
jaspParams = paramsDf.copy(deep=True)
# put the parameter name on the first column level so that selecting
# 'threshold' returns the columns of both face genders
jaspParams.columns = jaspParams.columns.reorder_levels([1,0])
jaspParams = jaspParams['threshold'].copy(deep=True)
group = []
# the loop variable is not called `x`, so the global plotting grid `x`
# is no longer overwritten
for participantId in jaspParams.index.levels[1]:
    try:
        # existence check: the index levels still list dropped participants
        jaspParams.loc[participantId[0]].loc[participantId]
    except KeyError:
        print(participantId)
        continue
    group.append(labelCoding[participantId[0]])
jaspParams['group'] = group
jaspParams.head()
jaspParams.to_csv('../outputs/thresholdJASP.csv')