import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
from matplotlib.patches import Patch
import statsmodels as sm
from statsmodels.stats.anova import AnovaRM
# Read the Day 1 file, strip "." from column names, and rename the confusing
# E-Prime (edat) column names to something meaningful.
df = pd.read_csv("day1merge_full.csv")
# regex=False: treat '.' literally. With regex=True (the old pandas default)
# the pattern '.' matches EVERY character and wipes out all column names.
df.columns = df.columns.str.replace('.', '', regex=False)
df.rename(columns={"SubTrial": "TrialNumber", "Procedure[SubTrial]": "Signal"}, inplace=True)
df.head()

# Merge the column pairs that refer to the same variable (GoGreenACC and
# GoGreen1ACC, for example) into a single column per stimulus/measure.
# Order matters: all ACC columns first, then all RT columns, so the iloc
# offsets below stay valid.
_day1_stems = ["GoGreen", "GoRedRev", "GoViolet", "GoBlueRev",
               "NoGoRed", "NoGoGreenRev", "NoGoBlue", "NoGoVioletRev"]
for _measure in ("ACC", "RT"):
    for _stem in _day1_stems:
        df[f"{_stem}_{_measure}"] = df[f"{_stem}{_measure}"].combine_first(df[f"{_stem}1{_measure}"])

# Keep the identifier columns (0:6) plus the newly merged columns (38 onward).
concat_df = df.iloc[:, np.r_[0:6, 38:len(df.columns)]]
# Replace NaN cells with blanks, and replace the RT 0 values with blanks.
df_clean = concat_df.fillna('')
replacezeros = df_clean.iloc[:, 14:len(df_clean.columns)]
df_cleanedzeros = replacezeros.replace(0, "")
# Apply the zero-replaced version back onto the main dataframe.
df_clean.iloc[:, 14:len(df_clean.columns)] = df_cleanedzeros
# Save cleaned-up Day 1 raw data as a csv in case you want to repeat the
# analyses in Excel with pivot tables.
# df_clean.to_csv("Exp8Data_Day1_full.csv")
df_clean.head()
# "Melt" the Day-1 NoGo data into long format for plotting and stats, keyed by
# "Subject" and "ExperimentName". Instead of numerous performance columns we
# can then group by "Congruency" and "StimulusType".
stacked_data = pd.melt(df_clean, id_vars=["Subject", "ExperimentName"],
                       value_vars=["NoGoRed_ACC", "NoGoGreenRev_ACC", "NoGoBlue_ACC", "NoGoVioletRev_ACC"],
                       var_name="DV", value_name="Accuracy")

# Lookup table: each NoGo measure -> its congruency condition.
_NOGO_CONGRUENCY = {
    "NoGoRed_ACC": "Congruent",
    "NoGoGreenRev_ACC": "Incongruent",
    "NoGoBlue_ACC": "Congruent",
    "NoGoVioletRev_ACC": "Incongruent",
}

def conditions_phase(x):
    """Return the congruency label for a Day-1 NoGo accuracy column name."""
    return _NOGO_CONGRUENCY.get(x)

stacked_data["Congruency"] = stacked_data["DV"].map(conditions_phase)

# Lookup table: each experiment name -> its stimulus type.
_STIMULUS_TYPE = {
    "FamCongFirst": "Familiar",
    "FamIncongFirst": "Familiar",
    "NovCongFirst": "Novel",
    "NovIncongFirst": "Novel",
}

def StimCond(x):
    """Return the stimulus-type label for a Day-1 experiment name."""
    return _STIMULUS_TYPE.get(x)

stacked_data["StimulusType"] = stacked_data["ExperimentName"].map(StimCond)

# Add a Feedback-group column; handy when the two days' dataframes are merged.
stacked_data["FeedbackCond"] = "NoFeedback"
# stacked_data.to_csv("teststacked.csv")

# Rows with empty Accuracy values are Go trials (only NoGo trials are listed
# here), so drop them. .copy() gives an independent frame so the to_numeric
# assignment below does not raise SettingWithCopyWarning.
stacked_data_truncated = stacked_data[stacked_data.Accuracy != ''].copy()
# stacked_data_truncated.to_csv("testtruncated.csv")
# Convert the Accuracy column to numeric so it can be aggregated and plotted.
stacked_data_truncated['Accuracy'] = pd.to_numeric(stacked_data_truncated['Accuracy'])
# Pivot table: one row per subject, one column per factor combination.
piv_tab = pd.pivot_table(stacked_data_truncated, index=["Subject"],
                         columns=["StimulusType", "Congruency"], values=["Accuracy"])
piv_tab.head()
# Turn the melted data into a plot. seaborn needs the grouping variables as
# regular data columns, so group with as_index=False (not as index levels).
forplot = stacked_data_truncated.groupby(['StimulusType', "Congruency", "Subject"], as_index=False).Accuracy.mean()
# Bar plot of per-subject mean NoGo accuracy by stimulus type and congruency.
# Pulling from a grouped frame limits seaborn customization a bit, but it'll do.
ax=sns.barplot(x="StimulusType", y='Accuracy', hue="Congruency", palette=["#ff0000", "#03d547"], ci=68, capsize=0.01, data=forplot)
# ci=68 because a 68% confidence interval corresponds to +/- 1 standard error of the mean.
# NOTE(review): the ci= keyword is deprecated in seaborn >= 0.12 (use errorbar=("ci", 68)) -- confirm installed version.
# hue only takes 2 colors, so the two Novel bars are recolored manually via patches:
ax.patches[1].set_facecolor("#1d47f5")
ax.patches[3].set_facecolor("#d12fdf")
# Fun fact: these hex codes are extracted from the actual colors of the stimuli.
# NOTE(review): patch ordering inside ax.patches depends on the seaborn version -- verify indices 1 and 3 are the Novel bars.
# Remove unnecessary plot elements and tidy up.
ax.legend_.remove()
ax.set_xlabel("")
ax.set_ylabel("NoGo Accuracy")
plt.title("Familiar stimuli elicit incongruency-related \nimpairments in NoGo accuracy", weight="bold", y=1.08)
ax.set_ylim(0.5, 0.85)
sns.despine()
# Add the significance bracket and asterisk over the first pair of bars.
x1, x2 = -0.20, 0.18  # x positions of the two bars in the first category (centered on x=0)
y, h, col = forplot['Accuracy'].mean()+0.14, 0.005, "k"  # y: bracket height; h: drop of the bracket legs; col: black
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.25, c=col)  # draw the bracket on top of the bar plot
plt.text((x1+x2)*0.5, y+h, "*", ha='center', va='bottom', color=col)  # asterisk centered above the bracket
#plt.savefig("Exp8_NoGo_graph.tiff", bbox_inches="tight", dpi=500)
plt.show()
# Repeat the melt/label/pivot sequence for Day-1 Go accuracy.
stacked_data_go = pd.melt(df_clean, id_vars=["Subject", "ExperimentName"],
                          value_vars=["GoGreen_ACC", "GoRedRev_ACC", "GoViolet_ACC", "GoBlueRev_ACC"],
                          var_name="DV", value_name="Accuracy")

# Lookup table: each Go measure -> its congruency condition.
_GO_CONGRUENCY = {
    "GoGreen_ACC": "Congruent",
    "GoRedRev_ACC": "Incongruent",
    "GoViolet_ACC": "Congruent",
    "GoBlueRev_ACC": "Incongruent",
}

def conditions_phase_go(x):
    """Return the congruency label for a Day-1 Go accuracy column name."""
    return _GO_CONGRUENCY.get(x)

stacked_data_go["Congruency"] = stacked_data_go["DV"].map(conditions_phase_go)

# Lookup table: each experiment name -> its stimulus type.
_STIMULUS_TYPE_GO = {
    "FamCongFirst": "Familiar",
    "FamIncongFirst": "Familiar",
    "NovCongFirst": "Novel",
    "NovIncongFirst": "Novel",
}

def StimCond_go(x):
    """Return the stimulus-type label for a Day-1 experiment name."""
    return _STIMULUS_TYPE_GO.get(x)

stacked_data_go["StimulusType"] = stacked_data_go["ExperimentName"].map(StimCond_go)

# Feedback-group column for the later merge of both days' dataframes.
stacked_data_go["FeedbackCond"] = "NoFeedback"
# Rows with empty Accuracy values are NoGo trials (only Go trials are listed
# here), so drop them. .copy() gives an independent frame so the to_numeric
# assignment below does not raise SettingWithCopyWarning.
stacked_data_go_truncated = stacked_data_go[stacked_data_go.Accuracy != ''].copy()
stacked_data_go_truncated.head()
# Convert the Accuracy column to a numeric dtype.
stacked_data_go_truncated['Accuracy'] = pd.to_numeric(stacked_data_go_truncated['Accuracy'])
# Pivot table: one row per subject, one column per factor combination.
piv_tab_go = pd.pivot_table(stacked_data_go_truncated, index=["Subject"],
                            columns=["StimulusType", "Congruency"], values=["Accuracy"])
piv_tab_go.head()
# Group so the factor variables are plain columns for seaborn plotting.
forplot_go = stacked_data_go_truncated.groupby(["StimulusType", "Congruency", "Subject"], as_index=False).Accuracy.mean()
# Bar plot of per-subject mean Go accuracy by stimulus type and congruency.
ax_go=sns.barplot(x="StimulusType", y='Accuracy', hue="Congruency", palette=["#03d547", "#ff0000"], ci=68, capsize=0.01, data=forplot_go)
# ci=68 because a 68% confidence interval corresponds to +/- 1 standard error of the mean.
# NOTE(review): the ci= keyword is deprecated in seaborn >= 0.12 (use errorbar=("ci", 68)) -- confirm installed version.
# hue only takes 2 colors, so the two Novel bars are recolored manually via patches:
ax_go.patches[1].set_facecolor("#d12fdf")
ax_go.patches[3].set_facecolor("#1d47f5")
# Fun fact: these color codes are extracted from the actual colors of the stimuli.
# NOTE(review): patch ordering inside ax.patches depends on the seaborn version -- verify indices 1 and 3 are the Novel bars.
# Remove unnecessary plot elements and tidy up.
ax_go.legend_.remove()
ax_go.set_xlabel("")
ax_go.set_ylabel("Go Accuracy")
plt.title("Familiar stimuli elicit incongruency-related \nimpairments in Go accuracy", weight="bold", y=1.08)
ax_go.set_ylim(0.5, 1)
sns.despine()
# Add the significance bracket and asterisk over the first pair of bars.
x1, x2 = -0.20, 0.18  # x positions of the two bars in the first category (centered on x=0)
y, h, col = forplot_go['Accuracy'].mean()+0.1, 0.005, "k"  # y: bracket height; h: drop of the bracket legs; col: black
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.25, c=col)  # draw the bracket on top of the bar plot
plt.text((x1+x2)*0.5, y+h, "*", ha='center', va='bottom', color=col)  # asterisk centered above the bracket
#plt.savefig("Exp8_Go_graph.tiff", bbox_inches="tight", dpi=500)
plt.show()
# Read the Day 2 data, where participants receive the feedback manipulation.
df_fb = pd.read_csv("day2merge_full.csv")
# regex=False: treat '.' literally (see the Day-1 cleanup for why).
df_fb.columns = df_fb.columns.str.replace('.', '', regex=False)
df_fb.rename(columns={"SubTrial": "TrialNumber", "Procedure[SubTrial]": "Signal"}, inplace=True)
df_fb.head()

# Redo the Day-1 cleanup for the Day-2 sheet. Familiar stimuli only, so there
# are fewer measure columns -- the iloc offsets are adjusted accordingly.
_day2_stems = ["GoGreen", "GoRedRev", "NoGoRed", "NoGoGreenRev"]
for _measure in ("ACC", "RT"):
    for _stem in _day2_stems:
        df_fb[f"{_stem}_{_measure}"] = df_fb[f"{_stem}{_measure}"].combine_first(df_fb[f"{_stem}1{_measure}"])

# Keep the identifier columns (0:6) plus the newly merged columns (22 onward).
concat_df_fb = df_fb.iloc[:, np.r_[0:6, 22:len(df_fb.columns)]]
concat_df_fb
# Replace NaN cells with blanks, and replace the RT 0 values with blanks.
df_fb_clean = concat_df_fb.fillna('')
replacezeros_fb = df_fb_clean.iloc[:, 10:len(df_fb_clean.columns)]
df_fb_cleanedzeros = replacezeros_fb.replace(0, "")
# Apply the zero-replaced version back onto the main dataframe.
df_fb_clean.iloc[:, 10:len(df_fb_clean.columns)] = df_fb_cleanedzeros
# Save cleaned-up Day 2 raw data in case you want to repeat analyses in Excel.
# df_fb_clean.to_csv("Exp8Data_Day2.csv")
# Melt the Day-2 (feedback) NoGo data into long format.
stacked_data_fb = pd.melt(df_fb_clean, id_vars=["Subject", "ExperimentName"],
                          value_vars=["NoGoRed_ACC", "NoGoGreenRev_ACC"],
                          var_name="DV", value_name="Accuracy")

def conditions_phase_fb(x):
    """Return the congruency label for a Day-2 NoGo accuracy column name."""
    return {"NoGoRed_ACC": "Congruent", "NoGoGreenRev_ACC": "Incongruent"}.get(x)

stacked_data_fb["Congruency"] = stacked_data_fb["DV"].map(conditions_phase_fb)

def StimCond_fb(x):
    """Return the stimulus type for a Day-2 experiment name (familiar only)."""
    return "Familiar" if x == "Day2_FamFB" else None

stacked_data_fb["StimulusType"] = stacked_data_fb["ExperimentName"].map(StimCond_fb)
# Add the Feedback-group column.
stacked_data_fb["FeedbackCond"] = "Feedback"
# Rows with empty Accuracy values are Go trials (only NoGo trials are listed
# here), so drop them. .copy() gives an independent frame so the to_numeric
# assignment below does not raise SettingWithCopyWarning.
stacked_data_fb_truncated = stacked_data_fb[stacked_data_fb.Accuracy != ''].copy()
stacked_data_fb_truncated.head()
# Convert the Accuracy column to a numeric dtype.
stacked_data_fb_truncated['Accuracy'] = pd.to_numeric(stacked_data_fb_truncated['Accuracy'])
# Pivot table: one row per subject, one column per factor combination.
piv_tab_fb = pd.pivot_table(stacked_data_fb_truncated, index=["Subject"],
                            columns=["StimulusType", "Congruency"], values=["Accuracy"])
piv_tab_fb.head()
# A simple Day-2 Familiar-stimuli graph; the side-by-side with Day 1 comes later.
forplot_fb = stacked_data_fb_truncated.groupby(["Congruency", "Subject"], as_index=False).Accuracy.mean()
# Bar plot of per-subject mean NoGo accuracy by congruency.
# ci=68 so the error bars equal +/- 1 standard error of the mean.
ax_fb=sns.barplot(x="Congruency", y='Accuracy', palette=["#ff0000", "#03d547"], ci=68, capsize=0.01, data=forplot_fb)
# Remove unnecessary plot elements and tidy up.
ax_fb.set_xlabel("")
ax_fb.set_ylabel("NoGo Accuracy")
plt.title("Feedback prevents the congruency-related \nimpairments in NoGo accuracy", weight="bold", y=1.08)
ax_fb.set_ylim(0.5, 0.85)
sns.despine()
# Go accuracy from the Day-2 data: melt into long format.
stacked_data_fb_go = pd.melt(df_fb_clean, id_vars=["Subject", "ExperimentName"],
                             value_vars=["GoGreen_ACC", "GoRedRev_ACC"],
                             var_name="DV", value_name="Accuracy")

def conditions_phase_fb_go(x):
    """Return the congruency label for a Day-2 Go accuracy column name."""
    return {"GoGreen_ACC": "Congruent", "GoRedRev_ACC": "Incongruent"}.get(x)

stacked_data_fb_go["Congruency"] = stacked_data_fb_go["DV"].map(conditions_phase_fb_go)

def StimCond_fb_go(x):
    """Return the stimulus type for a Day-2 experiment name (familiar only)."""
    return "Familiar" if x == "Day2_FamFB" else None

stacked_data_fb_go["StimulusType"] = stacked_data_fb_go["ExperimentName"].map(StimCond_fb_go)
# Add the Feedback-group column.
stacked_data_fb_go["FeedbackCond"] = "Feedback"
# Rows with empty Accuracy values are NoGo trials (only Go trials are listed
# here), so drop them. .copy() gives an independent frame so the to_numeric
# assignment below does not raise SettingWithCopyWarning.
stacked_data_fb_go_truncated = stacked_data_fb_go[stacked_data_fb_go.Accuracy != ''].copy()
stacked_data_fb_go_truncated.head()
# Convert the Accuracy column to a numeric dtype.
stacked_data_fb_go_truncated['Accuracy'] = pd.to_numeric(stacked_data_fb_go_truncated['Accuracy'])
# Pivot table: one row per subject, one column per factor combination.
piv_tab_fb_go = pd.pivot_table(stacked_data_fb_go_truncated, index=["Subject"],
                               columns=["StimulusType", "Congruency"], values=["Accuracy"])
piv_tab_fb_go.head()
# A graph for Day-2 Go accuracy; the side-by-side with Day 1 comes later.
forplot_fb_go = stacked_data_fb_go_truncated.groupby(["Congruency", "Subject"], as_index=False).Accuracy.mean()
# Bar plot of per-subject mean Go accuracy by congruency.
# ci=68 so the error bars equal +/- 1 standard error of the mean.
ax_fb_go=sns.barplot(x="Congruency", y='Accuracy', palette=["#ff0000", "#03d547"], ci=68, capsize=0.01, data=forplot_fb_go)
# Remove unnecessary plot elements and tidy up.
ax_fb_go.set_xlabel("")
ax_fb_go.set_ylabel("Go Accuracy")
plt.title("Feedback prevents the congruency-related \nimpairments in Go accuracy", weight="bold", y=1.08)
ax_fb_go.set_ylim(0.5, 1)
sns.despine()
# We now have NoGo and Go accuracy pivot tables for both days, plotted
# separately. To show how Feedback actually compares to NoFeedback we
# concatenate the Day-1 and Day-2 long-format frames: one combined frame for
# the NoGo data and one for the Go data.
df_clean_all_nogo = pd.concat((stacked_data_truncated, stacked_data_fb_truncated))
# df_clean_all_nogo.to_csv("Exp8Data_All_NoGo.csv")
df_clean_all_go = pd.concat((stacked_data_go_truncated, stacked_data_fb_go_truncated))
# df_clean_all_go.to_csv("Exp8Data_All_Go.csv")
# First pivot table: all NoGo information, now split by feedback condition too.
piv_tab_nogo_all = pd.pivot_table(
    df_clean_all_nogo,
    index=["Subject"],
    columns=["FeedbackCond", "StimulusType", "Congruency"],
    values=["Accuracy"],
)
piv_tab_nogo_all.head()
# Plot the omnibus NoGo data: Feedback vs NoFeedback, congruent vs incongruent.
forplot_all_nogo = df_clean_all_nogo.groupby(["StimulusType", "Congruency", "FeedbackCond", "Subject"], as_index=False).Accuracy.mean()
# Only the Familiar subset is plotted: there is no Novel condition with feedback.
# ci=68 so the error bars equal +/- 1 standard error of the mean.
ax_all_nogo=sns.barplot(x="FeedbackCond", y='Accuracy', hue="Congruency", palette=["#ff0000", "#03d547"], ci=68, capsize=0.01, data=forplot_all_nogo[forplot_all_nogo.StimulusType=="Familiar"])
# Remove unnecessary plot elements and tidy up.
ax_all_nogo.legend_.remove()
ax_all_nogo.set_xlabel("")
ax_all_nogo.set_ylabel("NoGo Accuracy")
plt.title("Feedback prevents incongruency-related impairments \nin NoGo accuracy", weight="bold", y=1.1)
ax_all_nogo.set_ylim(0.5, 0.85)
sns.despine()
# Add the significance bracket and asterisk over the second category's bars.
# NOTE(review): which feedback group sits at x=1 depends on category order in the data -- confirm.
x2, x3 = 0.8, 1.2  # x positions of the two bars in the second category (centered on x=1)
y, h, col = forplot_all_nogo[forplot_all_nogo.StimulusType=="Familiar"]["Accuracy"].mean()+0.13, 0.01, "k"  # bracket height, leg drop, color (black)
plt.plot([x2, x2, x3, x3], [y, y+h, y+h, y], lw=1, c=col)  # draw the bracket on top of the bar plot
plt.text((x2+x3)*0.5, y+h, "*", ha="center", va="bottom", color=col)  # asterisk centered above the bracket
#plt.savefig("Exp8_FB_NoGo_graph.tiff", bbox_inches="tight", dpi=500)
plt.show()
# Same sequence of events for Go accuracy across feedback conditions.
# Pivot table: all Go information, split by feedback condition too.
piv_tab_go_all = pd.pivot_table(df_clean_all_go, index=["Subject"], columns=["FeedbackCond", "StimulusType", "Congruency"], values=["Accuracy"])
piv_tab_go_all.head()
forplot_all_go = df_clean_all_go.groupby(["StimulusType", "Congruency", "FeedbackCond", "Subject"], as_index=False).Accuracy.mean()
# Only plot within the "Familiar" stimulus set: there is no Novel condition
# with feedback. ci=68 so the error bars equal +/- 1 SEM.
ax_all_go=sns.barplot(x="FeedbackCond", y='Accuracy', hue="Congruency", palette=["#03d547", "#ff0000"], ci=68, capsize=0.01, data=forplot_all_go[forplot_all_go.StimulusType=="Familiar"])
# Remove unnecessary plot elements and tidy up.
ax_all_go.legend_.remove()
ax_all_go.set_xlabel("")
ax_all_go.set_ylabel("Go Accuracy")
plt.title("Feedback prevents incongruency-related \nimpairments in Go accuracy", weight="bold", y=1.1)
ax_all_go.set_ylim(0.5, 1)
sns.despine()
# Add the significance bracket and asterisk over the second category's bars.
# NOTE(review): which feedback group sits at x=1 depends on category order in the data -- confirm.
x2, x3 = 0.8, 1.2  # x positions of the two bars in the second category (centered on x=1)
y, h, col = forplot_all_go[forplot_all_go.StimulusType=="Familiar"]["Accuracy"].mean()+0.07, 0.01, "k"  # bracket height, leg drop, color (black)
plt.plot([x2, x2, x3, x3], [y, y+h, y+h, y], lw=1, c=col)  # draw the bracket on top of the bar plot
plt.text((x2+x3)*0.5, y+h, "*", ha="center", va="bottom", color=col)  # asterisk centered above the bracket
#plt.savefig("Exp8_FB_Go_graph.tiff", bbox_inches="tight", dpi=500)
plt.show()
# Export R-friendly per-subject means for posthoc t-tests to accompany the
# regressions (uncomment the to_csv lines to write the sheets).
for_nogo_stats = df_clean_all_nogo.groupby(
    ["StimulusType", "Congruency", "FeedbackCond", "Subject"], as_index=False
)["Accuracy"].mean()
# for_nogo_stats.to_csv("nogostatssheet_full.csv")
for_go_stats = df_clean_all_go.groupby(
    ["StimulusType", "Congruency", "FeedbackCond", "Subject"], as_index=False
)["Accuracy"].mean()
# for_go_stats.to_csv("gostatssheet_full.csv")