Analysis of Hawk/dove multiple risk attitudes with adjustment

Includes analysis of risk attitudes across runs, population category, and paired statistical testing for parameter significance.

Conclusions:

  • adjustment strategy impacts simulation run length (adopt takes much longer to converge than average)
  • recent vs. total payoff does not have a significant impact on the risk attitudes of the final population (the proportions of risk-inclined, risk-moderate, and risk-avoidant agents are similar)
In [1]:
import polars as pl

df = pl.scan_csv("../../data/hawkdovemulti/job*2024-02*.csv").collect()
In [3]:
df.shape
Out[3]:
(329665, 28)
In [4]:
print(f"{df.shape[0]:,} rows")
329,665 rows

risk attitude summary statistics

In [5]:
df.columns
Out[5]:
['RunId',
 'iteration',
 'Step',
 'grid_size',
 'risk_adjustment',
 'play_neighborhood',
 'observed_neighborhood',
 'adjust_neighborhood',
 'hawk_odds',
 'adjust_every',
 'risk_distribution',
 'adjust_payoff',
 'max_agent_points',
 'percent_hawk',
 'rolling_percent_hawk',
 'status',
 'total_agents',
 'population_risk_category',
 'total_r0',
 'total_r1',
 'total_r2',
 'total_r3',
 'total_r4',
 'total_r5',
 'total_r6',
 'total_r7',
 'total_r8',
 'total_r9']
In [6]:
# calculate a mean risk attitude per run
df_riskstats = df.with_columns(
    # r0 is omitted since it is weighted by 0 and contributes nothing to the sum
    pl.Series('risk_attitude_mean', values=df.select(
        (pl.col("total_r1") + pl.col("total_r2")*2 + pl.col("total_r3")*3 + pl.col("total_r4")*4 + pl.col("total_r5")*5 + pl.col("total_r6")*6 + pl.col("total_r7")*7 + pl.col("total_r8")*8 + pl.col("total_r9")*9)  
         / pl.col("total_agents"))
))


# calculate largest group of risk attitudes, as a step towards determining the mode
df_riskstats = df_riskstats.with_columns(rmax=pl.max_horizontal("total_r0", "total_r1", "total_r2", "total_r3", "total_r4", "total_r5", "total_r6", "total_r7", "total_r8", "total_r9"))

# TODO: figure out how to calculate mode based on max within each row

df_riskstats[["risk_attitude_mean", "rmax"]]
Out[6]:
shape: (329_665, 2)
| risk_attitude_mean (f64) | rmax (i64) |
|--------------------------|------------|
| 4.88                     | 20         |
| 4.47                     | 31         |
| 3.62                     | 25         |
| 4.69                     | 33         |
| 2.73                     | 54         |
| …                        | …          |
| 4.5296                   | 152        |
| 5.2496                   | 160        |
| 3.2816                   | 247        |
| 5.9296                   | 126        |
| 2.4512                   | 256        |
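
One way to address the TODO above: the modal risk attitude per row is just the index of the largest total_r column, so the counts can be collected into a list and the per-row arg max taken. A minimal sketch, assuming a polars version where Expr.list.arg_max is available; the risk_attitude_mode column name is hypothetical:

# sketch: modal risk attitude per row = index of the largest total_r column
# (column order r0..r9 means the list index equals the risk level)
risk_cols = [f"total_r{i}" for i in range(10)]
df_riskstats = df_riskstats.with_columns(
    pl.concat_list(risk_cols).list.arg_max().alias("risk_attitude_mode")
)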
In [9]:
# generate a histogram plot of risk attitude mean
import altair as alt

# configure for large datasets
alt.data_transformers.enable("vegafusion")

alt.Chart(df_riskstats).mark_bar().encode(
    alt.X("risk_attitude_mean", bin=True),
    y='count()',
)
Out[9]:

population categories across all runs

In [10]:
df["population_risk_category"].describe()
Out[10]:
shape: (9, 2)
| statistic (str) | value (f64) |
|-----------------|-------------|
| count           | 329665.0    |
| null_count      | 0.0         |
| mean            | 7.963284    |
| std             | 4.267834    |
| min             | 1.0         |
| 25%             | 5.0         |
| 50%             | 7.0         |
| 75%             | 13.0        |
| max             | 13.0        |

graph the number of runs resulting in each category, across all parameter combinations

In [11]:
# graph number of runs in each category

# group on risk category to get total for each type
poprisk_grouped = df.group_by('population_risk_category').agg(pl.col("RunId").sum())  
poprisk_grouped = poprisk_grouped.rename({"population_risk_category": "risk_category", "RunId": "count"})
poprisk_grouped = poprisk_grouped.sort("risk_category")
poprisk_grouped
Out[11]:
shape: (13, 2)
| risk_category (i64) | count (i64) |
|---------------------|-------------|
| 1                   | 25050946    |
| 2                   | 291408160   |
| 3                   | 140448738   |
| 4                   | 76565066    |
| 5                   | 423468466   |
| …                   | …           |
| 9                   | 78636674    |
| 10                  | 78893041    |
| 11                  | 42958110    |
| 12                  | 30871511    |
| 13                  | 841578451   |
In [13]:
from simulatingrisk.hawkdovemulti.model import RiskState

# add column with readable group labels for numeric categories
poprisk_grouped = poprisk_grouped.with_columns(pl.Series(name="type", values=poprisk_grouped["risk_category"].map_elements(RiskState.category, return_dtype=pl.datatypes.String)))
poprisk_grouped
Out[13]:
shape: (13, 3)
| risk_category (i64) | count (i64) | type (str)             |
|---------------------|-------------|------------------------|
| 1                   | 25050946    | majority risk inclined |
| 2                   | 291408160   | majority risk inclined |
| 3                   | 140448738   | majority risk inclined |
| 4                   | 76565066    | majority risk inclined |
| 5                   | 423468466   | majority risk moderate |
| …                   | …           | …                      |
| 9                   | 78636674    | majority risk avoidant |
| 10                  | 78893041    | majority risk avoidant |
| 11                  | 42958110    | majority risk avoidant |
| 12                  | 30871511    | majority risk avoidant |
| 13                  | 841578451   | no majority            |
In [29]:
# not sure how to customize the built-in polars hvplot bar chart to display the way we want... these colors are not working
#poprisk_grouped.plot.bar(x="risk_category", y="count", color="type", colorbar=True,)
In [14]:
import altair as alt
alt.Chart(poprisk_grouped).mark_bar(width=15).encode(
   x=alt.X("risk_category", title="risk category", axis=alt.Axis(tickCount=13),  # 13 categories
           scale=alt.Scale(domain=[1, 13])),
   y=alt.Y("count", title="Number of runs"),
   color=alt.Color("type", title="type")
).properties(title='Distribution of runs by final population risk category')
Out[14]:
In [15]:
# calculate percentages for each risk attitude

for i in range(0, 10):
    # calculate new series based on existing 
    pct_risk_category = df.select(pl.col(f"total_r{i}") / pl.col("total_agents"))
    # add new column to the dataframe
    df = df.with_columns(pl.Series(name=f"pct_r{i}", values=pct_risk_category))
In [16]:
# calculate percentages for each risk grouping

#  Risk-inclined (RI) : r = 0, 1, 2
#  Risk-moderate (RM): r = 3, 4, 5, 6
#  Risk-avoidant (RA): r = 7, 8, 9

df = df.with_columns(
    pl.Series('pct_risk_inclined', values=df.select((pl.col("total_r0") + pl.col("total_r1") + pl.col("total_r2")) / pl.col("total_agents"))),
    pl.Series('pct_risk_moderate', values=df.select((pl.col("total_r3") + pl.col("total_r4") + pl.col("total_r5") + pl.col("total_r6")) / pl.col("total_agents"))),
    pl.Series('pct_risk_avoidant', values=df.select((pl.col("total_r7") + pl.col("total_r8") + pl.col("total_r9")) / pl.col("total_agents")))
)

df[['pct_r0', 'pct_r1', 'pct_risk_inclined', 'pct_risk_moderate', 'pct_risk_avoidant']].head(10)
Out[16]:
shape: (10, 5)
| pct_r0 (f64) | pct_r1 (f64) | pct_risk_inclined (f64) | pct_risk_moderate (f64) | pct_risk_avoidant (f64) |
|--------------|--------------|-------------------------|-------------------------|-------------------------|
| 0.05         | 0.05         | 0.25                    | 0.45                    | 0.3                     |
| 0.25         | 0.16         | 0.47                    | 0.1                     | 0.43                    |
| 0.0          | 0.18         | 0.26                    | 0.7                     | 0.04                    |
| 0.0          | 0.02         | 0.06                    | 0.87                    | 0.07                    |
| 0.12         | 0.05         | 0.31                    | 0.69                    | 0.0                     |
| 0.0          | 0.0          | 0.0                     | 1.0                     | 0.0                     |
| 0.29         | 0.14         | 0.49                    | 0.03                    | 0.48                    |
| 0.05         | 0.26         | 0.52                    | 0.3                     | 0.18                    |
| 0.0          | 0.0          | 0.11                    | 0.8                     | 0.09                    |
| 0.0          | 0.01         | 0.1                     | 0.42                    | 0.48                    |
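
The same percentage and mean columns are recomputed for each dataset below (see the TODOs about making reusable annotation functions). A sketch of a helper that consolidates that logic, using a hypothetical name add_risk_columns and assuming pl.sum_horizontal is available in the installed polars version:

def add_risk_columns(frame: pl.DataFrame) -> pl.DataFrame:
    """Add per-attitude percentages, grouped percentages, and mean risk attitude."""
    return frame.with_columns(
        # percentage of agents at each individual risk attitude
        *[(pl.col(f"total_r{i}") / pl.col("total_agents")).alias(f"pct_r{i}") for i in range(10)],
        # grouped percentages: inclined (r0-r2), moderate (r3-r6), avoidant (r7-r9)
        (pl.sum_horizontal(f"total_r{i}" for i in range(0, 3)) / pl.col("total_agents")).alias("pct_risk_inclined"),
        (pl.sum_horizontal(f"total_r{i}" for i in range(3, 7)) / pl.col("total_agents")).alias("pct_risk_moderate"),
        (pl.sum_horizontal(f"total_r{i}" for i in range(7, 10)) / pl.col("total_agents")).alias("pct_risk_avoidant"),
        # population mean risk attitude, weighting each level by its value (r0 contributes 0)
        (pl.sum_horizontal(pl.col(f"total_r{i}") * i for i in range(10)) / pl.col("total_agents")).alias("risk_attitude_mean"),
    )

# usage, e.g.: df_riskadjust = add_risk_columns(df_riskadjust)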

paired statistical testing

Use a paired t-test to compare pairs of parameter values

risk adjustment (adopt / average)

hypothesis: adjustment strategy does not have a significant impact on the final result; it only affects how long it takes to get there

In [17]:
from scipy import stats


# load data from a different run where only risk adjustment varies

df_riskadjust = pl.read_csv("../data/hawkdovemulti/riskadjust_2024-02-20T202234_198160_model.csv")

# TODO: make reusable functions for annotating data

for i in range(0, 10):
    # calculate new series based on existing 
    pct_risk_category = df_riskadjust.select(pl.col(f"total_r{i}") / pl.col("total_agents"))
    # add new column to the dataframe
    df_riskadjust = df_riskadjust.with_columns(pl.Series(name=f"pct_r{i}", values=pct_risk_category))

df_riskadjust = df_riskadjust.with_columns(
    pl.Series('pct_risk_inclined', values=df_riskadjust.select((pl.col("total_r0") + pl.col("total_r1") + pl.col("total_r2")) / pl.col("total_agents"))),
    pl.Series('pct_risk_moderate', values=df_riskadjust.select((pl.col("total_r3") + pl.col("total_r4") + pl.col("total_r5") + pl.col("total_r6")) / pl.col("total_agents"))),
    pl.Series('pct_risk_avoidant', values=df_riskadjust.select((pl.col("total_r7") + pl.col("total_r8") + pl.col("total_r9")) / pl.col("total_agents")))
)

df_riskadjust = df_riskadjust.with_columns(pl.Series('risk_attitude_mean', values=df_riskadjust.select(
        (pl.col("total_r1") + pl.col("total_r2")*2 + pl.col("total_r3")*3 + pl.col("total_r4")*4 + pl.col("total_r5")*5 + pl.col("total_r6")*6 + pl.col("total_r7")*7 + pl.col("total_r8")*8 + pl.col("total_r9")*9)  
         / pl.col("total_agents"))))


df_adopt = df_riskadjust.filter((pl.col("risk_adjustment") == "adopt"))
df_avg = df_riskadjust.filter((pl.col("risk_adjustment") == "average"))

print(f"adopt: {df_adopt.shape[0]:,} rows")
print(f"average: {df_avg.shape[0]:,} rows")

stats.ttest_rel(df_adopt.select("pct_risk_inclined"), df_avg.select("pct_risk_inclined"))
adopt: 200 rows
average: 200 rows
Out[17]:
TtestResult(statistic=array([-10.09625985]), pvalue=array([1.29076295e-19]), df=array([199]))
In [18]:
stats.ttest_rel(df_adopt.select("pct_risk_moderate"), df_avg.select("pct_risk_moderate"))
Out[18]:
TtestResult(statistic=array([-17.94549027]), pvalue=array([1.82928695e-43]), df=array([199]))
In [19]:
stats.ttest_rel(df_adopt.select("pct_risk_avoidant"), df_avg.select("pct_risk_avoidant"))
Out[19]:
TtestResult(statistic=array([30.72677834]), pvalue=array([1.77103269e-77]), df=array([199]))
In [20]:
stats.ttest_rel(df_adopt.select("risk_attitude_mean"), df_avg.select("risk_attitude_mean"))
Out[20]:
TtestResult(statistic=array([12.85084099]), pvalue=array([6.4609322e-28]), df=array([199]))
In [21]:
df_adopt["Step"].describe()
Out[21]:
shape: (9, 2)
| statistic (str) | value (f64) |
|-----------------|-------------|
| count           | 200.0       |
| null_count      | 0.0         |
| mean            | 118.465     |
| std             | 41.070582   |
| min             | 65.0        |
| 25%             | 93.0        |
| 50%             | 111.0       |
| 75%             | 135.0       |
| max             | 403.0       |
In [22]:
df_avg["Step"].describe()
Out[22]:
shape: (9, 2)
| statistic (str) | value (f64) |
|-----------------|-------------|
| count           | 200.0       |
| null_count      | 0.0         |
| mean            | 87.065      |
| std             | 19.394367   |
| min             | 64.0        |
| 25%             | 71.0        |
| 50%             | 81.0        |
| 75%             | 96.0        |
| max             | 155.0       |
In [52]:
alt.Chart(df_riskadjust).mark_boxplot(extent="min-max").encode(
    x=alt.X("Step", title="Simulation length"),
    y=alt.Y("risk_adjustment", title="")
).properties(title="Simulation length by Risk Attitude Adjustment Strategy")
Out[52]:

adjustment payoff comparison (recent vs total)

In [31]:
df_payoff = pl.read_csv("../data/hawkdovemulti/payoff_2024-02-20T220514_546573_model.csv")

df_payoff["adjust_payoff"].value_counts()
Out[31]:
shape: (2, 2)
| adjust_payoff (str) | count (u32) |
|---------------------|-------------|
| recent              | 200         |
| total               | 200         |
In [32]:
# TODO: make reusable functions for annotating data

for i in range(0, 10):
    # calculate new series based on existing 
    pct_risk_category = df_payoff.select(pl.col(f"total_r{i}") / pl.col("total_agents"))
    # add new column to the dataframe
    df_payoff = df_payoff.with_columns(pl.Series(name=f"pct_r{i}", values=pct_risk_category))

df_payoff = df_payoff.with_columns(
    pl.Series('pct_risk_inclined', values=df_payoff.select((pl.col("total_r0") + pl.col("total_r1") + pl.col("total_r2")) / pl.col("total_agents"))),
    pl.Series('pct_risk_moderate', values=df_payoff.select((pl.col("total_r3") + pl.col("total_r4") + pl.col("total_r5") + pl.col("total_r6")) / pl.col("total_agents"))),
    pl.Series('pct_risk_avoidant', values=df_payoff.select((pl.col("total_r7") + pl.col("total_r8") + pl.col("total_r9")) / pl.col("total_agents")))
)

df_payoff = df_payoff.with_columns(pl.Series('risk_attitude_mean', values=df_payoff.select(
        (pl.col("total_r1") + pl.col("total_r2")*2 + pl.col("total_r3")*3 + pl.col("total_r4")*4 + pl.col("total_r5")*5 + pl.col("total_r6")*6 + pl.col("total_r7")*7 + pl.col("total_r8")*8 + pl.col("total_r9")*9)  
         / pl.col("total_agents"))))


df_recent = df_payoff.filter((pl.col("adjust_payoff") == "recent"))
df_total = df_payoff.filter((pl.col("adjust_payoff") == "total"))

stats.ttest_rel(df_recent.select("pct_risk_inclined"), df_total.select("pct_risk_inclined"))
Out[32]:
TtestResult(statistic=array([0.3840643]), pvalue=array([0.70134085]), df=array([199]))
In [33]:
stats.ttest_rel(df_recent.select("pct_risk_moderate"), df_total.select("pct_risk_moderate"))
Out[33]:
TtestResult(statistic=array([-0.03284255]), pvalue=array([0.97383306]), df=array([199]))
In [34]:
stats.ttest_rel(df_recent.select("pct_risk_avoidant"), df_total.select("pct_risk_avoidant"))
Out[34]:
TtestResult(statistic=array([-0.34960905]), pvalue=array([0.72700182]), df=array([199]))
In [35]:
stats.ttest_rel(df_recent.select("risk_attitude_mean"), df_total.select("risk_attitude_mean"))
Out[35]:
TtestResult(statistic=array([-0.24279719]), pvalue=array([0.80841258]), df=array([199]))
In [41]:
alt.Chart(df_payoff).mark_boxplot(extent="min-max").encode(
    x=alt.X("pct_risk_inclined", title="% Risk Inclined"),
    y=alt.Y("adjust_payoff", title="Adjustment payoff")
)
Out[41]:
In [45]:
alt.Chart(df_payoff).mark_boxplot(extent="min-max").encode(
    x=alt.X("pct_risk_moderate", title="% Risk Moderate"),
    y=alt.Y("adjust_payoff", title="Adjustment payoff")
)
Out[45]:
In [46]:
alt.Chart(df_payoff).mark_boxplot(extent="min-max").encode(
    x=alt.X("pct_risk_avoidant", title="% Risk avoidant"),
    y=alt.Y("adjust_payoff", title="Adjustment payoff")
)
Out[46]:
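
The three boxplots above could also be combined into one faceted chart by unpivoting the grouped percentage columns first. A sketch, assuming a recent polars version where DataFrame.unpivot is available (older releases call this melt):

# reshape so all three grouped percentages share one faceted boxplot
pct_long = df_payoff.unpivot(
    on=["pct_risk_inclined", "pct_risk_moderate", "pct_risk_avoidant"],
    index="adjust_payoff",
    variable_name="risk_group",
    value_name="pct",
)

alt.Chart(pct_long).mark_boxplot(extent="min-max").encode(
    x=alt.X("pct", title="% of population"),
    y=alt.Y("adjust_payoff", title="Adjustment payoff"),
    row=alt.Row("risk_group", title=""),
)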