Analysis of Hawk/dove multiple risk attitudes with adjustment¶
Includes analysis of risk attitudes across runs, population category, and paired statistical testing for parameter significance.
Conclusions:
- adjustment strategy impacts simulation run length (adopt takes much longer to converage than average)
- recent payoff vs total payoff does not have a significant impact on the risk attitudes of the final population (proportions that are risk inclined, risk moderate, and risk avoidant are similar)
In [1]:
import polars as pl
df = pl.scan_csv("../../data/hawkdovemulti/job*2024-02*.csv").collect()
In [3]:
df.shape
Out[3]:
(329665, 28)
In [4]:
print(f"{df.shape[0]:,} rows")
329,665 rows
risk attitude summary statistics¶
In [5]:
df.columns
Out[5]:
['RunId', 'iteration', 'Step', 'grid_size', 'risk_adjustment', 'play_neighborhood', 'observed_neighborhood', 'adjust_neighborhood', 'hawk_odds', 'adjust_every', 'risk_distribution', 'adjust_payoff', 'max_agent_points', 'percent_hawk', 'rolling_percent_hawk', 'status', 'total_agents', 'population_risk_category', 'total_r0', 'total_r1', 'total_r2', 'total_r3', 'total_r4', 'total_r5', 'total_r6', 'total_r7', 'total_r8', 'total_r9']
In [6]:
#def risk_attitude_mean(df):
# return pl.col("RunId")df.total_r0 * 0 +
# calculate a mean of risk attitude per run
df_riskstats = df.with_columns(
# ignore r0 since it is times 0
pl.Series('risk_attitude_mean', values=df.select(
(pl.col("total_r1") + pl.col("total_r2")*2 + pl.col("total_r3")*3 + pl.col("total_r4")*4 + pl.col("total_r5")*5 + pl.col("total_r6")*6 + pl.col("total_r7")*7 + pl.col("total_r8")*8 + pl.col("total_r9")*9)
/ pl.col("total_agents"))
))
# calculate largest group of risk attitudes, as a step towards determining the mode
df_riskstats = df_riskstats.with_columns(rmax=pl.max_horizontal("total_r0", "total_r1", "total_r2", "total_r3", "total_r4", "total_r5", "total_r6", "total_r7", "total_r8", "total_r9"))
# TODO: figure out how to calculate mode based on max within each row
df_riskstats[["risk_attitude_mean", "rmax"]]
Out[6]:
shape: (329_665, 2)
risk_attitude_mean | rmax |
---|---|
f64 | i64 |
4.88 | 20 |
4.47 | 31 |
3.62 | 25 |
4.69 | 33 |
2.73 | 54 |
… | … |
4.5296 | 152 |
5.2496 | 160 |
3.2816 | 247 |
5.9296 | 126 |
2.4512 | 256 |
In [9]:
# generate a histogram plot of risk attitude mean
import altair as alt
# configure for large datasets
alt.data_transformers.enable("vegafusion")
alt.Chart(df_riskstats).mark_bar().encode(
alt.X("risk_attitude_mean", bin=True),
y='count()',
)
Out[9]:
population categories across all runs¶
In [10]:
df["population_risk_category"].describe()
Out[10]:
shape: (9, 2)
statistic | value |
---|---|
str | f64 |
"count" | 329665.0 |
"null_count" | 0.0 |
"mean" | 7.963284 |
"std" | 4.267834 |
"min" | 1.0 |
"25%" | 5.0 |
"50%" | 7.0 |
"75%" | 13.0 |
"max" | 13.0 |
graph number runs resulting in each category, across all parameter combinations
In [11]:
# graph number of runs in each category
# group on risk category to get total for each type
poprisk_grouped = df.group_by('population_risk_category').agg(pl.col("RunId").sum())
poprisk_grouped = poprisk_grouped.rename({"population_risk_category": "risk_category", "RunId": "count"})
poprisk_grouped = poprisk_grouped.sort("risk_category")
poprisk_grouped
Out[11]:
shape: (13, 2)
risk_category | count |
---|---|
i64 | i64 |
1 | 25050946 |
2 | 291408160 |
3 | 140448738 |
4 | 76565066 |
5 | 423468466 |
… | … |
9 | 78636674 |
10 | 78893041 |
11 | 42958110 |
12 | 30871511 |
13 | 841578451 |
In [13]:
from simulatingrisk.hawkdovemulti.model import RiskState
# add column with readable group labels for numeric categories
poprisk_grouped = poprisk_grouped.with_columns(pl.Series(name="type", values=poprisk_grouped["risk_category"].map_elements(RiskState.category, return_dtype=pl.datatypes.String)))
poprisk_grouped
Out[13]:
shape: (13, 3)
risk_category | count | type |
---|---|---|
i64 | i64 | str |
1 | 25050946 | "majority risk inclined" |
2 | 291408160 | "majority risk inclined" |
3 | 140448738 | "majority risk inclined" |
4 | 76565066 | "majority risk inclined" |
5 | 423468466 | "majority risk moderate" |
… | … | … |
9 | 78636674 | "majority risk avoidant" |
10 | 78893041 | "majority risk avoidant" |
11 | 42958110 | "majority risk avoidant" |
12 | 30871511 | "majority risk avoidant" |
13 | 841578451 | "no majority" |
In [29]:
# not sure how to customize the bulitin polars hvplot bar chart to display the way we want... these colors are not working
#poprisk_grouped.plot.bar(x="risk_category", y="count", color="type", colorbar=True,)
In [14]:
import altair as alt
alt.Chart(poprisk_grouped).mark_bar(width=15).encode(
x=alt.X("risk_category", title="risk category", axis=alt.Axis(tickCount=13), # 13 categories
scale=alt.Scale(domain=[1, 13])),
y=alt.Y("count", title="Number of runs"),
color=alt.Color("type", title="type")
).properties(title='Distribution of runs by final population risk category')
Out[14]:
In [15]:
# calculate percentages for each risk attitude
for i in range(0, 10):
# calculate new series based on existing
pct_risk_category = df.select(pl.col(f"total_r{i}") / pl.col("total_agents"))
# add new column to the dataframe
df = df.with_columns(pl.Series(name=f"pct_r{i}", values=pct_risk_category))
In [16]:
# calculate percentages for each risk grouping
# Risk-inclined (RI) : r = 0, 1, 2
# Risk-moderate (RM): r = 3, 4, 5, 6
# Risk-avoidant (RA): r = 7, 8, 9
df = df.with_columns(
pl.Series('pct_risk_inclined', values=df.select((pl.col("total_r0") + pl.col("total_r1") + pl.col("total_r2")) / pl.col("total_agents"))),
pl.Series('pct_risk_moderate', values=df.select((pl.col("total_r3") + pl.col("total_r4") + pl.col("total_r5") + pl.col("total_r6")) / pl.col("total_agents"))),
pl.Series('pct_risk_avoidant', values=df.select((pl.col("total_r7") + pl.col("total_r8") + pl.col("total_r9")) / pl.col("total_agents")))
)
df[['pct_r0', 'pct_r1', 'pct_risk_inclined', 'pct_risk_moderate', 'pct_risk_avoidant']].head(10)
Out[16]:
shape: (10, 5)
pct_r0 | pct_r1 | pct_risk_inclined | pct_risk_moderate | pct_risk_avoidant |
---|---|---|---|---|
f64 | f64 | f64 | f64 | f64 |
0.05 | 0.05 | 0.25 | 0.45 | 0.3 |
0.25 | 0.16 | 0.47 | 0.1 | 0.43 |
0.0 | 0.18 | 0.26 | 0.7 | 0.04 |
0.0 | 0.02 | 0.06 | 0.87 | 0.07 |
0.12 | 0.05 | 0.31 | 0.69 | 0.0 |
0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
0.29 | 0.14 | 0.49 | 0.03 | 0.48 |
0.05 | 0.26 | 0.52 | 0.3 | 0.18 |
0.0 | 0.0 | 0.11 | 0.8 | 0.09 |
0.0 | 0.01 | 0.1 | 0.42 | 0.48 |
paired stastistical testing¶
Use T-test to check pairs of parameters
risk adjustment (adopt / average)¶
hypothesis: adjustment strategy does not have a significant impact on the final result, only affects how long it takes to get there
In [17]:
from scipy import stats
# load data from a different run where only risk adjustment varies
df_riskadjust = pl.read_csv("../data/hawkdovemulti/riskadjust_2024-02-20T202234_198160_model.csv")
# TODO: make reusable functions for annotating data
for i in range(0, 10):
# calculate new series based on existing
pct_risk_category = df_riskadjust.select(pl.col(f"total_r{i}") / pl.col("total_agents"))
# add new column to the dataframe
df_riskadjust = df_riskadjust.with_columns(pl.Series(name=f"pct_r{i}", values=pct_risk_category))
df_riskadjust = df_riskadjust.with_columns(
pl.Series('pct_risk_inclined', values=df_riskadjust.select((pl.col("total_r0") + pl.col("total_r1") + pl.col("total_r2")) / pl.col("total_agents"))),
pl.Series('pct_risk_moderate', values=df_riskadjust.select((pl.col("total_r3") + pl.col("total_r4") + pl.col("total_r5") + pl.col("total_r6")) / pl.col("total_agents"))),
pl.Series('pct_risk_avoidant', values=df_riskadjust.select((pl.col("total_r7") + pl.col("total_r8") + pl.col("total_r9")) / pl.col("total_agents")))
)
df_riskadjust = df_riskadjust.with_columns(pl.Series('risk_attitude_mean', values=df_riskadjust.select(
(pl.col("total_r1") + pl.col("total_r2")*2 + pl.col("total_r3")*3 + pl.col("total_r4")*4 + pl.col("total_r5")*5 + pl.col("total_r6")*6 + pl.col("total_r7")*7 + pl.col("total_r8")*8 + pl.col("total_r9")*9)
/ pl.col("total_agents"))))
df_adopt = df_riskadjust.filter((pl.col("risk_adjustment") == "adopt"))
df_avg = df_riskadjust.filter((pl.col("risk_adjustment") == "average"))
print(f"adopt: {df_adopt.shape[0]:,} rows")
print(f"average: {df_avg.shape[0]:,} rows")
stats.ttest_rel(df_adopt.select("pct_risk_inclined"), df_avg.select("pct_risk_inclined"))
adopt: 200 rows average: 200 rows
Out[17]:
TtestResult(statistic=array([-10.09625985]), pvalue=array([1.29076295e-19]), df=array([199]))
In [18]:
stats.ttest_rel(df_adopt.select("pct_risk_moderate"), df_avg.select("pct_risk_moderate"))
Out[18]:
TtestResult(statistic=array([-17.94549027]), pvalue=array([1.82928695e-43]), df=array([199]))
In [19]:
stats.ttest_rel(df_adopt.select("pct_risk_avoidant"), df_avg.select("pct_risk_avoidant"))
Out[19]:
TtestResult(statistic=array([30.72677834]), pvalue=array([1.77103269e-77]), df=array([199]))
In [20]:
stats.ttest_rel(df_adopt.select("risk_attitude_mean"), df_avg.select("risk_attitude_mean"))
Out[20]:
TtestResult(statistic=array([12.85084099]), pvalue=array([6.4609322e-28]), df=array([199]))
In [21]:
df_adopt["Step"].describe()
Out[21]:
shape: (9, 2)
statistic | value |
---|---|
str | f64 |
"count" | 200.0 |
"null_count" | 0.0 |
"mean" | 118.465 |
"std" | 41.070582 |
"min" | 65.0 |
"25%" | 93.0 |
"50%" | 111.0 |
"75%" | 135.0 |
"max" | 403.0 |
In [22]:
df_avg["Step"].describe()
Out[22]:
shape: (9, 2)
statistic | value |
---|---|
str | f64 |
"count" | 200.0 |
"null_count" | 0.0 |
"mean" | 87.065 |
"std" | 19.394367 |
"min" | 64.0 |
"25%" | 71.0 |
"50%" | 81.0 |
"75%" | 96.0 |
"max" | 155.0 |
In [52]:
alt.Chart(df_riskadjust).mark_boxplot(extent="min-max").encode(
x=alt.X("Step", title="Simulation length"),
y=alt.Y("risk_adjustment", title="")
).properties(title="Simulation length by Risk Attitude Adjustment Strategy")
Out[52]:
adjustment payoff comparison (recent vs total)¶
In [31]:
df_payoff = pl.read_csv("../data/hawkdovemulti/payoff_2024-02-20T220514_546573_model.csv")
df_payoff["adjust_payoff"].value_counts()
Out[31]:
shape: (2, 2)
adjust_payoff | count |
---|---|
str | u32 |
"recent" | 200 |
"total" | 200 |
In [32]:
# TODO: make reusable functions for annotating data
for i in range(0, 10):
# calculate new series based on existing
pct_risk_category = df_payoff.select(pl.col(f"total_r{i}") / pl.col("total_agents"))
# add new column to the dataframe
df_payoff = df_payoff.with_columns(pl.Series(name=f"pct_r{i}", values=pct_risk_category))
df_payoff = df_payoff.with_columns(
pl.Series('pct_risk_inclined', values=df_payoff.select((pl.col("total_r0") + pl.col("total_r1") + pl.col("total_r2")) / pl.col("total_agents"))),
pl.Series('pct_risk_moderate', values=df_payoff.select((pl.col("total_r3") + pl.col("total_r4") + pl.col("total_r5") + pl.col("total_r6")) / pl.col("total_agents"))),
pl.Series('pct_risk_avoidant', values=df_payoff.select((pl.col("total_r7") + pl.col("total_r8") + pl.col("total_r9")) / pl.col("total_agents")))
)
df_payoff = df_payoff.with_columns(pl.Series('risk_attitude_mean', values=df_payoff.select(
(pl.col("total_r1") + pl.col("total_r2")*2 + pl.col("total_r3")*3 + pl.col("total_r4")*4 + pl.col("total_r5")*5 + pl.col("total_r6")*6 + pl.col("total_r7")*7 + pl.col("total_r8")*8 + pl.col("total_r9")*9)
/ pl.col("total_agents"))))
df_recent = df_payoff.filter((pl.col("adjust_payoff") == "recent"))
df_total = df_payoff.filter((pl.col("adjust_payoff") == "total"))
stats.ttest_rel(df_recent.select("pct_risk_inclined"), df_total.select("pct_risk_inclined"))
Out[32]:
TtestResult(statistic=array([0.3840643]), pvalue=array([0.70134085]), df=array([199]))
In [33]:
stats.ttest_rel(df_recent.select("pct_risk_moderate"), df_total.select("pct_risk_moderate"))
Out[33]:
TtestResult(statistic=array([-0.03284255]), pvalue=array([0.97383306]), df=array([199]))
In [34]:
stats.ttest_rel(df_recent.select("pct_risk_avoidant"), df_total.select("pct_risk_avoidant"))
Out[34]:
TtestResult(statistic=array([-0.34960905]), pvalue=array([0.72700182]), df=array([199]))
In [35]:
stats.ttest_rel(df_recent.select("risk_attitude_mean"), df_total.select("risk_attitude_mean"))
Out[35]:
TtestResult(statistic=array([-0.24279719]), pvalue=array([0.80841258]), df=array([199]))
In [41]:
alt.Chart(df_payoff).mark_boxplot(extent="min-max").encode(
x=alt.X("pct_risk_inclined", title="% Risk Inclined"),
y=alt.Y("adjust_payoff", title="Adjustment payoff")
)
Out[41]:
In [45]:
alt.Chart(df_payoff).mark_boxplot(extent="min-max").encode(
x=alt.X("pct_risk_moderate", title="% Risk Moderate"),
y=alt.Y("adjust_payoff", title="Adjustment payoff")
)
Out[45]:
In [46]:
alt.Chart(df_payoff).mark_boxplot(extent="min-max").encode(
x=alt.X("pct_risk_avoidant", title="% Risk avoidant"),
y=alt.Y("adjust_payoff", title="Adjustment payoff")
)
Out[46]: