Convergence and run length based on population measure of change¶
This notebook analyzes batch run data testing out a revised approach to convergence. Instead of just measuring the number of agents who changed risk attitude on the last adjustment round, we measure changes across the population based on changes in the number with agents in each risk attitude - i.e., if two agents changed risk attitude but they swapped categories, there is no net change.
All data from the Hawk/Dove with multiple risk attitudes simulation with adjustment enabled (adopt/average)
- How many simulations converge? How many do not?
- How long does the simulation take to converge?
- What are the adjustment change levels for those that do not converge?
- Is there a significant difference between adapt and average risk adjustment strategy in terms of convergence and run length?
import polars as pl
# analyzing data from several batch runs with a few variations
# - max runs set to 20k, 30k
# - convergence logic in code:
# - num agents changed == 0
# - num agents changed == 0 or sum of risk level changes == 0
# - num agents changed == 0 or sum of risk level changes == 4
# - num agents changed == 0 or sum of risk level changes == 6
df = pl.scan_csv("../../data/hawkdovemulti/riskadjust_converg*2024-07-*.csv").collect()
total_runs = len(df)
print(f"Analyzing {total_runs} runs")
Analyzing 400 runs
simulation run length¶
They either finished fairly quickly (>200 steps) or did not converge before max runs
df["Step"].describe()
statistic | value |
---|---|
str | f64 |
"count" | 400.0 |
"null_count" | 0.0 |
"mean" | 13905.2325 |
"std" | 13374.20646 |
"min" | 50.0 |
"25%" | 141.0 |
"50%" | 20000.0 |
"75%" | 30000.0 |
"max" | 30000.0 |
import altair as alt
alt.Chart(df).mark_bar().encode(
x=alt.X('Step', bin=True),
y='count()',
color="status",
).properties(width=600, height=400)
# what about the runs that converged?
converged = df.filter(pl.col("status") == "converged")
converged["Step"].describe()
statistic | value |
---|---|
str | f64 |
"count" | 187.0 |
"null_count" | 0.0 |
"mean" | 171.620321 |
"std" | 173.181986 |
"min" | 50.0 |
"25%" | 91.0 |
"50%" | 131.0 |
"75%" | 191.0 |
"max" | 1911.0 |
alt.Chart(converged).mark_bar().encode(
x=alt.X('Step', bin=True),
y='count()',
color="status",
).properties(width=600, height=400)
alt.Chart(converged).mark_boxplot().encode(
y='Step',
).properties(width=400, height=400)
What percentage of runs converged?¶
status_totals = df["status"].value_counts()
status_totals
status | count |
---|---|
str | u32 |
"converged" | 187 |
"running" | 213 |
converg_total = status_totals.filter(status_totals["status"] == "converged")["count"][0]
print(f"{converg_total} runs out of {total_runs}; {converg_total/total_runs*100:.2f}% complete")
187 runs out of 400; 46.75% complete
How does adjustment strategy impact convergence percentage?¶
# the datasets should be evenly split between risk strategies
df.group_by("risk_adjustment").len()
risk_adjustment | len |
---|---|
str | u32 |
"average" | 200 |
"adopt" | 200 |
status_by_riskadjust = converged.group_by('risk_adjustment').len()
status_by_riskadjust
risk_adjustment | len |
---|---|
str | u32 |
"adopt" | 102 |
"average" | 85 |
riskadjust_avg_converged = status_by_riskadjust.row(by_predicate=(pl.col("risk_adjustment") == "average"))[1]
riskadjust_adopt_converged = status_by_riskadjust.row(by_predicate=(pl.col("risk_adjustment") == "adopt"))[1]
#status_by_riskadjust.unpivot(on=["risk_adjustment", "len"])
runs_per_strategy = int(total_runs/2)
print(f"{converg_total} runs out of {total_runs}; {converg_total/total_runs*100:.2f}% complete\n")
print("By risk adjustment strategy:")
print(f"\tadopt: {riskadjust_adopt_converged} runs out of {runs_per_strategy}; {riskadjust_adopt_converged/runs_per_strategy*100:.1f}% complete")
print(f"\taveraget: {riskadjust_avg_converged} runs out of {runs_per_strategy}; {riskadjust_avg_converged/runs_per_strategy*100:.1f}% complete")
187 runs out of 400; 46.75% complete By risk adjustment strategy: adopt: 102 runs out of 200; 51.0% complete averaget: 85 runs out of 200; 42.5% complete
Does adjustment strategy have a noticeable impact on convergence time?¶
alt.Chart(converged).mark_boxplot(size=25).encode(
y='Step',
x=alt.X('risk_adjustment', title='risk adjustment'),
color=alt.Color('risk_adjustment', title='risk adjustment')
).properties(width=400, height=400)
How do change measurements compare?¶
For the runs that did not converge, what was the last measurement of change?
unconverged = df.filter(pl.col("status") == "running")
unconverged["num_agents_risk_changed"].describe()
statistic | value |
---|---|
str | f64 |
"count" | 213.0 |
"null_count" | 0.0 |
"mean" | 11.873239 |
"std" | 7.931778 |
"min" | 1.0 |
"25%" | 5.0 |
"50%" | 11.0 |
"75%" | 17.0 |
"max" | 31.0 |
unconverged["sum_risk_level_changes"].describe()
statistic | value |
---|---|
str | f64 |
"count" | 213.0 |
"null_count" | 0.0 |
"mean" | 6.023474 |
"std" | 4.710928 |
"min" | 2.0 |
"25%" | 2.0 |
"50%" | 4.0 |
"75%" | 8.0 |
"max" | 24.0 |
unconverged_lastchange = unconverged.unpivot(on=['num_agents_risk_changed', 'sum_risk_level_changes'], variable_name='measure', value_name='count')
unconverged_lastchange.head()
measure | count |
---|---|
str | i64 |
"num_agents_risk_changed" | 6 |
"num_agents_risk_changed" | 8 |
"num_agents_risk_changed" | 1 |
"num_agents_risk_changed" | 4 |
"num_agents_risk_changed" | 1 |
alt.Chart(unconverged_lastchange).mark_boxplot(size=25).encode(
x=alt.X('measure').axis(alt.Axis(labels=False)), # hide long labels in access, rely on color legend
y='count',
color=alt.Color('measure')
).properties(width=400, height=400)
risk adjustment (adopt / average)¶
hypothesis: adjustment strategy does not have a significant impact on the final result, only affects how long it takes to get there
Is there a stastical difference between adopt vs. average adjustment strategies when we look at the final percent of the population in any one category?
from scipy import stats
df_riskadjust = converged.clone()
# TODO: make reusable functions for annotating data?
for i in range(0, 10):
# calculate new series based on existing
pct_risk_category = df_riskadjust.select(pl.col(f"total_r{i}") / pl.col("total_agents"))
# add new column to the dataframe
df_riskadjust = df_riskadjust.with_columns(pl.Series(name=f"pct_r{i}", values=pct_risk_category))
df_riskadjust = df_riskadjust.with_columns(
pl.Series('pct_risk_inclined', values=df_riskadjust.select((pl.col("total_r0") + pl.col("total_r1") + pl.col("total_r2")) / pl.col("total_agents"))),
pl.Series('pct_risk_moderate', values=df_riskadjust.select((pl.col("total_r3") + pl.col("total_r4") + pl.col("total_r5") + pl.col("total_r6")) / pl.col("total_agents"))),
pl.Series('pct_risk_avoidant', values=df_riskadjust.select((pl.col("total_r7") + pl.col("total_r8") + pl.col("total_r9")) / pl.col("total_agents")))
)
df_riskadjust = df_riskadjust.with_columns(pl.Series('risk_attitude_mean', values=df_riskadjust.select(
(pl.col("total_r1") + pl.col("total_r2")*2 + pl.col("total_r3")*3 + pl.col("total_r4")*4 + pl.col("total_r5")*5 + pl.col("total_r6")*6 + pl.col("total_r7")*7 + pl.col("total_r8")*8 + pl.col("total_r9")*9)
/ pl.col("total_agents"))))
df_adopt = df_riskadjust.filter((pl.col("risk_adjustment") == "adopt"))
df_avg = df_riskadjust.filter((pl.col("risk_adjustment") == "average"))
print(f"adopt: {len(df_adopt):,} rows")
print(f"average: {len(df_avg):,} rows")
adopt: 102 rows average: 85 rows
maxlen = min(len(df_adopt), len(df_avg))
stats.ttest_rel(df_adopt.select("pct_risk_inclined")[:maxlen], df_avg.select("pct_risk_inclined")[:maxlen])
TtestResult(statistic=array([-0.65389108]), pvalue=array([0.51496813]), df=array([84]))
stats.ttest_rel(df_adopt.select("pct_risk_moderate")[:maxlen], df_avg.select("pct_risk_moderate")[:maxlen])
TtestResult(statistic=array([-4.22371664]), pvalue=array([6.06386961e-05]), df=array([84]))
stats.ttest_rel(df_adopt.select("pct_risk_avoidant")[:maxlen], df_avg.select("pct_risk_avoidant")[:maxlen])
TtestResult(statistic=array([6.21018226]), pvalue=array([1.93791028e-08]), df=array([84]))
alt.Chart(df_riskadjust).mark_boxplot(size=25).encode(
x=alt.X('risk_adjustment').axis(alt.Axis(labels=False)), # hide long labels in access, rely on color legend
y='Step',
color=alt.Color('risk_adjustment')
).properties(width=400, height=400)
from simulatingrisk.hawkdovemulti import analysis_utils
import importlib
importlib.reload(analysis_utils)
# df_adopt, df_average
adopt_chart = analysis_utils.graph_population_risk_category(
analysis_utils.groupby_population_risk_category(df_adopt)
).properties(title="risk adjust: adopt")
average_chart = analysis_utils.graph_population_risk_category(
analysis_utils.groupby_population_risk_category(df_avg)
).properties(title="risk adjust: average")
(adopt_chart | average_chart).properties(title="distribution of population category by run").resolve_scale(y='shared')
/Users/rkoeser/workarea/env/simrisk/lib/python3.12/site-packages/simulatingrisk/hawkdovemulti/analysis_utils.py:28: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning. values=poprisk_grouped["risk_category"].map_elements(RiskState.category),
Population risk level percentages¶
What if we look at the distribution of agents by risk level at the end of the simulation in a different way?
# limit to values needed to report and calculate percentages
keep_columns = ['RunId', 'iteration', 'Step', 'risk_adjustment']
# risk levels are 0-9
risk_levels = range(10)
orig_keep_columns = keep_columns.copy() + ['total_agents'] + [f'total_r{i}' for i in risk_levels]
converged_risk_pcts = converged.select(pl.col(orig_keep_columns))
# calculate percentage for each risk level
for rval in risk_levels:
converged_risk_pcts = converged_risk_pcts.with_columns(pl.col(f"total_r{rval}").truediv(pl.col("total_agents")).alias(f"r{rval}"))
# use select to drop unneeded columns\
rlevel_cols = [f'r{i}' for i in risk_levels]
pct_keep_columns = keep_columns.copy() + rlevel_cols
converged_risk_pcts = converged_risk_pcts.select(pct_keep_columns)
converged_risk_pcts.head()
RunId | iteration | Step | risk_adjustment | r0 | r1 | r2 | r3 | r4 | r5 | r6 | r7 | r8 | r9 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
i64 | i64 | i64 | str | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
11 | 11 | 120 | "adopt" | 0.13 | 0.11 | 0.05 | 0.12 | 0.03 | 0.3 | 0.0 | 0.0 | 0.26 | 0.0 |
0 | 0 | 380 | "adopt" | 0.09 | 0.05 | 0.19 | 0.01 | 0.19 | 0.24 | 0.0 | 0.05 | 0.06 | 0.12 |
21 | 21 | 120 | "adopt" | 0.18 | 0.04 | 0.1 | 0.13 | 0.0 | 0.1 | 0.3 | 0.11 | 0.01 | 0.03 |
22 | 22 | 170 | "adopt" | 0.12 | 0.01 | 0.15 | 0.17 | 0.17 | 0.05 | 0.09 | 0.16 | 0.01 | 0.07 |
19 | 19 | 280 | "adopt" | 0.17 | 0.03 | 0.07 | 0.03 | 0.18 | 0.09 | 0.1 | 0.14 | 0.13 | 0.06 |
What does the population look like in terms of percentages?¶
risk_pcts = converged_risk_pcts.with_row_index().unpivot(rlevel_cols, variable_name="risk level", value_name="percentage", index=['index', 'risk_adjustment']) #RunId','iteration', 'Step'])
risk_pcts.head()
index | risk_adjustment | risk level | percentage |
---|---|---|---|
u32 | str | str | f64 |
0 | "adopt" | "r0" | 0.13 |
1 | "adopt" | "r0" | 0.09 |
2 | "adopt" | "r0" | 0.18 |
3 | "adopt" | "r0" | 0.12 |
4 | "adopt" | "r0" | 0.17 |
import altair as alt
from simulatingrisk.hawkdove.model import divergent_colors_10
alt.Chart(risk_pcts).mark_bar().encode(
x=alt.X('index', title='simulation run'),
y=alt.Y('percentage', title='% of population').scale(domain=(0, 1)),
color=alt.Color('risk level').scale(domain=rlevel_cols, range=divergent_colors_10),
order=alt.Order(
# Sort the segments of the bars by this field
'risk level',
sort='ascending'
)
).properties(width=1200, height=800)
# what if we split by adjustment strategy?
chart_adopt = alt.Chart(risk_pcts.filter(risk_adjustment='adopt')).mark_bar().encode(
x=alt.X('index:N', title='simulation run').axis(alt.Axis(labels=False)),
y=alt.Y('percentage', title='% of population').scale(domain=(0, 1)),
color=alt.Color('risk level').scale(domain=rlevel_cols, range=divergent_colors_10),
order=alt.Order(
# Sort the segments of the bars by this field
'risk level',
sort='ascending'
)
).properties(width=500, height=600, title="Risk adjustment strategy: adopt")
chart_average = alt.Chart(risk_pcts.filter(risk_adjustment='average')).mark_bar().encode(
x=alt.X('index:N', title='simulation run').axis(alt.Axis(labels=False)),
y=alt.Y('percentage', title='').scale(domain=(0, 1)),
color=alt.Color('risk level').scale(domain=rlevel_cols, range=divergent_colors_10),
order=alt.Order(
# Sort the segments of the bars by this field
'risk level',
sort='ascending'
)
).properties(width=500, height=600, title="Risk adjustment strategy: average")
chart_adopt | chart_average
What if we aggregate risk levels in buckets?¶
- Risk-inclined (RI) : r = 0, 1, 2
- Risk-moderate (RM): r = 3, 4, 5, 6
- Risk-avoidant (RA): r = 7, 8, 9
import importlib
from simulatingrisk.hawkdovemulti import analysis_utils
from simulatingrisk.hawkdovemulti import model
importlib.reload(analysis_utils)
importlib.reload(model)
from simulatingrisk.hawkdovemulti.analysis_utils import grouped_risk_totals
risk_groups = [ "risk_inclined", "risk_moderate", "risk_avoidant"]
converged_grouped = grouped_risk_totals(converged).select("RunId", "iteration", "Step", "total_agents", "risk_adjustment", *risk_groups)
converged_grouped.head()
RunId | iteration | Step | total_agents | risk_adjustment | risk_inclined | risk_moderate | risk_avoidant |
---|---|---|---|---|---|---|---|
i64 | i64 | i64 | i64 | str | i64 | i64 | i64 |
11 | 11 | 120 | 100 | "adopt" | 29 | 45 | 26 |
0 | 0 | 380 | 100 | "adopt" | 33 | 44 | 23 |
21 | 21 | 120 | 100 | "adopt" | 32 | 53 | 15 |
22 | 22 | 170 | 100 | "adopt" | 28 | 48 | 24 |
19 | 19 | 280 | 100 | "adopt" | 27 | 40 | 33 |
# pivot for graphing in altair
converged_grouped_long = converged_grouped.with_row_index().unpivot(risk_groups, variable_name="risk group", value_name="total", index=['index', 'risk_adjustment'])
converged_grouped_long.head()
index | risk_adjustment | risk group | total |
---|---|---|---|
u32 | str | str | i64 |
0 | "adopt" | "risk_inclined" | 29 |
1 | "adopt" | "risk_inclined" | 33 |
2 | "adopt" | "risk_inclined" | 32 |
3 | "adopt" | "risk_inclined" | 28 |
4 | "adopt" | "risk_inclined" | 27 |
# add field for ordering in altair
converged_grouped_long = converged_grouped_long.with_columns(
pl.when(pl.col("risk group") == "risk_inclined")
.then(1)
.when(pl.col("risk group") == "risk_moderate")
.then(2)
.otherwise(3)
.alias("risk group order")
)
converged_grouped_long.head()
index | risk_adjustment | risk group | total | risk group order |
---|---|---|---|---|
u32 | str | str | i64 | i32 |
0 | "adopt" | "risk_inclined" | 29 | 1 |
1 | "adopt" | "risk_inclined" | 33 | 1 |
2 | "adopt" | "risk_inclined" | 32 | 1 |
3 | "adopt" | "risk_inclined" | 28 | 1 |
4 | "adopt" | "risk_inclined" | 27 | 1 |
# subset colors from the divergent color scheme
rgroup_colors = [divergent_colors_10[2], divergent_colors_10[4], divergent_colors_10[6]]
alt.Chart(converged_grouped_long).mark_bar().encode(
x=alt.X('index', title='simulation run'),
y=alt.Y('total', title='# of agents').scale(domain=(0, 100)),
color=alt.Color('risk group').scale(domain=risk_groups, range=rgroup_colors),
order=alt.Order(
# Sort segments by predefined order
'risk group order:N',
sort='ascending'
)
).properties(width=1200, height=800)
# what if we split out by risk strategy?
chart_adopt = alt.Chart(converged_grouped_long.filter(risk_adjustment='adopt')).mark_bar().encode(
x=alt.X('index:N', title='simulation run').axis(alt.Axis(labels=False)),
y=alt.Y('total', title='# of agents').scale(domain=(0, 100)),
color=alt.Color('risk group').scale(domain=risk_groups, range=rgroup_colors),
order=alt.Order(
# Sort segments by predefined order
'risk group order:N',
sort='ascending'
)
).properties(width=500, height=600, title="Risk adjustment strategy: adopt")
chart_average = alt.Chart(converged_grouped_long.filter(risk_adjustment='average')).mark_bar().encode(
x=alt.X('index:N', title='simulation run').axis(alt.Axis(labels=False)),
y=alt.Y('total', title='').scale(domain=(0, 100)),
color=alt.Color('risk group').scale(domain=risk_groups, range=rgroup_colors),
order=alt.Order(
# Sort segments by predefined order
'risk group order:N',
sort='ascending'
)
).properties(width=500, height=600, title="Risk adjustment strategy: average")
chart_adopt | chart_average
# can we sort to group similar populations?
converged_grouped_sort_ri = converged_grouped.sort('risk_inclined', descending=True)
converged_grouped_sort_ri_long = converged_grouped_sort_ri.with_row_index().unpivot(risk_groups, variable_name="risk group", value_name="total", index=['index', 'risk_adjustment'])
# add field for ordering in altair
converged_grouped_sort_ri_long = converged_grouped_sort_ri_long.with_columns(
pl.when(pl.col("risk group") == "risk_inclined")
.then(1)
.when(pl.col("risk group") == "risk_moderate")
.then(2)
.otherwise(3)
.alias("risk group order")
)
alt.Chart(converged_grouped_sort_ri_long).mark_bar().encode(
x=alt.X('index', title='simulation run, sorted by risk inclined population'),
y=alt.Y('total', title='# of agents').scale(domain=(0, 100)),
color=alt.Color('risk group').scale(domain=risk_groups, range=rgroup_colors),
order=alt.Order(
# Sort segments by predefined order
'risk group order',
sort='ascending'
)
).properties(width=1200, height=800)
# can we sort to group similar populations?
converged_grouped_sort_ra = converged_grouped.sort('risk_avoidant', descending=True)
converged_grouped_sort_ra_long = converged_grouped_sort_ra.with_row_index().unpivot(risk_groups, variable_name="risk group", value_name="total", index=['index'])
# add field for ordering in altair
converged_grouped_sort_ra_long = converged_grouped_sort_ra_long.with_columns(
pl.when(pl.col("risk group") == "risk_inclined")
.then(1)
.when(pl.col("risk group") == "risk_moderate")
.then(2)
.otherwise(3)
.alias("risk group order")
)
alt.Chart(converged_grouped_sort_ra_long).mark_bar().encode(
x=alt.X('index', title='simulation run, sorted by risk avoidant population'),
y=alt.Y('total', title='# of agents').scale(domain=(0, 100)),
color=alt.Color('risk group').scale(domain=risk_groups, range=rgroup_colors),
order=alt.Order(
# Sort segments by predefined order
'risk group order',
sort='ascending'
)
).properties(width=1200, height=800)
# what if we split out by risk strategy?
chart_adopt = alt.Chart(converged_grouped_sort_ri_long.filter(risk_adjustment='adopt')).mark_bar().encode(
x=alt.X('index:N', title='simulation run').axis(alt.Axis(labels=False)),
y=alt.Y('total', title='# of agents').scale(domain=(0, 100)),
color=alt.Color('risk group').scale(domain=risk_groups, range=rgroup_colors),
order=alt.Order(
# Sort segments by predefined order
'risk group order:N',
sort='ascending'
)
).properties(width=500, height=600, title="Risk adjustment strategy: adopt")
chart_average = alt.Chart(converged_grouped_sort_ri_long.filter(risk_adjustment='average')).mark_bar().encode(
x=alt.X('index:N', title='simulation run').axis(alt.Axis(labels=False)),
y=alt.Y('total', title='').scale(domain=(0, 100)),
color=alt.Color('risk group').scale(domain=risk_groups, range=rgroup_colors),
order=alt.Order(
# Sort segments by predefined order
'risk group order:N',
sort='ascending'
)
).properties(width=500, height=600, title="Risk adjustment strategy: average")
(chart_adopt | chart_average) #.properties(title="Population risk inclined, risk moderate, and averse by simulation run")