Hawk/dove multi + adjust, hawk odds and convergence¶

In [2]:
import polars as pl

df = pl.read_csv("../../data/hawkdovemulti/hawkodds_c7_2024-03-01T102040_835593_model.csv")
In [3]:
total_runs = len(df)

print(f"Analyzing {total_runs} runs")
Analyzing 900 runs

what % converged?¶

In [22]:
converged_df = df.filter(df["status"] == "converged")
len(converged_df)
Out[22]:
818
In [5]:
print(f"{len(converged_df) / len(df)*100:.2f}% of runs converged")
90.89% of runs converged
In [6]:
converged_df["Step"].plot.hist()
Out[6]:

does starting hawk odds affect convergence?¶

In [7]:
status_by_hawkodds = df.group_by("hawk_odds", "status").count()
status_by_hawkodds
/var/folders/mb/6qm4h4yx3yqdy2bv2sjyp4z00000gp/T/ipykernel_59966/235324943.py:1: DeprecationWarning: `GroupBy.count` was renamed; use `GroupBy.len` instead
  status_by_hawkodds = df.group_by("hawk_odds", "status").count()
Out[7]:
shape: (18, 3)
hawk_oddsstatuscount
f64stru32
0.7"converged"92
0.2"converged"92
0.5"converged"96
0.6"running"8
0.9"running"15
………
0.4"converged"91
0.3"converged"91
0.7"running"8
0.4"running"9
0.2"running"8
In [8]:
import altair as alt

alt.Chart(status_by_hawkodds).mark_bar().encode(
    x='hawk_odds:N',
    y='count',
    color='status:N'
).properties(title="Simulation status (converged/running) by inital hawk odds", width=250, height=400)
Out[8]:

what about the distribution of run length by starting hawk odds?¶

In [9]:
alt.Chart(converged_df).mark_boxplot(size=20).encode(
    x='hawk_odds:N',
    y='Step',
).properties(
    title=alt.TitleParams(
        "Simulation run length by starting hawk odds",
        subtitle="(converged runs only)"),
    width=350, height=450)
Out[9]:
In [24]:
from simulatingrisk.hawkdovemulti import analysis_utils
import importlib
importlib.reload(analysis_utils)

# calculate percentages for each risk level
# converged_df = analysis_utils.grouped_risk_totals(converged_df)


converged_df = converged_df.with_columns(
    pl.Series('pct_risk_inclined', values=converged_df.select((pl.col("total_r0") + pl.col("total_r1") + pl.col("total_r2")) / pl.col("total_agents"))),
    pl.Series('pct_risk_moderate', values=converged_df.select((pl.col("total_r3") + pl.col("total_r4") + pl.col("total_r5") + pl.col("total_r6")) / pl.col("total_agents"))),
    pl.Series('pct_risk_avoidant', values=converged_df.select((pl.col("total_r7") + pl.col("total_r8") + pl.col("total_r9")) / pl.col("total_agents")))
)
converged_df.head()
Out[24]:
shape: (5, 30)
RunIditerationStephawk_oddsrisk_adjustmentrisk_distributiongrid_sizemax_agent_pointspercent_hawkrolling_percent_hawkstatustotal_agentspopulation_risk_categorynum_agents_risk_changedtotal_r0total_r1total_r2total_r3total_r4total_r5total_r6total_r7total_r8total_r9risk_inclinedrisk_moderaterisk_avoidantpct_risk_inclinedpct_risk_moderatepct_risk_avoidant
i64i64i64f64strstri64i64f64f64stri64i64i64i64i64i64i64i64i64i64i64i64i64i64i64i64f64f64f64
00700.1"adopt""uniform"1011840.370.523"converged"1001369245310213121393828340.380.280.34
66900.1"adopt""uniform"1017580.440.514"converged"100131189291819113472947240.290.470.24
221100.1"adopt""uniform"1020560.270.478667"converged"100133112122101910811152541340.250.410.34
771200.1"adopt""uniform"1022610.470.560333"converged"100301431901230121185415310.540.150.31
441300.1"adopt""uniform"1026300.460.531667"converged"10013311421140150171623629350.360.290.35
In [13]:
# calculate percentages for the larger groupings
#converged_df = analysis_utils.percent_risk_buckets(converged_df)

is there a correlation between hawk odds and percent risk inclined?¶

In [16]:
from sklearn.linear_model import LinearRegression

regr = LinearRegression()
In [25]:
# construct our input and output and split into test/train
from sklearn.model_selection import train_test_split

# input (x value) = initial hawk odds
# NOTE: train wants not a series but a dataframe with a single series
input = converged_df[["hawk_odds"]]

# output (y value, potential correlation) = percent risk inclined
output = converged_df[["pct_risk_inclined"]]

X_train, X_test, y_train, y_test = train_test_split(input, output, test_size=0.2)
In [26]:
# train the model
regr.fit(X_train, y_train)
Out[26]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
fit_intercept  True
copy_X  True
tol  1e-06
n_jobs  None
positive  False
In [27]:
# output the best fit values
print("Intercept: \n", regr.intercept_)
print("Coefficients: \n", regr.coef_)
Intercept: 
 [0.39787349]
Coefficients: 
 [[0.02777488]]
In [28]:
import matplotlib.pyplot as plt

qualitiative_colors = ['#1b9e77','#d95f02','#7570b3','#e7298a']

fig, axs = plt.subplots(figsize=(4.,4.), nrows=1, ncols=1, facecolor='white', dpi=200)
axs.scatter(X_train, y_train, s=1, color=qualitiative_colors[0])
axs.plot(X_train, regr.predict(X_train), color=qualitiative_colors[1], linewidth=2)
axs.scatter(X_train[:4], y_train[:4], color=qualitiative_colors[0])
axs.vlines(X_train[:4], regr.intercept_ + regr.coef_[0]*X_train[:4], y_train[:4], lw=2)
axs.set_xlabel('inital hawk odds')
axs.set_ylabel('percent risk inclined')
Out[28]:
Text(0, 0.5, 'percent risk inclined')
No description has been provided for this image
In [29]:
# make predictions based on this fit
y_pred_linear = regr.predict(X_test)
In [30]:
fig, axs = plt.subplots(figsize=(4.,4.), nrows=2, ncols=1, facecolor='white', dpi=200, sharex=True)
axs[0].scatter(X_train, y_train, s=1, color=qualitiative_colors[0])
axs[0].plot(X_test, y_pred_linear, color=qualitiative_colors[1], linewidth=2)
axs[0].scatter(X_test, y_test, color=qualitiative_colors[2], s=8)
axs[1].hlines(0, -0.1, 0.15, color=qualitiative_colors[1], linewidth=2)
axs[1].scatter(X_test, y_test.to_numpy()-y_pred_linear, color=qualitiative_colors[2], s=10)
axs[1].set_xlabel('initial hawk odds')
axs[0].set_ylabel('% risk inclined')
axs[1].set_ylabel('truth - model')
Out[30]:
Text(0, 0.5, 'truth - model')
No description has been provided for this image

check for linear correlations with risk moderate and risk neutral¶

In [31]:
# output (y value, potential correlation) = percent risk inclined
rmoderate_output = converged_df[["pct_risk_moderate"]]

X_train, X_test, rmoderate_train, rmoderate_test = train_test_split(input, rmoderate_output, test_size=0.2)
In [32]:
# fit another linear regression model
rmoderate_regr = LinearRegression()
rmoderate_regr.fit(X_train, rmoderate_train)
Out[32]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
fit_intercept  True
copy_X  True
tol  1e-06
n_jobs  None
positive  False
In [33]:
# output the best fit values
print("Intercept: \n", rmoderate_regr.intercept_)
print("Coefficients: \n", rmoderate_regr.coef_)
Intercept: 
 [0.33433546]
Coefficients: 
 [[-0.01935048]]
In [34]:
fig, axs = plt.subplots(figsize=(4.,4.), nrows=1, ncols=1, facecolor='white', dpi=200)
axs.scatter(X_train, rmoderate_train, s=1, color=qualitiative_colors[0])
axs.plot(X_train, rmoderate_regr.predict(X_train), color=qualitiative_colors[1], linewidth=2)
axs.scatter(X_train[:4], rmoderate_train[:4], color=qualitiative_colors[0])
axs.vlines(X_train[:4], rmoderate_regr.intercept_ + rmoderate_regr.coef_[0]*X_train[:4], rmoderate_train[:4], lw=2)
axs.set_xlabel('inital hawk odds')
axs.set_ylabel('percent risk moderate')
Out[34]:
Text(0, 0.5, 'percent risk moderate')
No description has been provided for this image
In [35]:
# make predictions based on this fit
rmoderate_y_pred_linear = rmoderate_regr.predict(X_test)
In [36]:
fig, axs = plt.subplots(figsize=(4.,4.), nrows=2, ncols=1, facecolor='white', dpi=200, sharex=True)
axs[0].scatter(X_train, rmoderate_train, s=1, color=qualitiative_colors[0])
axs[0].plot(X_test, rmoderate_y_pred_linear, color=qualitiative_colors[1], linewidth=2)
axs[0].scatter(X_test, rmoderate_test, color=qualitiative_colors[2], s=8)
axs[1].hlines(0, -0.1, 0.15, color=qualitiative_colors[1], linewidth=2)
axs[1].scatter(X_test, y_test.to_numpy()-rmoderate_y_pred_linear, color=qualitiative_colors[2], s=10)
axs[1].set_xlabel('initial hawk odds')
axs[0].set_ylabel('% risk moderate')
axs[1].set_ylabel('truth - model')
Out[36]:
Text(0, 0.5, 'truth - model')
No description has been provided for this image
In [39]:
ravoidant_output = converged_df[["pct_risk_avoidant"]]

X_train, X_test, ravoidant_train, ravoidant_test = train_test_split(input, ravoidant_output, test_size=0.2)
In [40]:
# fit another linear regression model
ravoidant_regr = LinearRegression()
ravoidant_regr.fit(X_train, ravoidant_train)
Out[40]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
fit_intercept  True
copy_X  True
tol  1e-06
n_jobs  None
positive  False
In [41]:
# output the best fit values
print("Intercept: \n", ravoidant_regr.intercept_)
print("Coefficients: \n", ravoidant_regr.coef_)
Intercept: 
 [0.26700041]
Coefficients: 
 [[-0.00755293]]
In [42]:
fig, axs = plt.subplots(figsize=(4.,4.), nrows=1, ncols=1, facecolor='white', dpi=200)
axs.scatter(X_train, ravoidant_train, s=1, color=qualitiative_colors[0])
axs.plot(X_train, rmoderate_regr.predict(X_train), color=qualitiative_colors[1], linewidth=2)
axs.scatter(X_train[:4], ravoidant_train[:4], color=qualitiative_colors[0])
axs.vlines(X_train[:4], ravoidant_regr.intercept_ + ravoidant_regr.coef_[0]*X_train[:4], ravoidant_train[:4], lw=2)
axs.set_xlabel('inital hawk odds')
axs.set_ylabel('percent risk avoidant')
Out[42]:
Text(0, 0.5, 'percent risk avoidant')
No description has been provided for this image

how do the distributions compare?¶

In [43]:
boxplot_risk_inclined = alt.Chart(converged_df).mark_boxplot(size=20, color="#e49444").encode(
    x='hawk_odds:N',
    y='pct_risk_inclined',
).properties(
    title="% risk inclined by starting hawk odds",
    width=350, height=450)

boxplot_risk_moderate = alt.Chart(converged_df).mark_boxplot(size=20, color="#d1615d").encode(
    x='hawk_odds:N',
    y='pct_risk_moderate',
).properties(
    title="% risk moderate by starting hawk odds",
     width=350, height=450
)


boxplot_risk_avoidant = alt.Chart(converged_df).mark_boxplot(size=20).encode(
    x='hawk_odds:N',
    y='pct_risk_avoidant',
).properties(
    title="% risk avoidant by starting hawk odds",
     width=350, height=450
)
(boxplot_risk_inclined | boxplot_risk_moderate | boxplot_risk_avoidant).resolve_scale(y='shared')
Out[43]:
In [49]:
# can we melt to put in a single boxplot with colors?

# rename for readability, then go from wide to long so we can plot side by side
melted_df = converged_df \
     .rename({"pct_risk_inclined": "% risk inclined", "pct_risk_moderate": "% risk moderate", "pct_risk_avoidant": "% risk avoidant"}) \
     .unpivot(index=['hawk_odds'], on=['% risk inclined', '% risk moderate', '% risk avoidant'])
melted_df
Out[49]:
shape: (2_454, 3)
hawk_oddsvariablevalue
f64strf64
0.1"% risk inclined"0.38
0.1"% risk inclined"0.29
0.1"% risk inclined"0.25
0.1"% risk inclined"0.54
0.1"% risk inclined"0.36
………
0.9"% risk avoidant"0.22
0.9"% risk avoidant"0.11
0.9"% risk avoidant"0.32
0.9"% risk avoidant"0.37
0.9"% risk avoidant"0.24
In [50]:
alt.Chart(melted_df).mark_boxplot().encode(
    x=alt.X('hawk_odds:N', title='initial hawk odds'),
    y=alt.Y('value', title='percent of the population'),
    color=alt.Color('variable'),
    xOffset=alt.XOffset('variable')
)
Out[50]:

population categories by initial hawk odds¶

In [51]:
hawkodds01_chart = analysis_utils.graph_population_risk_category(
    analysis_utils.groupby_population_risk_category(converged_df.filter(pl.col("hawk_odds") == 0.1))
).properties(title="hawk odds 0.1")
hawkodds02_chart = analysis_utils.graph_population_risk_category(
    analysis_utils.groupby_population_risk_category(converged_df.filter(pl.col("hawk_odds") == 0.2))
).properties(title="hawk odds 0.2")
hawkodds03_chart = analysis_utils.graph_population_risk_category(
    analysis_utils.groupby_population_risk_category(converged_df.filter(pl.col("hawk_odds") == 0.3))
).properties(title="hawk odds 0.3")

hawkodds04_chart = analysis_utils.graph_population_risk_category(
    analysis_utils.groupby_population_risk_category(converged_df.filter(pl.col("hawk_odds") == 0.4))
).properties(title="hawk odds 0.4")
hawkodds05_chart = analysis_utils.graph_population_risk_category(
    analysis_utils.groupby_population_risk_category(converged_df.filter(pl.col("hawk_odds") == 0.5))
).properties(title="hawk odds 0.5")
hawkodds06_chart = analysis_utils.graph_population_risk_category(
    analysis_utils.groupby_population_risk_category(converged_df.filter(pl.col("hawk_odds") == 0.6))
).properties(title="hawk odds 0.6")


hawkodds07_chart = analysis_utils.graph_population_risk_category(
    analysis_utils.groupby_population_risk_category(converged_df.filter(pl.col("hawk_odds") == 0.7))
).properties(title="hawk odds 0.7")
hawkodds08_chart = analysis_utils.graph_population_risk_category(
    analysis_utils.groupby_population_risk_category(converged_df.filter(pl.col("hawk_odds") == 0.8))
).properties(title="hawk odds 0.8")
hawkodds09_chart = analysis_utils.graph_population_risk_category(
    analysis_utils.groupby_population_risk_category(converged_df.filter(pl.col("hawk_odds") == 0.9))
).properties(title="hawk odds 0.9")


((hawkodds01_chart | hawkodds02_chart | hawkodds03_chart) &
 (hawkodds04_chart | hawkodds05_chart | hawkodds06_chart) &
 (hawkodds07_chart | hawkodds08_chart | hawkodds09_chart))\
.properties(title=alt.TitleParams("Population risk category by run over hawk odds", subtitle="(converged runs only)", anchor="middle")
           ).resolve_scale(y='shared')
/Users/rkoeser/workarea/env/simrisk/lib/python3.12/site-packages/simulatingrisk/hawkdovemulti/analysis_utils.py:28: MapWithoutReturnDtypeWarning: Calling `map_elements` without specifying `return_dtype` can lead to unpredictable results. Specify `return_dtype` to silence this warning.
  values=poprisk_grouped["risk_category"].map_elements(RiskState.category),
Out[51]: