import pandas as pd

# Load the coffee price table; later steps read its "regime" and
# "cents_per_lb" columns.
fp = "../data/regimed_coffee_prices.csv"
df = pd.read_csv(fp)
df  # notebook-style display of the loaded frame

# Distinct regime labels, in order of first appearance.
regimes = df["regime"].unique().tolist()
regimes  # notebook-style display
# Discretize prices separately within each regime.
for r in regimes:
    reg_select = df["regime"] == r  # select each regime
    df_reg = df[reg_select]
    # df_reg.index holds the row labels of this regime; cut its prices into
    # three quantile-based bins (L, M, H) and write them back to those rows.
    df.loc[df_reg.index, "price"] = pd.qcut(
        df_reg["cents_per_lb"], 3, labels=["L", "M", "H"]
    )
# Record, for every row, the discretized price of the row just before it.
for r in regimes:
    reg_select = df["regime"] == r  # select each regime
    df_reg = df[reg_select]
    # Skip the first row of each regime so that its previous price is not
    # assigned here and stays NA when the column is created.
    for ri in df_reg.index[1:]:
        # NOTE(review): `ri - 1` assumes an integer index where the label
        # one below is the preceding observation - confirm against the data.
        df.loc[ri, "previous_price"] = df.loc[ri - 1, "price"]
# Count of rows without a previous price (notebook-style display).
df["previous_price"].isna().sum()

# Write the augmented frame to the coffee price CSV, without the index column.
fp = "../data/regimed_coffee_prices.csv"
df.to_csv(fp, index=False)
# Build one row-normalized price/previous-price table per regime.
matrix_dict = {}
for r in regimes:
    reg_select = df["regime"] == r  # select each regime
    df_reg = df[reg_select]
    # Count co-occurrences of (price, previous_price) within this regime.
    # NOTE(review): rows are the current price and columns the previous
    # price; confirm this is the intended orientation for the matrix.
    df_sm = pd.crosstab(df_reg.price, df_reg.previous_price)
    # Normalize each row by its row sum so the entries of a row add up to ~1.
    df_sm = df_sm.div(df_sm.sum(axis=1), axis=0).round(3)
    matrix_dict[r] = df_sm
    # Persist the matrix, keeping the row labels as the first CSV column.
    fp = "../data/stochastic_matrix_coffee_price-regime-" + r + ".csv"
    df_sm.to_csv(fp, index=True)

matrix_dict["R-5"]  # notebook-style display of one regime's matrix
import plotly.express as px
# Render the "R-5" matrix as an annotated heatmap.
fig = px.imshow(matrix_dict["R-5"], text_auto=True)
fig  # notebook-style display
# Center the title near the top of the figure.
# NOTE(review): the title says "Region" while the data is keyed by regime
# ("R-5") - confirm the intended wording.
fig.update_layout(
    title={
        'text': "Stochastic Matrix for Region 5",
        'y': .95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
)
fig.show()