import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn import hmm
# Load the daily sales data and restrict it to the most recent period.
# NOTE(review): the "baugette" spelling in the path is kept as found; verify
# it matches the file name on disk.
fp = "../data/daily_baugette_sales.csv"
df = pd.read_csv(fp)

# Keep rows whose CWOY value is within 40 of the maximum.
# NOTE(review): CWOY is presumably a cumulative week counter, so this keeps
# roughly the last 40 weeks - confirm against the data dictionary.
CUT_OFF = df.CWOY.max() - 40
select_cut_off = df.CWOY >= CUT_OFF
df = df[select_cut_off]

# Only the timestamp and the quantity sold are used from here on.
cols_needed = ["datetime", "Quantity"]
df = df[cols_needed]

# Quick look at the kernel density estimate of the daily quantities.
df["Quantity"].plot.kde()
plt.grid(True)
# Two-panel figure of the daily quantity distribution: KDE on the left,
# histogram on the right.
fig = plt.figure()
fig.set_size_inches(10, 6, forward=True)
fig.subplots_adjust(hspace=0.3, wspace=0.5)

# Left panel: kernel density estimate.
plt.subplot(121)
df["Quantity"].plot.kde()
plt.grid(True)
plt.title("Daily Baguette Sales - KDE")

# Right panel: histogram.
plt.subplot(122)
plt.title("Daily Baguette Sales - Histogram")
df["Quantity"].plot.hist()
plt.grid(True)

# Re-flow the panels so titles and tick labels do not overlap.
fig.tight_layout()
# Persist the filtered frame; index=True also writes the original row labels.
fp = "../data/daily_baguette_last_40_weeks.csv"
df.to_csv(fp, index=True)
import unicodedata as ud
# Mean and variance of the daily quantity, rounded to three decimals.
summary_stats = {
    "mean": df.Quantity.mean().round(3),
    "var": df.Quantity.var().round(3),
}

# Turn the dict into a two-column table: one row per statistic.
df_summ_stats = pd.DataFrame.from_dict(summary_stats, orient="index")
df_summ_stats = df_summ_stats.reset_index()
df_summ_stats.columns = ["Parameter", "Value"]

# Bare expression: displays the table when run as a notebook cell
# (it has no effect when the file is run as a plain script).
df_summ_stats
import numpy as np
import scipy.stats as stats
# Hold out the final 14 rows; the fit uses everything before them.
df_fit = df.iloc[:-14, :]

# Fit the gamma distribution
params = stats.gamma.fit(df_fit["Quantity"])

# Extract the fitted parameters; scipy returns them as (shape, loc, scale).
fitted_alpha, fitted_loc, fitted_beta = params
print("Fitted shape parameter (alpha):", fitted_alpha)
print("Fitted location parameter (loc):", fitted_loc)
print("Fitted scale parameter (beta):", fitted_beta)
from scipy.stats import nbinom
# Check out the Wikipedia article on the negative binomial distribution
# (Gamma-Poisson mixture). The specific content of interest is: "That is, we
# can view the negative binomial as a Poisson(λ) distribution, where λ is
# itself a random variable, distributed as a gamma distribution with shape r
# and scale θ = (1 − p)/p".
# So the fitted gamma distribution gives us both negative binomial parameters:
# \(n\) is the shape parameter (the \(r\) in the quote), and the scale
# parameter satisfies \(\theta = \frac{1-p}{p}\). Solving for \(p\) gives
# \(p = \frac{1}{1+\theta}\), which is \(\frac{1}{6.97}\).
# The fitted shape and scale values are shown above.
# Negative binomial parameters, hard-coded from the gamma fit earlier in this
# file: n is the fitted shape, and p follows from scale = (1 - p) / p.
n = 9.42
p = 1 / 6.97
NUM_SAMPLES = 2000

# Two independent draws of NUM_SAMPLES values each (no seed is set, so the
# values differ from run to run).
r1 = nbinom.rvs(n, p, size=NUM_SAMPLES)
r2 = nbinom.rvs(n, p, size=NUM_SAMPLES)

# One row per sample pair, columns named xi / yi.
df_gen = pd.DataFrame.from_records({"xi": r1, "yi": r2})
df_gen.columns = ["xi", "yi"]
# Write the generated sample pairs to disk without the row index.
fp = "../data/samples_for_stoch_estimation.csv"
df_gen.to_csv(fp, index=False)
# Pair the observed quantities with an equally sized draw from the fitted
# negative binomial so the two distributions can be compared side by side.
sample = {
    "actual": df_fit["Quantity"],
    "fitted": nbinom.rvs(n, p, size=df_fit["Quantity"].shape[0]),
}
df_sample = pd.DataFrame.from_records(sample)
# Two-panel figure comparing the KDE of the observed quantities (left) with
# the KDE of samples drawn from the fitted distribution (right).
fig = plt.figure()
fig.set_size_inches(10, 6, forward=True)
fig.subplots_adjust(hspace=0.3, wspace=0.5)

# Left panel: observed data.
plt.subplot(121)
df_sample["actual"].plot.kde()
plt.grid(True)
plt.title("actual")

# Right panel: simulated data.
plt.subplot(122)
plt.title("fitted (from samples drawn)")
df_sample["fitted"].plot.kde()
plt.grid(True)

# Re-flow the panels so titles and tick labels do not overlap.
fig.tight_layout()