import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from hmmlearn import hmm
# --- Load the daily sales data and keep only the most recent CWOY values ---
fp = "../data/daily_baugette_sales.csv"
df = pd.read_csv(fp)

# Keep rows whose CWOY is within 40 of the largest CWOY in the file.
# NOTE(review): `>=` retains max-40 .. max inclusive, i.e. up to 41 distinct
# values if CWOY is an integer week counter -- confirm this matches the
# "last_40_weeks" name used for the output file below.
CUT_OFF = df.CWOY.max() - 40
select_cut_off = df.CWOY >= CUT_OFF
cols_needed = ["datetime", "Quantity"]
# Row filter and column projection in a single .loc selection.
df = df.loc[select_cut_off, cols_needed]

# --- Quick look: KDE of Quantity on the current (implicitly created) axes ---
df["Quantity"].plot.kde()
plt.grid(True)

# --- Side-by-side KDE and histogram on a fresh 10 x 6 inch figure ---
fig = plt.figure()
fig.set_size_inches(10, 6, forward=True)
fig.subplots_adjust(hspace=0.3, wspace=0.5)

# Left panel: kernel density estimate.
kde_ax = fig.add_subplot(1, 2, 1)
df["Quantity"].plot.kde(ax=kde_ax)
kde_ax.grid(True)
kde_ax.set_title("Daily Baguette Sales - KDE")

# Right panel: histogram. The grid is switched on after tight_layout, in the
# same order as before.
hist_ax = fig.add_subplot(1, 2, 2)
hist_ax.set_title("Daily Baguette Sales - Histogram")
df["Quantity"].plot.hist(ax=hist_ax)
fig.tight_layout()
hist_ax.grid(True)

# --- Persist the filtered frame; the row index is written as the first column ---
fp = "../data/daily_baguette_last_40_weeks.csv"
df.to_csv(fp, index=True)
import unicodedata as ud

# Mean and variance of Quantity, each rounded to three decimals.
summary_stats = {
    "mean": df["Quantity"].mean().round(3),
    "var": df["Quantity"].var().round(3),
}
# Two-column table with one row per statistic: its name and its value.
df_summ_stats = pd.DataFrame(
    {
        "Parameter": list(summary_stats.keys()),
        "Value": list(summary_stats.values()),
    }
)
# Bare expression: displays the table when run as a notebook cell and has no
# effect when the file is executed as a plain script.
df_summ_stats
import numpy as np
import scipy.stats as stats
# Fit on everything except the final 14 rows, which are left out of the fit.
df_fit = df.iloc[:-14]

# Fit a gamma distribution to the quantities. scipy returns the estimates as
# (shape, loc, scale); note that `fitted_beta` therefore holds the *scale*.
fitted_alpha, fitted_loc, fitted_beta = params = stats.gamma.fit(df_fit["Quantity"])

# Report each fitted parameter on its own line.
for _label, _value in (
    ("Fitted shape parameter (alpha):", fitted_alpha),
    ("Fitted location parameter (loc):", fitted_loc),
    ("Fitted scale parameter (beta):", fitted_beta),
):
    print(_label, _value)
from scipy.stats import nbinom

See the Wikipedia article on the negative binomial distribution (the section on the gamma–Poisson mixture). Using the fact that \(n\) is the shape parameter and that the scale parameter is \(\theta = \frac{1-p}{p}\), solve for \(p\), which gives \(\frac{1}{6.97}\). The specific content of interest is: “That is, we can view the negative binomial as a Poisson(λ) distribution, where λ is itself a random variable, distributed as a gamma distribution with shape r and scale θ = (1 − p)/p” - so we get the distribution of \(\lambda\), and hence \(n\) (Wikipedia's \(r\)) and \(p\), from fitting the gamma distribution. These values are shown above.

# Negative-binomial parameters in scipy's (n, p) convention. They are
# hard-coded here, presumably copied from the gamma fit output rather than
# read from the fitted variables.
# NOTE(review): with gamma scale theta = (1 - p) / p, solving for p gives
# p = 1 / (1 + theta). Confirm that 6.97 is (1 + fitted scale) and not the
# fitted scale itself, and that ignoring the fitted `loc` is intended.
n = 9.42
p = 1 / 6.97
NUM_SAMPLES = 2000

# Two separate draws of NUM_SAMPLES negative-binomial variates.
# NOTE(review): no random_state is passed, so each run writes different data.
r1 = nbinom.rvs(n, p, size=NUM_SAMPLES)
r2 = nbinom.rvs(n, p, size=NUM_SAMPLES)

# Use the DataFrame constructor for a dict of equal-length arrays.
# `DataFrame.from_records` is documented for structured arrays and sequences
# of records; given a dict it silently sorts the keys, which is why the
# columns previously had to be reassigned afterwards.
df_gen = pd.DataFrame({"xi": r1, "yi": r2})
fp = "../data/samples_for_stoch_estimation.csv"
df_gen.to_csv(fp, index=False)

# Pair the observed quantities with an equally sized synthetic sample. The
# frame takes its index from the "actual" Series; the drawn values are
# assigned to "fitted" by position.
sample = {
    "actual": df_fit["Quantity"],
    "fitted": nbinom.rvs(n, p, size=df_fit["Quantity"].shape[0]),
}
df_sample = pd.DataFrame(sample)

# Compare the two densities side by side on a 10 x 6 inch figure.
fig = plt.figure()
fig.set_size_inches(10, 6, forward=True)
fig.subplots_adjust(hspace=0.3, wspace=0.5)

# Left panel: KDE of the observed data.
plt.subplot(121)
df_sample["actual"].plot.kde()
plt.grid(True)
plt.title("actual")

# Right panel: KDE of the samples drawn from the negative binomial.
plt.subplot(122)
plt.title("fitted (from samples drawn)")
df_sample["fitted"].plot.kde()
fig.tight_layout()
plt.grid(True)