import ast

import numpy as np
import pandas as pd

# Step 1: Load the retail transactions CSV into a pandas DataFrame.
# Point `fp` at a different file to run the script against another data source.
fp = "../data/Retail_Transactions_Dataset.csv"
df = pd.read_csv(fp)

# Step 2: Names of the columns that should be stored as categoricals
categorical_columns = [
    'Payment_Method',
    'City',
    'Store_Type',
    'Discount_Applied',
    'Customer_Category',
    'Season',
    'Promotion',
]

# Step 3: Name of the column that holds the timestamp
timestamp_column = 'Date'

# Step 4: Cast every categorical column to the 'category' dtype in one call
df = df.astype({name: 'category' for name in categorical_columns})

# Step 5: Parse the timestamp column into datetime values
df[timestamp_column] = pd.to_datetime(df[timestamp_column])

# Step 6: Function to check for a sentinel value (by default the sentinel value is 0)
def contains_sentinel_value(string_list, sentinel=0):
    """Return True if the list encoded in *string_list* contains *sentinel*.

    Args:
        string_list: String holding a Python literal container, e.g.
            ``"[0, 1, 2]"``. Any other input (non-string values, malformed
            text, non-literal expressions) is treated as "no match".
        sentinel: Value to look for inside the parsed container. Defaults
            to ``0``.

    Returns:
        ``True`` when the parsed container holds *sentinel*; ``False`` when it
        does not, or when *string_list* cannot be parsed into a container.
    """
    # ast.literal_eval only accepts Python literals, so text coming from the
    # data file can never execute code the way eval() would.
    try:
        parsed = ast.literal_eval(string_list)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return False  # Unparseable input counts as "sentinel not present"

    # Scalars and strings are not lists of values; never match inside them
    # (this also avoids accidental substring matches on a parsed string).
    if not isinstance(parsed, (list, tuple, set, frozenset, dict)):
        return False

    return sentinel in parsed

# Step 7: Flag the rows whose "Product" list contains the sentinel value, then
# keep only the flagged rows
df['is_ice_cream'] = df["Product"].apply(contains_sentinel_value)
# The boolean mask must be the 'is_ice_cream' column created just above
filtered_df = df[df['is_ice_cream']]

# Step 8: Drop all columns except the timestamp column in the filtered DataFrame.
# Take an explicit copy so the column added in Step 9 is written to an
# independent frame rather than to a slice of df (avoids SettingWithCopyWarning).
filtered_df = filtered_df[[timestamp_column]].copy()

# Step 9: Define a new column in the filtered DataFrame that is set to the value 1,
# so that summing it in Step 11 counts the matching rows
filtered_df['is_ice_cream'] = 1

# Step 10: Set the index of the filtered DataFrame to the timestamp column
filtered_df = filtered_df.set_index(timestamp_column)

# Step 11: Resample the DataFrame on the timestamp index and sum the new column.
# 'min' buckets by minute (the older 'T' alias is deprecated in recent pandas);
# change the frequency as needed.
resampled_df = filtered_df.resample('min').sum()

# Display the final resampled DataFrame
print(resampled_df)