"""Count, per minute, the retail transactions whose product list contains a sentinel.

Pipeline (executed only when the file is run as a script):

1. Read the transactions CSV at ``fp``.
2. Cast the configured categorical columns to ``category`` and the timestamp
   column to ``datetime64``.
3. Flag every row whose ``Product`` cell (a string that looks like a Python
   list literal) contains the sentinel value.
4. Keep only the flagged rows, index them by timestamp and sum them per minute.
"""
import ast

import numpy as np
import pandas as pd

# Step 1: Location of the input data. Replace with your own CSV or data source.
fp = "../data/Retail_Transactions_Dataset.csv"

# Step 2: Columns that should be stored with the pandas 'category' dtype.
categorical_columns = [
    'Payment_Method',
    'City',
    'Store_Type',
    'Discount_Applied',
    'Customer_Category',
    'Season',
    'Promotion',
]

# Step 3: Column holding the transaction timestamp.
timestamp_column = 'Date'


def contains_sentinel_value(string_list, sentinel=0):
    """Return True if the list encoded in *string_list* contains *sentinel*.

    Args:
        string_list: String representation of a Python literal container,
            e.g. ``"[0, 1, 2]"`` or ``"['Milk', 'Bread']"``.
        sentinel: Value to look for. Defaults to ``0``, the value used in
            this example.

    Returns:
        ``True`` when the parsed container holds *sentinel*; ``False``
        otherwise, including when the input cannot be parsed as a literal or
        the parsed value does not support membership tests.
    """
    # SECURITY: the value comes from an external CSV, so it must never be
    # passed to eval(). ast.literal_eval only accepts Python literals and
    # cannot execute code.
    try:
        actual_list = ast.literal_eval(string_list)
        return sentinel in actual_list
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Best effort: malformed cells, missing values (NaN/None) and
        # non-container literals simply do not match.
        return False


def load_transactions(path=fp):
    """Read the transactions CSV and apply the column dtypes used downstream.

    Args:
        path: CSV location; defaults to the module-level ``fp``.

    Returns:
        A DataFrame whose ``categorical_columns`` are ``category`` dtype and
        whose ``timestamp_column`` is parsed to datetime.
    """
    frame = pd.read_csv(path)
    # Step 4: Set the type of the categorical columns to 'category'.
    frame = frame.astype({col: 'category' for col in categorical_columns})
    # Step 5: Set the type of the timestamp column to datetime.
    frame[timestamp_column] = pd.to_datetime(frame[timestamp_column])
    return frame


def build_sentinel_events(df, timestamp_col=timestamp_column,
                          product_col="Product", sentinel=0):
    """Return one event row per transaction whose product list has the sentinel.

    Side effect: adds (or overwrites) the boolean column ``'is_ice_cream'``
    on *df*, as the original script did.

    Args:
        df: Transactions frame containing *timestamp_col* and *product_col*.
        timestamp_col: Name of the datetime column used as the new index.
        product_col: Name of the column holding list-like strings.
        sentinel: Value forwarded to :func:`contains_sentinel_value`.

    Returns:
        A new DataFrame indexed by *timestamp_col* with a single column
        ``'is_ice_cream'`` set to ``1`` for every matching transaction.
    """
    # Step 7: Flag the rows whose product list contains the sentinel value.
    # astype(bool) keeps the mask boolean even when the frame is empty.
    df['is_ice_cream'] = (
        df[product_col]
        .apply(contains_sentinel_value, args=(sentinel,))
        .astype(bool)
    )

    # Step 8: Keep only the timestamp column of the flagged rows. The explicit
    # copy avoids writing into a view of ``df`` in the next step.
    events = df.loc[df['is_ice_cream'], [timestamp_col]].copy()

    # Step 9: Each remaining row counts as one occurrence.
    events['is_ice_cream'] = 1

    # Step 10: Index by timestamp so the frame can be resampled.
    return events.set_index(timestamp_col)


if __name__ == "__main__":
    df = load_transactions(fp)
    filtered_df = build_sentinel_events(df)

    # Step 11: Sum occurrences per minute. 'min' is the non-deprecated
    # spelling of the old 'T' alias; change the frequency as needed.
    resampled_df = filtered_df.resample('min').sum()

    # Display the final resampled DataFrame.
    print(resampled_df)