From 4d33ea635e64f6cac4ff600f0fd4f45fb691d2cf Mon Sep 17 00:00:00 2001 From: Oleksandr Shchur Date: Tue, 2 Apr 2024 19:20:32 +0200 Subject: [PATCH] Fix item_id for M5 dataset (#3156) *Description of changes:* - Fix the `item_id` calculation for the M5 dataset when the `id` column is missing from the original dataset By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. **Please tag this PR with at least one of these labels to make our release process faster:** BREAKING, new feature, bug fix, other change, dev setup --- src/gluonts/dataset/repository/_m5.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/gluonts/dataset/repository/_m5.py b/src/gluonts/dataset/repository/_m5.py index 6d9e6b80a8..58af999f0a 100644 --- a/src/gluonts/dataset/repository/_m5.py +++ b/src/gluonts/dataset/repository/_m5.py @@ -112,16 +112,18 @@ def generate_m5_dataset( len(state_ids_un), ] + # Compute unique ID in case `id` column is missing + if "id" not in sales_train_validation.columns: + sales_train_validation["id"] = ( + sales_train_validation["item_id"].astype("str") + + "_" + + sales_train_validation["store_id"].astype("str") + ) # Build target series - train_ids = ( - sales_train_validation["item_id"].str - + "_" - + sales_train_validation["store_id"].str - ) + train_ids = sales_train_validation["id"] train_df = sales_train_validation.drop( ["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"], axis=1, - errors="ignore", ) test_target_values = train_df.values.copy() train_target_values = [ts[:-prediction_length] for ts in train_df.values]