databricks-industry-solutions · ryuta-yoshimatsu · Jan 13, 2025 · Jan 13, 2025
diff --git a/README.md b/README.md
@@ -186,7 +186,7 @@ We encourage you to read through [examples/global_daily.py](https://github.com/d
 
 ### Foundation Models
 
-Foundation time series models are transformer based models pretrained on millions or billions of time points. These models can perform analysis (i.e. forecasting, anomaly detection, classification) on a previously unseen time series without training or tuning. We support open source models from multiple sources: [chronos](https://github.com/amazon-science/chronos-forecasting), [moirai](https://blog.salesforceairesearch.com/moirai/), and [moment](https://github.com/moment-timeseries-foundation-model/moment). Covariates (i.e. exogenous regressors) and fine-tuning are currently not yet supported. This is a rapidly changing field, and we are working on updating the supported models and new features as the field evolves.
+Foundation time series models are transformer-based models pretrained on millions or billions of time points. These models can perform analysis (e.g. forecasting, anomaly detection, classification) on a previously unseen time series without training or tuning. We support open-source models from multiple sources: [chronos](https://github.com/amazon-science/chronos-forecasting), [timesfm](https://github.com/google-research/timesfm), [moirai](https://blog.salesforceairesearch.com/moirai/), and [moment](https://github.com/moment-timeseries-foundation-model/moment). Covariates (i.e. exogenous regressors) and fine-tuning are not yet supported. This is a rapidly changing field, and we are working on updating the supported models and adding new features as the field evolves.
 
 To get started, attach the [examples/foundation_daily.py](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation_daily.py) notebook to a cluster running [DBR 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/index.html) or later versions. We recommend using a single-node cluster with multiple GPU instances such as [g4dn.12xlarge [T4]](https://aws.amazon.com/ec2/instance-types/g4/) on AWS or [Standard_NC64as_T4_v3](https://learn.microsoft.com/en-us/azure/virtual-machines/nct4-v3-series) on Azure. Multi-node setup is currently not supported. 
 
@@ -202,6 +202,8 @@ active_models = [
     "MoiraiSmall",
     "MoiraiBase",
     "MoiraiLarge",
+    "TimesFM_1_0_200m",
+    "TimesFM_2_0_500m",
     "Moment1Large",
 ]
 ```

diff --git a/examples/foundation_daily.py b/examples/foundation_daily.py
@@ -18,7 +18,7 @@
 
 # COMMAND ----------
 
-# MAGIC %pip install -r ../requirements.txt --quiet
+# MAGIC %pip install datasetsforecast==0.0.8 --quiet
 # MAGIC dbutils.library.restartPython()
 
 # COMMAND ----------
@@ -34,7 +34,6 @@
 import pathlib
 import pandas as pd
 from datasetsforecast.m4 import M4
-from mmf_sa import run_forecast
 
 # COMMAND ----------
 
@@ -124,6 +123,8 @@ def transform_group(df):
     "MoiraiSmall",
     "MoiraiBase",
     "MoiraiLarge",
+    "TimesFM_1_0_200m",
+    "TimesFM_2_0_500m",
     "Moment1Large",
 ]
 
@@ -158,8 +159,11 @@ def transform_group(df):
 # COMMAND ----------
 
 display(
-  spark.sql(f"select * from {catalog}.{db}.daily_evaluation_output order by unique_id, model, backtest_window_start_date")
-  )
+  spark.sql(f"""
+    select * from {catalog}.{db}.daily_evaluation_output 
+    where unique_id = 'D1'
+    order by unique_id, model, backtest_window_start_date
+    """))
 
 # COMMAND ----------
 
@@ -181,7 +185,11 @@ def transform_group(df):
 
 # COMMAND ----------
 
-display(spark.sql(f"select * from {catalog}.{db}.daily_scoring_output order by unique_id, model, ds"))
+display(spark.sql(f"""
+    select * from {catalog}.{db}.daily_scoring_output 
+    where unique_id = 'D1'
+    order by unique_id, model, ds
+    """))
 
 # COMMAND ----------
 

diff --git a/examples/foundation_monthly.py b/examples/foundation_monthly.py
@@ -18,7 +18,7 @@
 
 # COMMAND ----------
 
-# MAGIC %pip install -r ../requirements.txt --quiet
+# MAGIC %pip install datasetsforecast==0.0.8 --quiet
 # MAGIC dbutils.library.restartPython()
 
 # COMMAND ----------
@@ -34,7 +34,6 @@
 import pathlib
 import pandas as pd
 from datasetsforecast.m4 import M4
-from mmf_sa import run_forecast
 
 # COMMAND ----------
 
@@ -130,6 +129,8 @@ def transform_group(df):
     "MoiraiSmall",
     "MoiraiBase",
     "MoiraiLarge",
+    "TimesFM_1_0_200m",
+    "TimesFM_2_0_500m",
     "Moment1Large",
 ]
 
@@ -157,7 +158,11 @@ def transform_group(df):
 
 # COMMAND ----------
 
-display(spark.sql(f"select * from {catalog}.{db}.monthly_evaluation_output order by unique_id, model, backtest_window_start_date"))
+display(spark.sql(f"""
+    select * from {catalog}.{db}.monthly_evaluation_output 
+    where unique_id = 'M1'
+    order by unique_id, model, backtest_window_start_date
+    """))
 
 # COMMAND ----------
 
@@ -166,7 +171,11 @@ def transform_group(df):
 
 # COMMAND ----------
 
-display(spark.sql(f"select * from {catalog}.{db}.monthly_scoring_output order by unique_id, model, date"))
+display(spark.sql(f"""
+    select * from {catalog}.{db}.monthly_scoring_output 
+    where unique_id = 'M1'
+    order by unique_id, model, date
+    """))
 
 # COMMAND ----------
 

diff --git a/examples/global_daily.py b/examples/global_daily.py
@@ -19,7 +19,7 @@
 # COMMAND ----------
 
 # DBTITLE 1,Install the necessary libraries
-# MAGIC %pip install -r ../requirements.txt --quiet
+# MAGIC %pip install datasetsforecast==0.0.8 --quiet
 # MAGIC dbutils.library.restartPython()
 
 # COMMAND ----------
@@ -35,7 +35,6 @@
 import pathlib
 import pandas as pd
 from datasetsforecast.m4 import M4
-from mmf_sa import run_forecast
 
 # COMMAND ----------
 
@@ -162,7 +161,11 @@ def transform_group(df):
 
 # COMMAND ----------
 
-display(spark.sql(f"select * from {catalog}.{db}.daily_evaluation_output order by unique_id, model, backtest_window_start_date"))
+display(spark.sql(f"""
+    select * from {catalog}.{db}.daily_evaluation_output 
+    where unique_id = 'D1'
+    order by unique_id, model, backtest_window_start_date
+    """))
 
 # COMMAND ----------
 
@@ -182,7 +185,11 @@ def transform_group(df):
 
 # COMMAND ----------
 
-display(spark.sql(f"select * from {catalog}.{db}.daily_scoring_output order by unique_id, model, ds"))
+display(spark.sql(f"""
+    select * from {catalog}.{db}.daily_scoring_output 
+    where unique_id = 'D1'
+    order by unique_id, model, ds
+    """))
 
 # COMMAND ----------
 

diff --git a/examples/global_external_regressors_daily.py b/examples/global_external_regressors_daily.py
@@ -19,7 +19,7 @@
 
 # COMMAND ----------
 
-# MAGIC %pip install -r ../requirements.txt --quiet
+# MAGIC %pip install datasetsforecast==0.0.8 --quiet
 
 # COMMAND ----------
 
@@ -32,14 +32,6 @@
 
 # COMMAND ----------
 
-import uuid
-import pathlib
-import pandas as pd
-from datasetsforecast.m4 import M4
-from mmf_sa import run_forecast
-
-# COMMAND ----------
-
 # MAGIC %md
 # MAGIC ### Prepare data 
 # MAGIC Before running this notebook, download the dataset from [Kaggle](https://www.kaggle.com/competitions/rossmann-store-sales/data) and store them in Unity Catalog as a [volume](https://docs.databricks.com/en/connect/unity-catalog/volumes.html).

diff --git a/examples/global_monthly.py b/examples/global_monthly.py
@@ -18,7 +18,7 @@
 
 # COMMAND ----------
 
-# MAGIC %pip install -r ../requirements.txt --quiet
+# MAGIC %pip install datasetsforecast==0.0.8 --quiet
 # MAGIC dbutils.library.restartPython()
 
 # COMMAND ----------
@@ -34,7 +34,6 @@
 import pathlib
 import pandas as pd
 from datasetsforecast.m4 import M4
-from mmf_sa import run_forecast
 
 # COMMAND ----------
 
@@ -158,7 +157,11 @@ def transform_group(df):
 
 # COMMAND ----------
 
-display(spark.sql(f"select * from {catalog}.{db}.monthly_evaluation_output order by unique_id, model, backtest_window_start_date"))
+display(spark.sql(f"""
+    select * from {catalog}.{db}.monthly_evaluation_output 
+    where unique_id = 'M1'
+    order by unique_id, model, backtest_window_start_date
+    """))
 
 # COMMAND ----------
 
@@ -167,7 +170,11 @@ def transform_group(df):
 
 # COMMAND ----------
 
-display(spark.sql(f"select * from {catalog}.{db}.monthly_scoring_output order by unique_id, model, date"))
+display(spark.sql(f"""
+    select * from {catalog}.{db}.monthly_scoring_output 
+    where unique_id = 'M1'
+    order by unique_id, model, date
+    """))
 
 # COMMAND ----------
 

diff --git a/examples/local_univariate_daily.py b/examples/local_univariate_daily.py
@@ -1,6 +1,6 @@
 # Databricks notebook source
 # MAGIC %md
-# MAGIC # Many Models Forecasting
+# MAGIC # Many Models Forecasting (MMF)
 # MAGIC This notebook showcases how to run MMF with local models on multiple univariate time series of daily resolution. We will use [M4 competition](https://www.sciencedirect.com/science/article/pii/S0169207019301128#sec5) data.
 
 # COMMAND ----------
@@ -199,8 +199,11 @@ def transform_group(df):
 # COMMAND ----------
 
 display(
-  spark.sql(f"select * from {catalog}.{db}.daily_evaluation_output order by unique_id, model, backtest_window_start_date")
-  )
+  spark.sql(f"""
+    select * from {catalog}.{db}.daily_evaluation_output 
+    where unique_id = 'D1'
+    order by unique_id, model, backtest_window_start_date
+    """))
 
 # COMMAND ----------
 

diff --git a/examples/local_univariate_monthly.py b/examples/local_univariate_monthly.py
@@ -197,8 +197,11 @@ def transform_group(df):
 # COMMAND ----------
 
 display(
-  spark.sql(f"select * from {catalog}.{db}.monthly_evaluation_output order by unique_id, model, backtest_window_start_date")
-  )
+  spark.sql(f"""
+    select * from {catalog}.{db}.monthly_evaluation_output 
+    where unique_id = 'M1'
+    order by unique_id, model, backtest_window_start_date
+    """))
 
 # COMMAND ----------
 
@@ -207,7 +210,11 @@ def transform_group(df):
 
 # COMMAND ----------
 
-display(spark.sql(f"select * from {catalog}.{db}.monthly_scoring_output order by unique_id, model, date"))
+display(spark.sql(f"""
+    select * from {catalog}.{db}.monthly_scoring_output 
+    where unique_id = 'M1'
+    order by unique_id, model, date
+    """))
 
 # COMMAND ----------
 

diff --git a/examples/run_daily.py b/examples/run_daily.py
@@ -1,5 +1,5 @@
 # Databricks notebook source
-# MAGIC %pip install -r ../requirements.txt --quiet
+# MAGIC %pip install -r ../requirements-global.txt --quiet
 # MAGIC dbutils.library.restartPython()
 
 # COMMAND ----------

diff --git a/examples/run_external_regressors_daily.py b/examples/run_external_regressors_daily.py
@@ -1,5 +1,5 @@
 # Databricks notebook source
-# MAGIC %pip install -r ../requirements.txt --quiet
+# MAGIC %pip install -r ../requirements-global.txt --quiet
 # MAGIC dbutils.library.restartPython()
 
 # COMMAND ----------

diff --git a/examples/run_monthly.py b/examples/run_monthly.py
@@ -1,5 +1,5 @@
 # Databricks notebook source
-# MAGIC %pip install -r ../requirements.txt --quiet
+# MAGIC %pip install -r ../requirements-global.txt --quiet
 # MAGIC dbutils.library.restartPython()
 
 # COMMAND ----------

diff --git a/mmf_sa/Forecaster.py b/mmf_sa/Forecaster.py
@@ -421,7 +421,6 @@ def backtest_global_model(
             .withColumnRenamed("avg(metric_value)", "metric_value")
             .toPandas()
         )
-
         metric_name = None
         metric_value = None
 
@@ -444,7 +443,7 @@ def evaluate_foundation_model(self, model_conf):
             model_name = model_conf["name"]
             model = self.model_registry.get_model(model_name)
-            # For now, only support registering chronos, moirai and moment models
-            if model_conf["framework"] in ["Chronos", "Moirai", "Moment"]:
+            # For now, only support registering chronos, moirai, moment and timesfm models
+            if model_conf["framework"] in ["Chronos", "Moirai", "Moment", "TimesFM"]:
                 model.register(
                     registered_model_name=f"{self.conf['model_output']}.{model_conf['name']}_{self.conf['use_case_name']}"
                 )

diff --git a/mmf_sa/data_quality_checks.py b/mmf_sa/data_quality_checks.py
@@ -98,7 +98,7 @@ def _multiple_checks(
             months=conf["backtest_months"]
         )
         if (
-            temp_df[temp_df[conf["date_col"]] < split_date].count()[0]
+            temp_df[temp_df[conf["date_col"]] < split_date].count().iloc[0]
             <= conf["train_predict_ratio"] * conf["prediction_length"]
         ):
             # Removing: train_predict_ratio requirement violated

diff --git a/mmf_sa/forecasting_conf.yaml b/mmf_sa/forecasting_conf.yaml
@@ -59,6 +59,7 @@ active_models:
   - MoiraiBase
   - MoiraiLarge
   - TimesFM_1_0_200m
+  - TimesFM_2_0_500m
   - Moment1Large
 
 #Here we can override hyperparameters for built-in models

diff --git a/mmf_sa/models/models_conf.yaml b/mmf_sa/models/models_conf.yaml
@@ -409,6 +409,12 @@ models:
     framework: TimesFM
     model_type: foundation
 
+  TimesFM_2_0_500m:
+    module: mmf_sa.models.timesfmforecast.TimesFMPipeline
+    model_class: TimesFM_2_0_500m
+    framework: TimesFM
+    model_type: foundation
+
   Moment1Large:
     module: mmf_sa.models.momentforecast.MomentPipeline
     model_class: Moment1Large