From 02dac776516b61fd759531bb5be88c00f99511d7 Mon Sep 17 00:00:00 2001
From: Ryuta Yoshimatsu
Date: Wed, 15 Jan 2025 07:22:20 +0100
Subject: [PATCH 1/2] updated chronos pipeline

---
 README.md                                      |   9 +-
 .../chronos/01_chronos_load_inference.py       | 522 -------------
 .../chronos/02_chronos_fine_tune.py            | 305 --------
 .../chronos/configs/chronos-gpt2.yaml          |  35 -
 .../chronos/configs/chronos-t5-base.yaml       |  35 -
 .../chronos/configs/chronos-t5-large.yaml      |  35 -
 .../chronos/configs/chronos-t5-mini.yaml       |  35 -
 .../chronos/configs/chronos-t5-small.yaml      |  35 -
 .../chronos/configs/chronos-t5-tiny.yaml       |  33 -
 .../chronos/train.py                           | 692 ------------------
 .../data_preparation.py                        | 143 ----
 .../moirai/01_moirai_load_inference.py         | 614 ----------------
 .../moirai/02_moirai_fine_tune.py              | 271 -------
 .../moirai/conf/finetune/data/etth1.yaml       |   3 -
 .../moirai/conf/finetune/data/random.yaml      |   3 -
 .../moirai/conf/finetune/default.yaml          |  83 ---
 .../finetune/model/moirai_1.0_R_base.yaml      |  33 -
 .../finetune/model/moirai_1.0_R_large.yaml     |  33 -
 .../finetune/model/moirai_1.0_R_small.yaml     |  37 -
 .../conf/finetune/model/moirai_base.yaml       |  31 -
 .../conf/finetune/model/moirai_large.yaml      |  31 -
 .../conf/finetune/model/moirai_small.yaml      |  35 -
 .../moirai/conf/finetune/val_data/etth1.yaml   |   9 -
 .../conf/finetune/val_data/etth1_multi.yaml    |  16 -
 .../conf/finetune/val_data/random.yaml         |   9 -
 .../foundation-model-examples/moirai/train.py  | 149 ----
 .../moment/01_moment_load_inference.py         | 490 -------------
 .../timegpt/01_timegpt_load_inference.py       | 536 --------------
 .../timegpt/02_timegpt_fine_tune.py            | 524 -------------
 .../timesfm/01_timesfm_load_inference.py       | 429 -----------
 .../models/chronosforecast/ChronosPipeline.py  |  91 ++-
 mmf_sa/models/models_conf.yaml                 |  32 +
 32 files changed, 104 insertions(+), 5234 deletions(-)
 delete mode 100644 examples/foundation-model-examples/chronos/01_chronos_load_inference.py
 delete mode 100644 examples/foundation-model-examples/chronos/02_chronos_fine_tune.py
 delete mode 100644 examples/foundation-model-examples/chronos/configs/chronos-gpt2.yaml
 delete mode 100644 examples/foundation-model-examples/chronos/configs/chronos-t5-base.yaml
 delete mode 100644 examples/foundation-model-examples/chronos/configs/chronos-t5-large.yaml
 delete mode 100644 examples/foundation-model-examples/chronos/configs/chronos-t5-mini.yaml
 delete mode 100644 examples/foundation-model-examples/chronos/configs/chronos-t5-small.yaml
 delete mode 100644 examples/foundation-model-examples/chronos/configs/chronos-t5-tiny.yaml
 delete mode 100644 examples/foundation-model-examples/chronos/train.py
 delete mode 100644 examples/foundation-model-examples/data_preparation.py
 delete mode 100644 examples/foundation-model-examples/moirai/01_moirai_load_inference.py
 delete mode 100644 examples/foundation-model-examples/moirai/02_moirai_fine_tune.py
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/data/etth1.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/data/random.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/default.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_base.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_large.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_small.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/model/moirai_base.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/model/moirai_large.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/model/moirai_small.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/val_data/etth1.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/val_data/etth1_multi.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/conf/finetune/val_data/random.yaml
 delete mode 100644 examples/foundation-model-examples/moirai/train.py
 delete mode 100644 examples/foundation-model-examples/moment/01_moment_load_inference.py
 delete mode 100644 examples/foundation-model-examples/timegpt/01_timegpt_load_inference.py
 delete mode 100644 examples/foundation-model-examples/timegpt/02_timegpt_fine_tune.py
 delete mode 100644 examples/foundation-model-examples/timesfm/01_timesfm_load_inference.py

diff --git a/README.md b/README.md
index 9d22385..5544c5d 100644
--- a/README.md
+++ b/README.md
@@ -186,7 +186,7 @@ We encourage you to read through [examples/global_daily.py](https://github.com/d
 
 ### Foundation Models
 
-Foundation time series models are transformer based models pretrained on millions or billions of time points. These models can perform analysis (i.e. forecasting, anomaly detection, classification) on a previously unseen time series without training or tuning. We support open source models from multiple sources: [chronos](https://github.com/amazon-science/chronos-forecasting), [timesfm](https://github.com/google-research/timesfm), [moirai](https://blog.salesforceairesearch.com/moirai/), and [moment](https://github.com/moment-timeseries-foundation-model/moment). Covariates (i.e. exogenous regressors) and fine-tuning are currently not yet supported. This is a rapidly changing field, and we are working on updating the supported models and new features as the field evolves.
+Foundation time series models are mostly transformer-based models pretrained on millions or billions of time points. These models can perform analysis (i.e. forecasting, anomaly detection, classification) on a previously unseen time series without training or tuning. We support open source models from multiple sources: [chronos](https://github.com/amazon-science/chronos-forecasting), [timesfm](https://github.com/google-research/timesfm), and [moirai](https://blog.salesforceairesearch.com/moirai/). Covariates (i.e. exogenous regressors) and fine-tuning are not yet supported. This is a rapidly changing field, and we are updating the supported models and adding new features as it evolves.
 
 To get started, attach the [examples/foundation_daily.py](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation_daily.py) notebook to a cluster running [DBR 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/index.html) or later versions. We recommend using a single-node cluster with multiple GPU instances such as [g4dn.12xlarge [T4]](https://aws.amazon.com/ec2/instance-types/g4/) on AWS or [Standard_NC64as_T4_v3](https://learn.microsoft.com/en-us/azure/virtual-machines/nct4-v3-series) on Azure. Multi-node setup is currently not supported.
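
For orientation, the `active_models` list in the hunk below feeds MMF's `run_forecast` entry point. A minimal sketch of kicking off a foundation-model run follows; parameter names mirror the README's other examples, so treat the exact signature as indicative rather than authoritative, and the table and experiment names as hypothetical:

```python
from mmf_sa import run_forecast

run_forecast(
    spark=spark,  # active SparkSession on the GPU cluster
    train_data=f"{catalog}.{db}.m4_daily_train",  # hypothetical UC table with unique_id/ds/y columns
    scoring_output=f"{catalog}.{db}.daily_scoring_output",
    evaluation_output=f"{catalog}.{db}.daily_evaluation_output",
    group_id="unique_id",  # column identifying each time series
    date_col="ds",
    target="y",
    freq="D",
    prediction_length=10,
    active_models=active_models,  # e.g. the list shown in the hunk below
    experiment_path="/Shared/mmf_experiment",  # hypothetical MLflow experiment path
)
```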
@@ -199,12 +199,15 @@ active_models = [
     "ChronosT5Small",
     "ChronosT5Base",
     "ChronosT5Large",
+    "ChronosBoltTiny",
+    "ChronosBoltMini",
+    "ChronosBoltSmall",
+    "ChronosBoltBase",
     "MoiraiSmall",
     "MoiraiBase",
     "MoiraiLarge",
     "TimesFM_1_0_200m",
     "TimesFM_2_0_500m",
-    "Moment1Large",
 ]
 ```
 
@@ -230,7 +233,7 @@ We encourage you to read through [examples/foundation_daily.py](https://github.c
 
 #### Using Time Series Foundation Models on Databricks
 
-If you want to try out time series foundation models on Databricks without MMF, you can find example notebooks in [examples/foundation-model-examples](https://github.com/databricks-industry-solutions/many-model-forecasting/tree/main/examples/foundation-model-examples). These notebooks will show you how you can load, distribute the inference, fine-tune, register, deploy a model and generate online forecasts on it. We have notebooks for [TimeGPT](https://docs.nixtla.io/), [Chronos](https://github.com/amazon-science/chronos-forecasting), [Moirai](https://github.com/SalesforceAIResearch/uni2ts), [Moment](https://github.com/moment-timeseries-foundation-model/moment), and [TimesFM](https://github.com/google-research/timesfm).
+If you want to try out time series foundation models on Databricks without MMF, you can find example notebooks in [databricks-industry-solutions/transformer_forecasting](https://github.com/databricks-industry-solutions/transformer_forecasting). These notebooks show how to load a model, distribute inference, fine-tune, register and deploy the model, and generate online forecasts from it. We have notebooks for [TimeGPT](https://docs.nixtla.io/), [Chronos](https://github.com/amazon-science/chronos-forecasting), [Moirai](https://github.com/SalesforceAIResearch/uni2ts), [Moment](https://github.com/moment-timeseries-foundation-model/moment), and [TimesFM](https://github.com/google-research/timesfm).
 
 ## [Vector Lab](https://www.youtube.com/@VectorLab) - Many Model Forecasting
diff --git a/examples/foundation-model-examples/chronos/01_chronos_load_inference.py b/examples/foundation-model-examples/chronos/01_chronos_load_inference.py
deleted file mode 100644
index 3625c3e..0000000
--- a/examples/foundation-model-examples/chronos/01_chronos_load_inference.py
+++ /dev/null
@@ -1,522 +0,0 @@
-# Databricks notebook source
-# MAGIC %md
-# MAGIC This is an example notebook that shows how to use [chronos](https://github.com/amazon-science/chronos-forecasting/tree/main) models on Databricks. The notebook loads the model, distributes the inference, registers the model, deploys it, and makes online forecasts.
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Cluster setup
-# MAGIC
-# MAGIC We recommend using a cluster with [Databricks Runtime 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/14.3lts-ml.html) or above. The cluster can be single-node or multi-node with one or more GPU instances on each worker: e.g. [g5.12xlarge [A10G]](https://aws.amazon.com/ec2/instance-types/g5/) on AWS or [Standard_NV72ads_A10_v5](https://learn.microsoft.com/en-us/azure/virtual-machines/nva10v5-series) on Azure. This notebook leverages [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) to distribute the inference tasks and utilize all the available resources.
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Install package
-
-# COMMAND ----------
-
-# MAGIC %pip install git+https://github.com/amazon-science/chronos-forecasting.git --quiet
-# MAGIC dbutils.library.restartPython()
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Prepare data
-# MAGIC We use the [`datasetsforecast`](https://github.com/Nixtla/datasetsforecast/tree/main/) package to download M4 data. The M4 dataset contains a set of time series which we use for testing MMF. Below we have written a number of custom functions to convert M4 time series to the expected format.
-# MAGIC
-# MAGIC Make sure that the catalog and the schema already exist.
-
-# COMMAND ----------
-
-catalog = "mmf"  # Name of the catalog we use to manage our assets
-db = "m4"  # Name of the schema we use to manage our assets (e.g. datasets)
-n = 100  # Number of time series to sample
-
-# COMMAND ----------
-
-# This cell runs the notebook ../data_preparation and creates the following tables with M4 data:
-# 1. {catalog}.{db}.m4_daily_train
-# 2. {catalog}.{db}.m4_monthly_train
-dbutils.notebook.run("../data_preparation", timeout_seconds=0, arguments={"catalog": catalog, "db": db, "n": n})
-
-# COMMAND ----------
-
-from pyspark.sql.functions import collect_list
-
-# Make sure that the data exists
-df = spark.table(f'{catalog}.{db}.m4_daily_train')
-df = df.groupBy('unique_id').agg(collect_list('ds').alias('ds'), collect_list('y').alias('y'))
-display(df)
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Distribute Inference
-# MAGIC We use [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html#iterator-of-series-to-iterator-of-series-udf) to distribute the inference.
-
-# COMMAND ----------
-
-import pandas as pd
-import numpy as np
-import torch
-from typing import Iterator
-from pyspark.sql.functions import pandas_udf
-
-# Function to create a Pandas UDF to generate horizon timestamps
-def create_get_horizon_timestamps(freq, prediction_length):
-    """
-    Creates a Pandas UDF to generate future timestamps based on the given frequency and prediction length.
-
-    Parameters:
-    freq (str): Frequency of the timestamps ('M' for month-end, otherwise daily).
-    prediction_length (int): Number of future timestamps to generate.
-
-    Returns:
-    function: A Pandas UDF that generates an array of future timestamps for each input time series.
-    """
-
-    @pandas_udf('array<timestamp>')
-    def get_horizon_timestamps(batch_iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
-        # Determine the offset for the next timestamp based on the frequency
-        one_ts_offset = pd.offsets.MonthEnd(1) if freq == "M" else pd.DateOffset(days=1)
-
-        batch_horizon_timestamps = []  # List to hold the arrays of future timestamps
-
-        # Iterate over batches of input time series
-        for batch in batch_iterator:
-            for series in batch:
-                timestamp = series.max()  # Get the last timestamp in the series
-                horizon_timestamps = []  # List to hold future timestamps for the current series
-
-                # Generate future timestamps
-                for i in range(prediction_length):
-                    timestamp = timestamp + one_ts_offset
-                    horizon_timestamps.append(timestamp.to_numpy())
-
-                batch_horizon_timestamps.append(np.array(horizon_timestamps))
-
-        yield pd.Series(batch_horizon_timestamps)  # Yield the result as a Pandas Series
-
-    return get_horizon_timestamps
-
-
-# Function to create a Pandas UDF to generate forecasts
-def create_forecast_udf(repository, prediction_length, num_samples, batch_size):
-    """
-    Creates a Pandas UDF to generate forecasts using a pretrained model from the given repository.
-
-    Parameters:
-    repository (str): Path or identifier for the model repository.
-    prediction_length (int): Number of future values to predict.
-    num_samples (int): Number of samples to generate for each prediction.
-    batch_size (int): Number of time series to process in each batch.
-
-    Returns:
-    function: A Pandas UDF that generates an array of forecasted values for each input time series.
-    """
-
-    @pandas_udf('array<double>')
-    def forecast_udf(bulk_iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
-
-        # Initialization step
-        import numpy as np
-        import pandas as pd
-        import torch
-        from chronos import ChronosPipeline
-
-        # Load the pretrained model from the repository
-        pipeline = ChronosPipeline.from_pretrained(repository, device_map="auto", torch_dtype=torch.bfloat16)
-
-        # Inference step
-        for bulk in bulk_iterator:
-            median = []  # List to hold the median forecast for each series
-
-            # Process the time series in batches
-            for i in range(0, len(bulk), batch_size):
-                batch = bulk[i:i+batch_size]
-                contexts = [torch.tensor(list(series)) for series in batch]  # Convert series to tensors
-
-                # Generate forecasts using the pretrained model
-                forecasts = pipeline.predict(context=contexts, prediction_length=prediction_length, num_samples=num_samples)
-
-                # Calculate the median forecast for each series
-                median.extend([np.median(forecast, axis=0) for forecast in forecasts])
-
-            yield pd.Series(median)  # Yield the result as a Pandas Series
-
-    return forecast_udf
-
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC We specify the requirements for our forecasts.
-
-# COMMAND ----------
-
-chronos_model = "chronos-t5-tiny"  # Alternatively: chronos-t5-mini, chronos-t5-small, chronos-t5-base, chronos-t5-large
-prediction_length = 10  # Time horizon for forecasting
-num_samples = 10  # Number of forecasts to generate. We will take the median as our final forecast.
-batch_size = 4  # Number of time series to process simultaneously
-freq = "D"  # Frequency of the time series
-device_count = torch.cuda.device_count()  # Number of GPUs available
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC Let's generate the forecasts.
-
-# COMMAND ----------
-
-# Create a Pandas UDF to generate horizon timestamps with the specified frequency and prediction length
-get_horizon_timestamps = create_get_horizon_timestamps(freq=freq, prediction_length=prediction_length)
-
-# Create a Pandas UDF to generate forecasts using a pretrained model from the specified repository
-forecast_udf = create_forecast_udf(
-    repository=f"amazon/{chronos_model}",  # Model repository path or identifier
-    prediction_length=prediction_length,  # Number of future values to predict
-    num_samples=num_samples,  # Number of samples to generate for each prediction
-    batch_size=batch_size,  # Number of time series to process in each batch
-)
-
-# Apply the UDFs to the DataFrame and select the relevant columns
-forecasts = df.repartition(device_count).select(
-    df.unique_id,  # Select the unique identifier for each time series
-    get_horizon_timestamps(df.ds).alias("ds"),  # Generate and alias the horizon timestamps for each series
-    forecast_udf(df.y).alias("forecast")  # Generate and alias the forecasted values for each series
-)
-
-# Display the resulting DataFrame containing the forecasts
-display(forecasts)
-
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Register Model
-# MAGIC We will package our model using [`mlflow.pyfunc.PythonModel`](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html) and register it in Unity Catalog.
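
One detail worth making explicit before the model is wrapped for registration: `ChronosPipeline.predict` returns `num_samples` sample paths per series, and the UDF above reduces them to a point forecast with an element-wise median. A tiny self-contained sketch of that reduction (shapes assumed from the Chronos API; the values are random placeholders):

```python
import numpy as np

# Stand-in for one series' output from ChronosPipeline.predict:
# num_samples sample paths, each prediction_length steps long.
samples = np.random.rand(10, 10)  # (num_samples, prediction_length)

# Element-wise median across the sample paths gives one value per step.
median_forecast = np.median(samples, axis=0)
assert median_forecast.shape == (10,)  # (prediction_length,)
```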
-
-# COMMAND ----------
-
-import mlflow
-import torch
-import numpy as np
-from mlflow.models.signature import ModelSignature
-from mlflow.types import DataType, Schema, TensorSpec
-
-# Set the MLflow registry URI to use Databricks Unity Catalog
-mlflow.set_registry_uri("databricks-uc")
-
-# Define a custom MLflow model class for the Chronos pipeline
-class ChronosModel(mlflow.pyfunc.PythonModel):
-    def __init__(self, repository):
-        import torch
-        from chronos import ChronosPipeline
-        # Initialize the ChronosPipeline with a pretrained model from the specified repository
-        self.pipeline = ChronosPipeline.from_pretrained(
-            repository,
-            device_map="cuda",  # Use GPU for inference
-            torch_dtype=torch.bfloat16,  # Use bfloat16 precision
-        )
-
-    def predict(self, context, input_data, params=None):
-        # Convert input data to a list of PyTorch tensors
-        history = [torch.tensor(list(series)) for series in input_data]
-        # Generate forecasts using the ChronosPipeline
-        forecast = self.pipeline.predict(
-            context=history,
-            prediction_length=10,  # Length of the prediction horizon
-            num_samples=10,  # Number of samples to generate
-        )
-        return forecast.numpy()  # Convert the forecast to a NumPy array
-
-# Instantiate the custom model with the specified repository
-pipeline = ChronosModel(f"amazon/{chronos_model}")
-
-# Define the input and output schema for the model signature
-input_schema = Schema([TensorSpec(np.dtype(np.double), (-1, -1))])  # Input: 2D array of doubles
-output_schema = Schema([TensorSpec(np.dtype(np.uint8), (-1, -1, -1))])  # Output: 3D array of unsigned 8-bit integers
-signature = ModelSignature(inputs=input_schema, outputs=output_schema)
-
-# Create an example input for the model (1 sample, 52 features)
-input_example = np.random.rand(1, 52)
-
-# Define the registered model name in the format: catalog.database.model_name
-registered_model_name = f"{catalog}.{db}.{chronos_model}"
-
-# Log and register the model with MLflow
-with mlflow.start_run() as run:
-    mlflow.pyfunc.log_model(
-        "model",  # Model artifact path
-        python_model=pipeline,  # Custom model class instance
-        registered_model_name=registered_model_name,  # Name to register the model under
-        signature=signature,  # Model signature
-        input_example=input_example,  # Example input
-        pip_requirements=[  # List of pip requirements
-            f"git+https://github.com/amazon-science/chronos-forecasting.git",
-        ],
-    )
-
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Reload Model
-# MAGIC Once the registration is complete, we will reload the model and generate forecasts.
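
The next cells find the newest version by scanning every registered version. As an aside (not part of the original notebook), MLflow 2.3+ supports registered-model aliases, which avoid the scan entirely; a sketch, assuming the alias is maintained at registration time:

```python
import mlflow

client = mlflow.MlflowClient()

new_version = 1  # hypothetical: the version number produced by the log_model call above

# Point a stable alias (here "champion", an arbitrary name) at that version.
client.set_registered_model_alias(registered_model_name, "champion", new_version)

# "models:/<name>@<alias>" resolves to whichever version the alias currently targets.
loaded_model = mlflow.pyfunc.load_model(f"models:/{registered_model_name}@champion")
```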
-
-# COMMAND ----------
-
-from mlflow import MlflowClient
-client = MlflowClient()
-
-# Function to get the latest version of a registered model
-def get_latest_model_version(client, registered_model_name):
-    latest_version = 1  # Initialize the latest version to 1
-    # Iterate through all model versions for the given registered model name
-    for mv in client.search_model_versions(f"name='{registered_model_name}'"):
-        version_int = int(mv.version)  # Convert version string to integer
-        # Update the latest version if a higher version is found
-        if version_int > latest_version:
-            latest_version = version_int
-    return latest_version  # Return the latest version number
-
-# Get the latest version of the specified registered model
-model_version = get_latest_model_version(client, registered_model_name)
-# Construct the model URI using the registered model name and its latest version
-logged_model = f"models:/{registered_model_name}/{model_version}"
-
-# Load the model as a PyFuncModel from the specified URI
-loaded_model = mlflow.pyfunc.load_model(logged_model)
-
-# Create random input data (5 samples, each with 52 data points)
-input_data = np.random.rand(5, 52)  # (batch, series)
-
-# Generate forecasts using the loaded model
-loaded_model.predict(input_data)
-
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Deploy Model
-# MAGIC We will deploy our model behind a real-time endpoint of [Databricks Mosaic AI Model Serving](https://www.databricks.com/product/model-serving).
-
-# COMMAND ----------
-
-# With the token, we can create the authorization header for our subsequent REST calls
-token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().getOrElse(None)
-headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-# Next, we need an endpoint at which to execute the request, which we can get from the notebook's tags collection
-java_tags = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags()
-
-# This object comes from the Java CM - convert the Java Map object to a Python dictionary
-tags = sc._jvm.scala.collection.JavaConversions.mapAsJavaMap(java_tags)
-
-# Lastly, extract the Databricks instance (domain name) from the dictionary
-instance = tags["browserHostName"]
-
-# COMMAND ----------
-
-import requests
-
-model_serving_endpoint_name = chronos_model
-
-# auto_capture_config specifies where the inference logs should be written
-my_json = {
-    "name": model_serving_endpoint_name,
-    "config": {
-        "served_models": [
-            {
-                "model_name": registered_model_name,
-                "model_version": model_version,
-                "workload_type": "GPU_SMALL",
-                "workload_size": "Small",
-                "scale_to_zero_enabled": "true",
-            }
-        ],
-        "auto_capture_config": {
-            "catalog_name": catalog,
-            "schema_name": db,
-            "table_name_prefix": model_serving_endpoint_name,
-        },
-    },
-}

-# Make sure to drop the inference table if it exists
-_ = spark.sql(
-    f"DROP TABLE IF EXISTS {catalog}.{db}.`{model_serving_endpoint_name}_payload`"
-)
-
-# COMMAND ----------
-
-# Function to create an endpoint in Model Serving and deploy the model behind it
-def func_create_endpoint(model_serving_endpoint_name):
-    # Get the endpoint status
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-    url = f"{endpoint_url}/{model_serving_endpoint_name}"
-    r = requests.get(url, headers=headers)
-    if "RESOURCE_DOES_NOT_EXIST" in r.text:
-        print(
-            "Creating this new endpoint: ",
-            f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations",
-        )
-        re = requests.post(endpoint_url, headers=headers, json=my_json)
-    else:
-        new_model_version = (my_json["config"])["served_models"][0]["model_version"]
-        print(
-            "This endpoint existed previously! We are updating it to a new config with new model version: ",
-            new_model_version,
-        )
-        # Update the config
-        url = f"{endpoint_url}/{model_serving_endpoint_name}/config"
-        re = requests.put(url, headers=headers, json=my_json["config"])
-        # Wait till the new config is in place
-        import time, json
-
-        # Get the endpoint status
-        url = f"https://{instance}/api/2.0/serving-endpoints/{model_serving_endpoint_name}"
-        retry = True
-        total_wait = 0
-        while retry:
-            r = requests.get(url, headers=headers)
-            assert (
-                r.status_code == 200
-            ), f"Expected an HTTP 200 response when accessing endpoint info, received {r.status_code}"
-            endpoint = json.loads(r.text)
-            if "pending_config" in endpoint.keys():
-                seconds = 10
-                print("New config still pending")
-                if total_wait < 6000:
-                    # If we have waited less than 6000 seconds in total, keep waiting
-                    print(f"Wait for {seconds} seconds")
-                    print(f"Total waiting time so far: {total_wait} seconds")
-                    time.sleep(10)
-                    total_wait += seconds
-                else:
-                    print(f"Stopping, waited for {total_wait} seconds")
-                    retry = False
-            else:
-                print("New config in place now!")
-                retry = False
-
-    assert (
-        re.status_code == 200
-    ), f"Expected an HTTP 200 response, received {re.status_code}"
-
-# Function to delete the endpoint from Model Serving
-def func_delete_model_serving_endpoint(model_serving_endpoint_name):
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-    url = f"{endpoint_url}/{model_serving_endpoint_name}"
-    response = requests.delete(url, headers=headers)
-    if response.status_code != 200:
-        raise Exception(
-            f"Request failed with status {response.status_code}, {response.text}"
-        )
-    else:
-        print(model_serving_endpoint_name, "endpoint is deleted!")
-    return response.json()
-
-# COMMAND ----------
-
-# Create an endpoint. This may take some time.
-func_create_endpoint(model_serving_endpoint_name)
-
-# COMMAND ----------
-
-import time, mlflow
-
-def wait_for_endpoint():
-    # Construct the base URL for the serving endpoints API
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-
-    while True:
-        # Construct the URL for the specific model serving endpoint
-        url = f"{endpoint_url}/{model_serving_endpoint_name}"
-
-        # Send a GET request to the endpoint URL
-        response = requests.get(url, headers=headers)
-
-        # Assert that the response status code is 200 (OK)
-        assert (
-            response.status_code == 200
-        ), f"Expected an HTTP 200 response, received {response.status_code}\n{response.text}"
-
-        # Extract the status of the endpoint from the response
-        status = response.json().get("state", {}).get("ready", {})
-
-        # If the endpoint is ready, print the status and return
-        if status == "READY":
-            print(status)
-            print("-" * 80)
-            return
-        else:
-            # If the endpoint is not ready, print the status and wait for 5 minutes
-            print(f"Endpoint not ready ({status}), waiting 5 minutes")
-            time.sleep(300)  # Wait 300 seconds (5 minutes)
-
-# Get the API URL for the Databricks instance
-api_url = mlflow.utils.databricks_utils.get_webapp_url()
-
-# Call the function to wait for the endpoint to be ready
-wait_for_endpoint()
-
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Online Forecast
-# MAGIC Once the endpoint is ready, let's send a request to the model and generate an online forecast.
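
For reference, Databricks Model Serving wraps pyfunc output in a JSON object with a `predictions` key, so for this model the payload decodes to an array shaped like `(batch, num_samples, prediction_length)`. A sketch of post-processing such a response into point forecasts (the response content here is a random placeholder):

```python
import numpy as np

# Hypothetical decoded response for a batch of 5 series, assuming the
# standard {"predictions": ...} Model Serving envelope.
response = {"predictions": np.random.rand(5, 10, 10).tolist()}

predictions = np.asarray(response["predictions"])  # (batch, num_samples, prediction_length)
point_forecasts = np.median(predictions, axis=1)   # (batch, prediction_length)
```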
-
-# COMMAND ----------
-
-import os
-import requests
-import pandas as pd
-import json
-import matplotlib.pyplot as plt
-
-# Replace the URL with the endpoint invocation URL you get from the Model Serving page.
-endpoint_url = f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations"
-
-# Get the Databricks API token
-token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
-
-# Define a function to send input data to the model serving endpoint and get the forecast
-def forecast(input_data, url=endpoint_url, databricks_token=token):
-    # Set up the headers for the POST request, including the authorization token
-    headers = {
-        "Authorization": f"Bearer {databricks_token}",
-        "Content-Type": "application/json",
-    }
-    # Prepare the body of the request with the input data
-    body = {"inputs": input_data.tolist()}
-    # Convert the body to a JSON string
-    data = json.dumps(body)
-    # Send a POST request to the model serving endpoint
-    response = requests.request(method="POST", headers=headers, url=url, data=data)
-    # Check if the response status code is not 200 (OK)
-    if response.status_code != 200:
-        # Raise an exception if the request failed
-        raise Exception(
-            f"Request failed with status {response.status_code}, {response.text}"
-        )
-    # Return the response JSON as a Python dictionary
-    return response.json()
-
-
-# COMMAND ----------
-
-# Send a request to the endpoint
-input_data = np.random.rand(5, 52)  # (batch, series)
-forecast(input_data)
-
-# COMMAND ----------
-
-# Delete the serving endpoint
-func_delete_model_serving_endpoint(model_serving_endpoint_name)
-
-# COMMAND ----------
-
-
diff --git a/examples/foundation-model-examples/chronos/02_chronos_fine_tune.py b/examples/foundation-model-examples/chronos/02_chronos_fine_tune.py
deleted file mode 100644
index 3c669e3..0000000
--- a/examples/foundation-model-examples/chronos/02_chronos_fine_tune.py
+++ /dev/null
@@ -1,305 +0,0 @@
-# Databricks notebook source
-# MAGIC %md
-# MAGIC This is an example notebook that shows how to use [chronos](https://github.com/amazon-science/chronos-forecasting/tree/main) models on Databricks. The notebook loads, fine-tunes, and registers the model.
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Cluster setup
-# MAGIC **As of June 17, 2024, the Chronos fine-tuning script works on DBR ML 14.3 and below (do not use DBR ML 15 or above).**
-# MAGIC
-# MAGIC We recommend using a cluster with [Databricks Runtime 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/14.3lts-ml.html). The cluster can be single-node or multi-node with one or more GPU instances on each worker: e.g. [g5.12xlarge [A10G]](https://aws.amazon.com/ec2/instance-types/g5/) on AWS or [Standard_NV72ads_A10_v5](https://learn.microsoft.com/en-us/azure/virtual-machines/nva10v5-series) on Azure.
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Install package
-
-# COMMAND ----------
-
-# MAGIC %pip install "chronos[training] @ git+https://github.com/amazon-science/chronos-forecasting.git" --quiet
-# MAGIC dbutils.library.restartPython()
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Prepare data
-# MAGIC We use the [`datasetsforecast`](https://github.com/Nixtla/datasetsforecast/tree/main/) package to download M4 data. The M4 dataset contains a set of time series which we use for testing MMF. Below we have written a number of custom functions to convert M4 time series to the expected format.
-# MAGIC
-# MAGIC Make sure that the catalog and the schema already exist.
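
If the catalog and schema do not exist yet, a one-time setup along these lines creates them (a sketch; it requires the appropriate Unity Catalog privileges):

```python
catalog = "mmf"
db = "m4"

# One-time setup: create the catalog and the schema if they are missing.
spark.sql(f"CREATE CATALOG IF NOT EXISTS {catalog}")
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{db}")
```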
-
-# COMMAND ----------
-
-catalog = "mmf"  # Name of the catalog we use to manage our assets
-db = "m4"  # Name of the schema we use to manage our assets (e.g. datasets)
-volume = "chronos_fine_tune"  # Name of the volume where we store the data and the weights
-model = "chronos-t5-tiny"  # Chronos model to fine-tune. Alternatives: -mini, -small, -base, -large
-n = 1000  # Number of time series to sample
-
-# COMMAND ----------
-
-# This cell runs the notebook ../data_preparation and creates the following tables with M4 data:
-# 1. {catalog}.{db}.m4_daily_train
-# 2. {catalog}.{db}.m4_monthly_train
-dbutils.notebook.run("../data_preparation", timeout_seconds=0, arguments={"catalog": catalog, "db": db, "n": n})
-
-# COMMAND ----------
-
-from pyspark.sql.functions import collect_list
-
-# Make sure that the data exists
-df = spark.table(f'{catalog}.{db}.m4_daily_train')
-df = df.groupBy('unique_id').agg(collect_list('ds').alias('ds'), collect_list('y').alias('y')).toPandas()
-display(df)
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC We need to convert our time series dataset into a GluonTS-compatible file dataset.
-
-# COMMAND ----------
-
-import numpy as np
-from pathlib import Path
-from typing import List, Optional, Union
-from gluonts.dataset.arrow import ArrowWriter
-
-def convert_to_arrow(
-    path: Union[str, Path],
-    time_series: Union[List[np.ndarray], np.ndarray],
-    start_times: Optional[Union[List[np.datetime64], np.ndarray]] = None,
-    compression: str = "lz4",
-):
-    """
-    This function converts time series data into the Apache Arrow format and saves it to a file.
-
-    Parameters:
-    - path (Union[str, Path]): The file path where the Arrow file will be saved.
-    - time_series (Union[List[np.ndarray], np.ndarray]): The time series data to be converted.
-    - start_times (Optional[Union[List[np.datetime64], np.ndarray]]): The start times for each time series. If None, a default start time is used.
-    - compression (str): The compression algorithm to use for the Arrow file. Default is 'lz4'.
-    """
-
-    # If start_times is not provided, set all start times to '2000-01-01 00:00:00'
-    if start_times is None:
-        start_times = [np.datetime64("2000-01-01 00:00", "s")] * len(time_series)
-
-    # Ensure there is a start time for each time series
-    assert len(time_series) == len(start_times)
-
-    # Create a list of dictionaries where each dictionary represents a time series
-    # Each dictionary contains the start time and the corresponding time series data
-    dataset = [
-        {"start": start, "target": ts} for ts, start in zip(time_series, start_times)
-    ]
-
-    # Use ArrowWriter to write the dataset to a file in Arrow format with the specified compression
-    ArrowWriter(compression=compression).write_to_file(
-        dataset,
-        path=path,
-    )
-
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC Convert the Pandas dataframe to an arrow file and write it to a UC Volume.
-
-# COMMAND ----------
-
-time_series = list(df["y"])
-start_times = list(df["ds"].apply(lambda x: x.min().to_numpy()))
-
-# Make sure that the volume exists. We store the fine-tuned weights here.
-_ = spark.sql(f"CREATE VOLUME IF NOT EXISTS {catalog}.{db}.{volume}")
-
-# Convert to GluonTS arrow format and save it in the UC Volume
-convert_to_arrow(
-    f"/Volumes/{catalog}/{db}/{volume}/data.arrow",
-    time_series=time_series,
-    start_times=start_times,
-)
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Run Fine-tuning
-# MAGIC
-# MAGIC In this example, we will fine-tune `amazon/chronos-t5-tiny` for 1000 steps with an initial learning rate of 1e-3.
-# MAGIC
-# MAGIC Make sure that you have the configuration yaml files placed inside the `configs` folder and the `train.py` script in the same directory. These two assets are taken directly from [chronos-forecasting/scripts/training](https://github.com/amazon-science/chronos-forecasting/tree/main/scripts/training). They are subject to change as the Chronos team develops the framework further. Keep an eye on the latest changes (we will try too) and use the latest versions as needed. We have made a small change to our `train.py` script and set the frequency of the time series to daily ("D").
-# MAGIC
-# MAGIC Inside the configuration yaml (for this example, `configs/chronos-t5-tiny.yaml`), make sure to set the parameters:
-# MAGIC - `training_data_paths` to `/Volumes/mmf/m4/chronos_fine_tune/data.arrow`, where your arrow-converted file is stored
-# MAGIC - `probability` to `1.0` if there is only one data source
-# MAGIC - `prediction_length` to your use case's forecasting horizon (in this example `10`)
-# MAGIC - `num_samples` to how many samples you want to generate
-# MAGIC - `output_dir` to `/Volumes/mmf/m4/chronos_fine_tune/`, where you want to store your fine-tuned weights
-# MAGIC
-# MAGIC And other parameters if needed.
-# MAGIC
-# MAGIC `CUDA_VISIBLE_DEVICES` tells the script about the available GPU resources. In this example, we are using a single-node cluster with g5.12xlarge on AWS, which comes with 4 A10G GPU instances, hence `CUDA_VISIBLE_DEVICES=0,1,2,3`. See Chronos' training [README](https://github.com/amazon-science/chronos-forecasting/blob/main/scripts/README.md) for more information on the multi-node, multi-GPU setup.
-
-# COMMAND ----------
-
-# MAGIC %sh CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py \
-# MAGIC     --config configs/chronos-t5-tiny.yaml \
-# MAGIC     --model-id amazon/chronos-t5-tiny \
-# MAGIC     --no-random-init \
-# MAGIC     --max-steps 1000 \
-# MAGIC     --learning-rate 0.001
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Register Model
-# MAGIC We get the fine-tuned weights from the latest run from the UC Volume, wrap the pipeline with [`mlflow.pyfunc.PythonModel`](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html) and register it in Unity Catalog.
-
-# COMMAND ----------
-
-import os
-import glob
-import mlflow
-import torch
-import numpy as np
-from mlflow.models.signature import ModelSignature
-from mlflow.types import DataType, Schema, TensorSpec
-
-# Set the registry URI for MLflow to Databricks Unity Catalog
-mlflow.set_registry_uri("databricks-uc")
-
-class FineTunedChronosModel(mlflow.pyfunc.PythonModel):
-    def load_context(self, context):
-        """
-        Load the model context including the pretrained weights.
-        The model is loaded onto a GPU if available, otherwise on CPU.
-        """
-        import torch
-        from chronos import ChronosPipeline
-
-        # Load the pretrained model pipeline from the provided weights
-        self.pipeline = ChronosPipeline.from_pretrained(
-            context.artifacts["weights"],
-            device_map="cuda" if torch.cuda.is_available() else "cpu",
-            torch_dtype=torch.bfloat16,
-        )
-
-    def predict(self, context, input_data, params=None):
-        """
-        Make predictions using the loaded model.
-
-        Parameters:
-        - context: The context in which the model is being run.
-        - input_data: The input data for prediction, expected to be a list of series.
-        - params: Additional parameters for prediction (not used here).
-
-        Returns:
-        - forecast: The predicted results as a NumPy array.
- """ - # Convert input data to a list of torch tensors - history = [torch.tensor(list(series)) for series in input_data] - - # Make predictions using the model pipeline - forecast = self.pipeline.predict( - context=history, - prediction_length=10, - num_samples=10, - ) - - # Convert the forecast to a NumPy array and return - return forecast.numpy() - -# Directory path components for locating the latest run -files = os.listdir(f"/Volumes/{catalog}/{db}/{volume}/") - -# Extract run numbers from the directory names -runs = [int(file[4:]) for file in files if "run-" in file] - -# Identify the latest run based on the highest run number -latest_run = max(runs) - -# Construct the registered model name and weights path -registered_model_name = f"{catalog}.{db}.{model}_finetuned" -weights = f"/Volumes/{catalog}/{db}/{volume}/run-{latest_run}/checkpoint-final/" - -# Define the model input and output schema for registration -input_schema = Schema([TensorSpec(np.dtype(np.double), (-1, -1))]) -output_schema = Schema([TensorSpec(np.dtype(np.uint8), (-1, -1, -1))]) -signature = ModelSignature(inputs=input_schema, outputs=output_schema) - -# Example input data for model registration -input_example = np.random.rand(1, 52) - -# Register the fine-tuned model with MLflow -with mlflow.start_run() as run: - mlflow.pyfunc.log_model( - "model", - python_model=FineTunedChronosModel(), - artifacts={"weights": weights}, - registered_model_name=registered_model_name, - signature=signature, - input_example=input_example, - pip_requirements=[ - "chronos[training] @ git+https://github.com/amazon-science/chronos-forecasting.git", - ], - ) - - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Reload Model -# MAGIC We reload the model from the registry and perform forecasting on the in-training time series (for testing purpose). You can also go ahead and deploy this model behind a Model Serving's real-time endpoint. See the previous notebook: [`01_chronos_load_inference`](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation-model-examples/chronos/01_chronos_load_inference.py) for more information. - -# COMMAND ---------- - -from mlflow import MlflowClient - -# Create an instance of the MlflowClient to interact with the MLflow tracking server -client = MlflowClient() - -def get_latest_model_version(client, registered_model_name): - """ - Retrieve the latest version number of a registered model. - - Parameters: - - client (MlflowClient): The MLflow client instance. - - registered_model_name (str): The name of the registered model. - - Returns: - - latest_version (int): The latest version number of the registered model. 
- """ - # Initialize the latest version to 1 (assuming at least one version exists) - latest_version = 1 - - # Iterate over all model versions for the given registered model - for mv in client.search_model_versions(f"name='{registered_model_name}'"): - # Convert the version to an integer - version_int = int(mv.version) - - # Update the latest version if a higher version is found - if version_int > latest_version: - latest_version = version_int - - # Return the latest version number - return latest_version - -# Get the latest version of the registered model -model_version = get_latest_model_version(client, registered_model_name) - -# Construct the URI for the logged model using the registered model name and latest version -logged_model = f"models:/{registered_model_name}/{model_version}" - -# Load the model as a PyFuncModel from the logged model URI -loaded_model = mlflow.pyfunc.load_model(logged_model) - -# Create input data for prediction from the first 100 elements of the 'y' column in a DataFrame -input_data = df["y"][:100].to_numpy() # Shape should be (batch, series) - -# Generate forecasts using the loaded model -loaded_model.predict(input_data) - - -# COMMAND ---------- - - diff --git a/examples/foundation-model-examples/chronos/configs/chronos-gpt2.yaml b/examples/foundation-model-examples/chronos/configs/chronos-gpt2.yaml deleted file mode 100644 index 4d917ad..0000000 --- a/examples/foundation-model-examples/chronos/configs/chronos-gpt2.yaml +++ /dev/null @@ -1,35 +0,0 @@ -training_data_paths: -- "/home/ubuntu/tsmixup-data.arrow" -- "/home/ubuntu/kernelsynth-data.arrow" -probability: -- 0.9 -- 0.1 -context_length: 512 -prediction_length: 64 -min_past: 60 -max_steps: 200_000 -save_steps: 100_000 -log_steps: 500 -per_device_train_batch_size: 32 -learning_rate: 0.001 -optim: adamw_torch_fused -num_samples: 20 -shuffle_buffer_length: 100_000 -gradient_accumulation_steps: 1 -model_id: openai-community/gpt2 -model_type: causal -random_init: false -tie_embeddings: false -output_dir: ./output/ -tf32: true -torch_compile: true -tokenizer_class: "MeanScaleUniformBins" -tokenizer_kwargs: - low_limit: -15.0 - high_limit: 15.0 -n_tokens: 4096 -lr_scheduler_type: linear -warmup_ratio: 0.0 -dataloader_num_workers: 1 -max_missing_prop: 0.1 -use_eos_token: true diff --git a/examples/foundation-model-examples/chronos/configs/chronos-t5-base.yaml b/examples/foundation-model-examples/chronos/configs/chronos-t5-base.yaml deleted file mode 100644 index c7b56f7..0000000 --- a/examples/foundation-model-examples/chronos/configs/chronos-t5-base.yaml +++ /dev/null @@ -1,35 +0,0 @@ -training_data_paths: -- "/home/ubuntu/tsmixup-data.arrow" -- "/home/ubuntu/kernelsynth-data.arrow" -probability: -- 0.9 -- 0.1 -context_length: 512 -prediction_length: 64 -min_past: 60 -max_steps: 200_000 -save_steps: 100_000 -log_steps: 500 -per_device_train_batch_size: 32 -learning_rate: 0.001 -optim: adamw_torch_fused -num_samples: 20 -shuffle_buffer_length: 100_000 -gradient_accumulation_steps: 1 -model_id: google/t5-efficient-base -model_type: seq2seq -random_init: true -tie_embeddings: true -output_dir: ./output/ -tf32: true -torch_compile: true -tokenizer_class: "MeanScaleUniformBins" -tokenizer_kwargs: - low_limit: -15.0 - high_limit: 15.0 -n_tokens: 4096 -lr_scheduler_type: linear -warmup_ratio: 0.0 -dataloader_num_workers: 1 -max_missing_prop: 0.9 -use_eos_token: true diff --git a/examples/foundation-model-examples/chronos/configs/chronos-t5-large.yaml 
b/examples/foundation-model-examples/chronos/configs/chronos-t5-large.yaml deleted file mode 100644 index 189013c..0000000 --- a/examples/foundation-model-examples/chronos/configs/chronos-t5-large.yaml +++ /dev/null @@ -1,35 +0,0 @@ -training_data_paths: -- "/home/ubuntu/tsmixup-data.arrow" -- "/home/ubuntu/kernelsynth-data.arrow" -probability: -- 0.9 -- 0.1 -context_length: 512 -prediction_length: 64 -min_past: 60 -max_steps: 200_000 -save_steps: 100_000 -log_steps: 500 -per_device_train_batch_size: 8 -learning_rate: 0.001 -optim: adamw_torch_fused -num_samples: 20 -shuffle_buffer_length: 100_000 -gradient_accumulation_steps: 4 -model_id: google/t5-efficient-large -model_type: seq2seq -random_init: true -tie_embeddings: true -output_dir: ./output/ -tf32: true -torch_compile: true -tokenizer_class: "MeanScaleUniformBins" -tokenizer_kwargs: - low_limit: -15.0 - high_limit: 15.0 -n_tokens: 4096 -lr_scheduler_type: linear -warmup_ratio: 0.0 -dataloader_num_workers: 1 -max_missing_prop: 0.9 -use_eos_token: true diff --git a/examples/foundation-model-examples/chronos/configs/chronos-t5-mini.yaml b/examples/foundation-model-examples/chronos/configs/chronos-t5-mini.yaml deleted file mode 100644 index e99d0fc..0000000 --- a/examples/foundation-model-examples/chronos/configs/chronos-t5-mini.yaml +++ /dev/null @@ -1,35 +0,0 @@ -training_data_paths: -- "/home/ubuntu/tsmixup-data.arrow" -- "/home/ubuntu/kernelsynth-data.arrow" -probability: -- 0.9 -- 0.1 -context_length: 512 -prediction_length: 64 -min_past: 60 -max_steps: 200_000 -save_steps: 100_000 -log_steps: 500 -per_device_train_batch_size: 32 -learning_rate: 0.001 -optim: adamw_torch_fused -num_samples: 20 -shuffle_buffer_length: 100_000 -gradient_accumulation_steps: 1 -model_id: google/t5-efficient-mini -model_type: seq2seq -random_init: true -tie_embeddings: true -output_dir: ./output/ -tf32: true -torch_compile: true -tokenizer_class: "MeanScaleUniformBins" -tokenizer_kwargs: - low_limit: -15.0 - high_limit: 15.0 -n_tokens: 4096 -lr_scheduler_type: linear -warmup_ratio: 0.0 -dataloader_num_workers: 1 -max_missing_prop: 0.9 -use_eos_token: true diff --git a/examples/foundation-model-examples/chronos/configs/chronos-t5-small.yaml b/examples/foundation-model-examples/chronos/configs/chronos-t5-small.yaml deleted file mode 100644 index 873f483..0000000 --- a/examples/foundation-model-examples/chronos/configs/chronos-t5-small.yaml +++ /dev/null @@ -1,35 +0,0 @@ -training_data_paths: -- "/home/ubuntu/tsmixup-data.arrow" -- "/home/ubuntu/kernelsynth-data.arrow" -probability: -- 0.9 -- 0.1 -context_length: 512 -prediction_length: 64 -min_past: 60 -max_steps: 200_000 -save_steps: 100_000 -log_steps: 500 -per_device_train_batch_size: 32 -learning_rate: 0.001 -optim: adamw_torch_fused -num_samples: 20 -shuffle_buffer_length: 100_000 -gradient_accumulation_steps: 1 -model_id: google/t5-efficient-small -model_type: seq2seq -random_init: true -tie_embeddings: true -output_dir: ./output/ -tf32: true -torch_compile: true -tokenizer_class: "MeanScaleUniformBins" -tokenizer_kwargs: - low_limit: -15.0 - high_limit: 15.0 -n_tokens: 4096 -lr_scheduler_type: linear -warmup_ratio: 0.0 -dataloader_num_workers: 1 -max_missing_prop: 0.9 -use_eos_token: true diff --git a/examples/foundation-model-examples/chronos/configs/chronos-t5-tiny.yaml b/examples/foundation-model-examples/chronos/configs/chronos-t5-tiny.yaml deleted file mode 100644 index 9d9deb2..0000000 --- a/examples/foundation-model-examples/chronos/configs/chronos-t5-tiny.yaml +++ /dev/null @@ -1,33 +0,0 @@ 
-training_data_paths: -- "/Volumes/mmf/m4/chronos_fine_tune/data.arrow" -probability: -- 1.0 -context_length: 512 -prediction_length: 10 -min_past: 60 -max_steps: 200_000 -save_steps: 100_000 -log_steps: 500 -per_device_train_batch_size: 32 -learning_rate: 0.001 -optim: adamw_torch_fused -num_samples: 10 -shuffle_buffer_length: 100_000 -gradient_accumulation_steps: 1 -model_id: google/t5-efficient-tiny -model_type: seq2seq -random_init: true -tie_embeddings: true -output_dir: "/Volumes/mmf/m4/chronos_fine_tune/" -tf32: true -torch_compile: true -tokenizer_class: "MeanScaleUniformBins" -tokenizer_kwargs: - low_limit: -15.0 - high_limit: 15.0 -n_tokens: 4096 -lr_scheduler_type: linear -warmup_ratio: 0.0 -dataloader_num_workers: 1 -max_missing_prop: 0.9 -use_eos_token: true diff --git a/examples/foundation-model-examples/chronos/train.py b/examples/foundation-model-examples/chronos/train.py deleted file mode 100644 index c86a898..0000000 --- a/examples/foundation-model-examples/chronos/train.py +++ /dev/null @@ -1,692 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# SPDX-License-Identifier: Apache-2.0 - -import ast -import logging -import os -import re -import sys -import json -import itertools -import random -from copy import deepcopy -from pathlib import Path -from functools import partial -from typing import List, Iterator, Optional, Dict - -import typer -from typer_config import use_yaml_config -import numpy as np -import torch -import torch.distributed as dist -from torch.utils.data import IterableDataset, get_worker_info -import transformers -from transformers import ( - AutoModelForSeq2SeqLM, - AutoModelForCausalLM, - AutoConfig, - T5Config, - Trainer, - TrainingArguments, -) -import accelerate -import gluonts -from gluonts.dataset.common import FileDataset -from gluonts.itertools import Cyclic, Map, Filter -from gluonts.transform import ( - FilterTransformation, - TestSplitSampler, - ValidationSplitSampler, - InstanceSplitter, - ExpectedNumInstanceSampler, - MissingValueImputation, - LeavesMissingValues, - LastValueImputation, -) - -from chronos import ChronosConfig, ChronosTokenizer - - -app = typer.Typer(pretty_exceptions_enable=False) - - -def is_main_process() -> bool: - """ - Check if we're on the main process. - """ - if not dist.is_torchelastic_launched(): - return True - return int(os.environ["RANK"]) == 0 - - -def log_on_main(msg: str, logger: logging.Logger, log_level: int = logging.INFO): - """ - Log the given message using the given logger, if we're on the main process. - """ - if is_main_process(): - logger.log(log_level, msg) - - -def get_training_job_info() -> Dict: - """ - Returns info about this training job. 
- """ - job_info = {} - - # CUDA info - job_info["cuda_available"] = torch.cuda.is_available() - if torch.cuda.is_available(): - job_info["device_count"] = torch.cuda.device_count() - - job_info["device_names"] = { - idx: torch.cuda.get_device_name(idx) - for idx in range(torch.cuda.device_count()) - } - job_info["mem_info"] = { - idx: torch.cuda.mem_get_info(device=idx) - for idx in range(torch.cuda.device_count()) - } - - # DDP info - job_info["torchelastic_launched"] = dist.is_torchelastic_launched() - - if dist.is_torchelastic_launched(): - job_info["world_size"] = dist.get_world_size() - - # Versions - job_info["python_version"] = sys.version.replace("\n", " ") - job_info["torch_version"] = torch.__version__ - job_info["numpy_version"] = np.__version__ - job_info["gluonts_version"] = gluonts.__version__ - job_info["transformers_version"] = transformers.__version__ - job_info["accelerate_version"] = accelerate.__version__ - - return job_info - - -def save_training_info(ckpt_path: Path, training_config: Dict): - """ - Save info about this training job in a json file for documentation. - """ - assert ckpt_path.is_dir() - with open(ckpt_path / "training_info.json", "w") as fp: - json.dump( - {"training_config": training_config, "job_info": get_training_job_info()}, - fp, - indent=4, - ) - - -def get_next_path( - base_fname: str, - base_dir: Path, - file_type: str = "yaml", - separator: str = "-", -): - """ - Gets the next available path in a directory. For example, if `base_fname="results"` - and `base_dir` has files ["results-0.yaml", "results-1.yaml"], this function returns - "results-2.yaml". - """ - if file_type == "": - # Directory - items = filter( - lambda x: x.is_dir() and re.match(f"^{base_fname}{separator}\\d+$", x.stem), - base_dir.glob("*"), - ) - else: - # File - items = filter( - lambda x: re.match(f"^{base_fname}{separator}\\d+$", x.stem), - base_dir.glob(f"*.{file_type}"), - ) - run_nums = list( - map(lambda x: int(x.stem.replace(base_fname + separator, "")), items) - ) + [-1] - - next_num = max(run_nums) + 1 - fname = f"{base_fname}{separator}{next_num}" + ( - f".{file_type}" if file_type != "" else "" - ) - - return base_dir / fname - - -def load_model( - model_id="google/t5-efficient-tiny", - model_type="seq2seq", - vocab_size=4096, - random_init=False, - tie_embeddings=False, - pad_token_id=0, - eos_token_id=1, -): - """ - Load the specified HuggingFace model, adjusting the vocabulary - size, special token IDs, and initialization options. - - This allows to set a model up for training on a new vocabulary - of tokens. 
- """ - assert model_type in ["seq2seq", "causal"] - AutoModelClass = ( - AutoModelForSeq2SeqLM if model_type == "seq2seq" else AutoModelForCausalLM - ) - if random_init: - log_on_main("Using random initialization", logger) - config = AutoConfig.from_pretrained(model_id) - if isinstance(config, T5Config): - # The default initializer_factor (1.0) in transformers is too large - config.initializer_factor = 0.05 - config.tie_word_embeddings = tie_embeddings - model = AutoModelClass.from_config(config) - else: - log_on_main(f"Using pretrained initialization from {model_id}", logger) - model = AutoModelClass.from_pretrained(model_id) - - model.resize_token_embeddings(vocab_size) - - model.config.pad_token_id = model.generation_config.pad_token_id = pad_token_id - model.config.eos_token_id = model.generation_config.eos_token_id = eos_token_id - - return model - - -def has_enough_observations( - entry: dict, min_length: int = 0, max_missing_prop: float = 1.0 -) -> bool: - """ - Check if the given entry has enough observations in the ``"target"`` attribute. - - Parameters - ---------- - entry - The data entry (dictionary) to be tested. - min_length - The minimum length the ``"target"`` attribute must have. - max_missing_prop - The maximum proportion of missing data allowed in the ``"target"`` - attribute. - """ - if ( - len(entry["target"]) >= min_length - and np.isnan(entry["target"]).mean() <= max_missing_prop - ): - return True - return False - - -class PseudoShuffledIterableDataset(IterableDataset): - """ - Shuffle entries from an iterable by temporarily accumulating them - in an intermediate buffer. - - Parameters - ---------- - base_dataset - The original iterable object, representing the dataset. - shuffle_buffer_length - Size of the buffer use to shuffle entries from the base dataset. - """ - - def __init__(self, base_dataset, shuffle_buffer_length: int = 100) -> None: - super().__init__() - self.base_dataset = base_dataset - self.shuffle_buffer_length = shuffle_buffer_length - self.generator = torch.Generator() - - def __iter__(self): - shuffle_buffer = [] - - for element in self.base_dataset: - shuffle_buffer.append(element) - if len(shuffle_buffer) >= self.shuffle_buffer_length: - idx = torch.randint( - len(shuffle_buffer), size=(), generator=self.generator - ) - yield shuffle_buffer.pop(idx) - - while shuffle_buffer: - idx = torch.randint(len(shuffle_buffer), size=(), generator=self.generator) - yield shuffle_buffer.pop(idx) - - -class ShuffleMixin: - """ - Mix-in class that datasets can inherit from to get - shuffling functionality. - """ - - def shuffle(self, shuffle_buffer_length: int = 100): - return PseudoShuffledIterableDataset(self, shuffle_buffer_length) - - -class ChronosDataset(IterableDataset, ShuffleMixin): - """ - Dataset wrapper, using a ``ChronosTokenizer`` to turn data from a time series - into a HuggingFace-compatible set of ``input_ids``, ``attention_mask`` and - ``labels``. - - Entries from the original datasets are assumed to have a ``"start"`` attribute - (of type ``pd.Period``), and a ``"target"`` attribute (of type ``np.ndarray``). - - Parameters - ---------- - datasets - Datasets containing the original time series data. - probabilities - In training mode, data will be sampled from each of the original datasets - with these probabilities. - tokenizer - Tokenizer to be used to turn sequences of real numbers into token IDs. - context_length - Samples context will be limited to this length. - prediction_length - Samples labels will be limited to this length. 
- drop_prob - In training mode, observations from a sample will be turned into ``np.nan``, - i.e. turned into missing values, with this probability. - min_past - Data samples will be considered only if there's at least ``min_past``-many - historical observations. - mode - One of ``"training"``, ``"validation"``, or ``"test"``. - np_dtype - Numpy float data type. - """ - - def __init__( - self, - datasets: list, - probabilities: List[float], - tokenizer: ChronosTokenizer, - context_length: int = 512, - prediction_length: int = 64, - drop_prob: float = 0.2, - min_past: Optional[int] = None, - model_type: str = "seq2seq", - imputation_method: Optional[MissingValueImputation] = None, - mode: str = "training", - np_dtype=np.float32, - ) -> None: - super().__init__() - - assert len(probabilities) == len(datasets) - assert mode in ("training", "validation", "test") - assert model_type in ("seq2seq", "causal") - - self.datasets = datasets - self.probabilities = probabilities - self.tokenizer = tokenizer - self.context_length = context_length - self.prediction_length = prediction_length - self.drop_prob = drop_prob - self.min_past = min_past or prediction_length - self.model_type = model_type - self.imputation_method = imputation_method or LeavesMissingValues() - self.mode = mode - self.np_dtype = np_dtype - - def preprocess_entry(self, entry: dict, mode: str) -> dict: - entry = {f: entry[f] for f in ["start", "target"]} - entry["target"] = np.asarray(entry["target"], dtype=self.np_dtype) - assert entry["target"].ndim == 1, f"got {entry['target'].ndim=}, expected 1" - - if self.model_type == "causal": - # Causal models do not play nice with missing values, so it is - # recommended to use an imputation method, e.g., LastValueImputation - entry["target"] = self.imputation_method(entry["target"]) - - if mode == "training" and self.drop_prob > 0: - target = entry["target"].copy() - drop_p = np.random.uniform(low=0.0, high=self.drop_prob) - mask = np.random.choice( - [True, False], size=len(target), p=[drop_p, 1 - drop_p] - ) - target[mask] = np.nan - entry["target"] = target - - return entry - - def _create_instance_splitter(self, mode: str): - assert mode in ["training", "test", "validation"] - - instance_sampler = { - "training": ExpectedNumInstanceSampler( - num_instances=1.0, - min_instances=1, - min_past=self.min_past, - min_future=self.prediction_length, - ), - "test": TestSplitSampler(), - "validation": ValidationSplitSampler(min_future=self.prediction_length), - }[mode] - - return InstanceSplitter( - target_field="target", - is_pad_field="is_pad", - start_field="start", - forecast_start_field="forecast_start", - instance_sampler=instance_sampler, - past_length=self.context_length, - future_length=self.prediction_length, - dummy_value=np.nan, - ) - - def create_training_data(self, data): - data = Cyclic(data) - split_transform = self._create_instance_splitter( - "training" - ) + FilterTransformation( - condition=lambda entry: (~np.isnan(entry["past_target"])).sum() > 0 - ) - data = split_transform.apply(data, is_train=True) - return data - - def create_test_data(self, data): - data = self._create_instance_splitter("test").apply(data, is_train=False) - return data - - def create_validation_data(self, data): - data = self._create_instance_splitter("validation").apply(data, is_train=False) - return data - - def to_hf_format(self, entry: dict) -> dict: - past_target = torch.tensor(entry["past_target"]).unsqueeze(0) - input_ids, attention_mask, scale = self.tokenizer.context_input_transform( - 
past_target - ) - future_target = torch.tensor(entry["future_target"]).unsqueeze(0) - labels, labels_mask = self.tokenizer.label_input_transform(future_target, scale) - labels[labels_mask == 0] = -100 - - if self.model_type == "causal": - # The InstanceSplitter pads time series on the left to be equal to the - # context_length. However, certain models (e.g., GPT2) with absolute - # position embeddings should not be trained with left padding. - # The following piece of code moves padding from left to right. - - assert input_ids.shape[-1] == entry["past_is_pad"].shape[0] - - # Find the index where padding starts - pad_start_idx = np.searchsorted(1 - entry["past_is_pad"], 1) - padded_input_ids, obs_input_ids = torch.tensor_split( - input_ids, [pad_start_idx], dim=-1 - ) - padded_attention_mask, obs_attention_mask = torch.tensor_split( - attention_mask, [pad_start_idx], dim=-1 - ) - - # Move padding to the right - input_ids = torch.cat( - [ - obs_input_ids, - labels, - padded_input_ids, - ], - axis=-1, - ) - attention_mask = torch.cat( - [ - obs_attention_mask, - labels_mask, - padded_attention_mask, - ], - axis=-1, - ) - - # labels for causal models are same as the input_ids. - # Internally transformers shifts the labels by one during training. - labels = input_ids.clone() - input_ids[~attention_mask] = self.tokenizer.config.pad_token_id - labels[~attention_mask] = -100 - - return { - "input_ids": input_ids.squeeze(0), - "attention_mask": attention_mask.squeeze(0), - "labels": labels.squeeze(0), - } - - def __iter__(self) -> Iterator: - preprocessed_datasets = [ - Map( - partial(self.preprocess_entry, mode=self.mode), - dataset, - ) - for dataset in self.datasets - ] - - if self.mode == "training": - iterables = [ - self.create_training_data(dataset) for dataset in preprocessed_datasets - ] - elif self.mode == "test": - iterables = [ - self.create_test_data(dataset) for dataset in preprocessed_datasets - ] - else: - iterables = [ - self.create_validation_data(dataset) - for dataset in preprocessed_datasets - ] - - worker_info = get_worker_info() - if worker_info is None: - probs = list(self.probabilities) - else: - worker_id = worker_info.id - num_workers = worker_info.num_workers - iterables = list(itertools.islice(iterables, worker_id, None, num_workers)) - probs = list( - itertools.islice(self.probabilities, worker_id, None, num_workers) - ) - - probs = [prob / sum(probs) for prob in probs] - - iterators = list(map(iter, iterables)) - if self.mode == "training": - while True: - idx = np.random.choice(range(len(iterators)), p=probs) - try: - yield self.to_hf_format(next(iterators[idx])) - except StopIteration: - probs[idx] = 0 - if sum(probs) == 0: - return - probs = [prob / sum(probs) for prob in probs] - else: - for entry in itertools.chain(*iterators): - yield self.to_hf_format(entry) - - -@app.command() -@use_yaml_config(param_name="config") -def main( - training_data_paths: str, - probability: Optional[str] = None, - context_length: int = 512, - prediction_length: int = 64, - min_past: int = 64, - max_steps: int = 200_000, - save_steps: int = 50_000, - log_steps: int = 500, - per_device_train_batch_size: int = 32, - learning_rate: float = 1e-3, - optim: str = "adamw_torch_fused", - shuffle_buffer_length: int = 100, - gradient_accumulation_steps: int = 2, - model_id: str = "google/t5-efficient-tiny", - model_type: str = "seq2seq", - random_init: bool = False, - tie_embeddings: bool = False, - output_dir: str = "./output/", - tf32: bool = True, - torch_compile: bool = True, - 
tokenizer_class: str = "MeanScaleUniformBins", - tokenizer_kwargs: str = "{'low_limit': -15.0, 'high_limit': 15.0}", - n_tokens: int = 4096, - n_special_tokens: int = 2, - pad_token_id: int = 0, - eos_token_id: int = 1, - use_eos_token: bool = True, - lr_scheduler_type: str = "linear", - warmup_ratio: float = 0.0, - dataloader_num_workers: int = 1, - max_missing_prop: float = 0.9, - num_samples: int = 20, - temperature: float = 1.0, - top_k: int = 50, - top_p: float = 1.0, - seed: Optional[int] = None, -): - if tf32 and not ( - torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 - ): - # TF32 floating point format is available only on NVIDIA GPUs - # with compute capability 8 and above. See link for details. - # https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capability-8-x - log_on_main( - "TF32 format is only available on devices with compute capability >= 8. " - "Setting tf32 to False.", - logger, - ) - tf32 = False - - if seed is None: - seed = random.randint(0, 2**32) - - log_on_main(f"Using SEED: {seed}", logger) - transformers.set_seed(seed=seed) - - raw_training_config = deepcopy(locals()) - output_dir = Path(output_dir) - training_data_paths = ast.literal_eval(training_data_paths) - assert isinstance(training_data_paths, list) - - if isinstance(probability, str): - probability = ast.literal_eval(probability) - elif probability is None: - probability = [1.0 / len(training_data_paths)] * len(training_data_paths) - assert isinstance(probability, list) - - if isinstance(tokenizer_kwargs, str): - tokenizer_kwargs = ast.literal_eval(tokenizer_kwargs) - assert isinstance(tokenizer_kwargs, dict) - - assert model_type in ["seq2seq", "causal"] - - output_dir = get_next_path("run", base_dir=output_dir, file_type="") - - log_on_main(f"Logging dir: {output_dir}", logger) - log_on_main( - f"Loading and filtering {len(training_data_paths)} datasets " - f"for training: {training_data_paths}", - logger, - ) - - log_on_main( - f"Mixing probabilities: {probability}", - logger, - ) - - train_datasets = [ - Filter( - partial( - has_enough_observations, - min_length=min_past + prediction_length, - max_missing_prop=max_missing_prop, - ), - FileDataset(path=Path(data_path), freq="D"), - ) - for data_path in training_data_paths - ] - - log_on_main("Initializing model", logger) - - model = load_model( - model_id=model_id, - model_type=model_type, - vocab_size=n_tokens, - random_init=random_init, - tie_embeddings=tie_embeddings, - pad_token_id=pad_token_id, - eos_token_id=eos_token_id, - ) - - chronos_config = ChronosConfig( - tokenizer_class=tokenizer_class, - tokenizer_kwargs=tokenizer_kwargs, - n_tokens=n_tokens, - n_special_tokens=n_special_tokens, - pad_token_id=pad_token_id, - eos_token_id=eos_token_id, - use_eos_token=use_eos_token, - model_type=model_type, - context_length=context_length, - prediction_length=prediction_length, - num_samples=num_samples, - temperature=temperature, - top_k=top_k, - top_p=top_p, - ) - - # Add extra items to model config so that it's saved in the ckpt - model.config.chronos_config = chronos_config.__dict__ - - shuffled_train_dataset = ChronosDataset( - datasets=train_datasets, - probabilities=probability, - tokenizer=chronos_config.create_tokenizer(), - context_length=context_length, - prediction_length=prediction_length, - min_past=min_past, - model_type=model_type, - imputation_method=LastValueImputation() if model_type == "causal" else None, - mode="training", - ).shuffle(shuffle_buffer_length=shuffle_buffer_length) - - # 
Define training args - training_args = TrainingArguments( - output_dir=str(output_dir), - per_device_train_batch_size=per_device_train_batch_size, - learning_rate=learning_rate, - lr_scheduler_type=lr_scheduler_type, - warmup_ratio=warmup_ratio, - optim=optim, - logging_dir=str(output_dir / "logs"), - logging_strategy="steps", - logging_steps=log_steps, - save_strategy="steps", - save_steps=save_steps, - report_to=["tensorboard"], - max_steps=max_steps, - gradient_accumulation_steps=gradient_accumulation_steps, - dataloader_num_workers=dataloader_num_workers, - tf32=tf32, # remove this if not using Ampere GPUs (e.g., A100) - torch_compile=torch_compile, - ddp_find_unused_parameters=False, - remove_unused_columns=False, - ) - - # Create Trainer instance - trainer = Trainer( - model=model, - args=training_args, - train_dataset=shuffled_train_dataset, - ) - log_on_main("Training", logger) - - trainer.train() - - if is_main_process(): - model.save_pretrained(output_dir / "checkpoint-final") - save_training_info( - output_dir / "checkpoint-final", training_config=raw_training_config - ) - - -if __name__ == "__main__": - logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - logger = logging.getLogger(__file__) - logger.setLevel(logging.INFO) - app() diff --git a/examples/foundation-model-examples/data_preparation.py b/examples/foundation-model-examples/data_preparation.py deleted file mode 100644 index 617d621..0000000 --- a/examples/foundation-model-examples/data_preparation.py +++ /dev/null @@ -1,143 +0,0 @@ -# Databricks notebook source -# MAGIC %pip install datasetsforecast==0.0.8 --quiet -# MAGIC dbutils.library.restartPython() - -# COMMAND ---------- - -import pathlib -import pandas as pd -from datasetsforecast.m4 import M4 -import logging -logger = spark._jvm.org.apache.log4j -logging.getLogger("py4j.java_gateway").setLevel(logging.ERROR) -logging.getLogger("py4j.clientserver").setLevel(logging.ERROR) - -# COMMAND ---------- - -dbutils.widgets.text("catalog", "") -dbutils.widgets.text("db", "") -dbutils.widgets.text("n", "") - -catalog = dbutils.widgets.get("catalog") # Name of the catalog we use to manage our assets -db = dbutils.widgets.get("db") # Name of the schema we use to store assets -n = int(dbutils.widgets.get("n")) # Number of time series to sample - -# Make sure the catalog, schema and volume exist -_ = spark.sql(f"CREATE CATALOG IF NOT EXISTS {catalog}") -_ = spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{db}") - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Daily M4 Data - -# COMMAND ---------- - -def create_m4_daily(): - y_df, _, _ = M4.load(directory=str(pathlib.Path.home()), group="Daily") - _ids = [f"D{i}" for i in range(1, n)] - y_df = ( - y_df.groupby("unique_id") - .filter(lambda x: x.unique_id.iloc[0] in _ids) - .groupby("unique_id") - .apply(transform_group_daily) - .reset_index(drop=True) - ) - return y_df - - -def transform_group_daily(df): - unique_id = df.unique_id.iloc[0] - if len(df) > 1020: - df = df.iloc[-1020:] - _start = pd.Timestamp("2020-01-01") - _end = _start + pd.DateOffset(days=int(df.count()[0]) - 1) - date_idx = pd.date_range(start=_start, end=_end, freq="D", name="ds") - res_df = pd.DataFrame(data=[], index=date_idx).reset_index() - res_df["unique_id"] = unique_id - res_df["y"] = df.y.values - return res_df - - -( - spark.createDataFrame(create_m4_daily()) - .write.format("delta").mode("overwrite") - .saveAsTable(f"{catalog}.{db}.m4_daily_train") -) - -print(f"Saved data to {catalog}.{db}.m4_daily_train") - -# 
COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Monthly M4 Data
-
-# COMMAND ----------
-
-def create_m4_monthly():
-    y_df, _, _ = M4.load(directory=str(pathlib.Path.home()), group="Monthly")
-    _ids = [f"M{i}" for i in range(1, n + 1)]
-    y_df = (
-        y_df.groupby("unique_id")
-        .filter(lambda x: x.unique_id.iloc[0] in _ids)
-        .groupby("unique_id")
-        .apply(transform_group_monthly)
-        .reset_index(drop=True)
-    )
-    return y_df
-
-
-def transform_group_monthly(df):
-    unique_id = df.unique_id.iloc[0]
-    _cnt = 60  # df.count()[0]
-    _start = pd.Timestamp("2018-01-01")
-    _end = _start + pd.DateOffset(months=_cnt)
-    date_idx = pd.date_range(start=_start, end=_end, freq="M", name="date")
-    _df = (
-        pd.DataFrame(data=[], index=date_idx)
-        .reset_index()
-        .rename(columns={"index": "date"})
-    )
-    _df["unique_id"] = unique_id
-    _df["y"] = df[:60].y.values
-    return _df
-
-
-(
-    spark.createDataFrame(create_m4_monthly())
-    .write.format("delta").mode("overwrite")
-    .saveAsTable(f"{catalog}.{db}.m4_monthly_train")
-)
-
-print(f"Saved data to {catalog}.{db}.m4_monthly_train")
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Daily Rossmann with Exogenous Regressors
-
-# COMMAND ----------
-
-# MAGIC %md Download the dataset from [Kaggle](kaggle.com/competitions/rossmann-store-sales/data) and store it in the volume.
-
-# COMMAND ----------
-
-# Randomly select 100 stores to forecast
-#import random
-#random.seed(7)
-
-# Number of time series to sample
-#sample = True
-#stores = sorted(random.sample(range(0, 1000), n))
-
-#train = spark.read.csv(f"/Volumes/{catalog}/rossmann/csv/train.csv", header=True, inferSchema=True)
-#test = spark.read.csv(f"/Volumes/{catalog}/rossmann/csv/test.csv", header=True, inferSchema=True)
-
-#if sample:
-#    train = train.filter(train.Store.isin(stores))
-#    test = test.filter(test.Store.isin(stores))
-
-#train.write.mode("overwrite").option("mergeSchema", "true").saveAsTable(f"{catalog}.rossmann.rossmann_daily_train")
-#test.write.mode("overwrite").option("mergeSchema", "true").saveAsTable(f"{catalog}.rossmann.rossmann_daily_test")
-
-#print(f"Saved data to {catalog}.rossmann.rossmann_daily_train and {catalog}.rossmann.rossmann_daily_test")
diff --git a/examples/foundation-model-examples/moirai/01_moirai_load_inference.py b/examples/foundation-model-examples/moirai/01_moirai_load_inference.py
deleted file mode 100644
index a7a7e4e..0000000
--- a/examples/foundation-model-examples/moirai/01_moirai_load_inference.py
+++ /dev/null
@@ -1,614 +0,0 @@
-# Databricks notebook source
-# MAGIC %md
-# MAGIC This is an example notebook that shows how to use [Moirai](https://github.com/SalesforceAIResearch/uni2ts) models on Databricks. The notebook loads the model, distributes the inference, registers the model, deploys the model and makes online forecasts.
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Cluster setup
-# MAGIC
-# MAGIC We recommend using a cluster with [Databricks Runtime 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/14.3lts-ml.html) or above. The cluster can be single-node or multi-node with one or more GPU instances on each worker: e.g. [g5.12xlarge [A10G]](https://aws.amazon.com/ec2/instance-types/g5/) on AWS or [Standard_NV72ads_A10_v5](https://learn.microsoft.com/en-us/azure/virtual-machines/nva10v5-series) on Azure. This notebook will leverage [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) for distributing the inference tasks and utilizing all the available resources.
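As background for the pattern this notebook relies on: an iterator-of-series Pandas UDF receives batches of `pd.Series` and yields transformed batches, which lets expensive one-time setup (such as loading model weights) happen once per executor rather than once per row. Below is a minimal sketch of the pattern, assuming a live `spark` session as in a Databricks notebook; the column name `y` and the doubling logic are purely illustrative.

```python
import pandas as pd
from typing import Iterator
from pyspark.sql.functions import pandas_udf

@pandas_udf("double")
def double_y(batches: Iterator[pd.Series]) -> Iterator[pd.Series]:
    # One-time setup (e.g., loading model weights) would go here, before
    # the loop, so it runs once per executor rather than once per row.
    for batch in batches:
        # Transform each incoming batch and yield a batch of results.
        yield batch * 2.0

# Illustrative usage: `spark` is assumed to be the active SparkSession.
sdf = spark.createDataFrame(pd.DataFrame({"y": [1.0, 2.0, 3.0]}))
sdf.select(double_y("y")).show()
```

The forecasting UDFs defined in this notebook follow exactly this shape, with the model module load in the setup position.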
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Install package
-
-# COMMAND ----------
-
-# MAGIC %pip install git+https://github.com/SalesforceAIResearch/uni2ts.git --quiet
-# MAGIC dbutils.library.restartPython()
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Prepare Data
-# MAGIC We use the [`datasetsforecast`](https://github.com/Nixtla/datasetsforecast/tree/main/) package to download M4 data. The M4 dataset contains a set of time series which we use for testing MMF. Below we have written a number of custom functions to convert M4 time series to an expected format.
-# MAGIC
-# MAGIC Make sure that the catalog and the schema already exist.
-
-# COMMAND ----------
-
-catalog = "mmf"  # Name of the catalog we use to manage our assets
-db = "m4"  # Name of the schema we use to manage our assets (e.g. datasets)
-n = 100  # Number of time series to sample
-
-# COMMAND ----------
-
-# This cell runs the notebook ../data_preparation and creates the following tables with M4 data:
-# 1. {catalog}.{db}.m4_daily_train,
-# 2. {catalog}.{db}.m4_monthly_train
-dbutils.notebook.run("../data_preparation", timeout_seconds=0, arguments={"catalog": catalog, "db": db, "n": n})
-
-# COMMAND ----------
-
-from pyspark.sql.functions import collect_list
-
-# Make sure that the data exists
-df = spark.table(f'{catalog}.{db}.m4_daily_train')
-df = df.groupBy('unique_id').agg(collect_list('ds').alias('ds'), collect_list('y').alias('y'))
-display(df)
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Distribute Inference
-# MAGIC We use [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html#iterator-of-series-to-iterator-of-series-udf) to distribute the inference.
-
-# COMMAND ----------
-
-import pandas as pd
-import numpy as np
-import torch
-from einops import rearrange
-from typing import Iterator
-from pyspark.sql.functions import pandas_udf
-
-# Function to create a Pandas UDF to generate horizon timestamps
-def create_get_horizon_timestamps(freq, prediction_length):
-    """
-    Creates a Pandas UDF to generate horizon timestamps based on the given frequency and prediction length.
-
-    Parameters:
-    - freq (str): The frequency of the time series ('M' for monthly, 'D' for daily, etc.).
-    - prediction_length (int): The number of future timestamps to generate.
-
-    Returns:
-    - get_horizon_timestamps (function): A Pandas UDF function that generates horizon timestamps.
-    """
-
-    @pandas_udf('array<timestamp>')
-    def get_horizon_timestamps(batch_iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
-        # Determine the offset for timestamp increments based on the frequency
-        one_ts_offset = pd.offsets.MonthEnd(1) if freq == "M" else pd.DateOffset(days=1)
-
-        batch_horizon_timestamps = []
-        # Iterate over batches of series in the batch iterator
-        for batch in batch_iterator:
-            for series in batch:
-                timestamp = series.max()
-                horizon_timestamps = []
-                # Generate future timestamps based on the prediction length
-                for i in range(prediction_length):
-                    timestamp = timestamp + one_ts_offset
-                    horizon_timestamps.append(timestamp.to_numpy())
-                batch_horizon_timestamps.append(np.array(horizon_timestamps))
-        # Yield the generated horizon timestamps as a Pandas Series
-        yield pd.Series(batch_horizon_timestamps)
-
-    return get_horizon_timestamps
-
-# Function to create a Pandas UDF to generate forecasts
-def create_forecast_udf(repository, prediction_length, patch_size, num_samples):
-    """
-    Creates a Pandas UDF to generate forecasts using a pre-trained model.
-
-    Parameters:
-    - repository (str): The path to the pre-trained model repository.
-    - prediction_length (int): The length of the forecast horizon.
-    - patch_size (int): The size of the patches for the model input.
-    - num_samples (int): The number of samples to generate for each forecast.
-
-    Returns:
-    - forecast_udf (function): A Pandas UDF function that generates forecasts.
-    """
-
-    @pandas_udf('array<double>')
-    def forecast_udf(bulk_iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
-
-        ## Initialization step
-        import torch
-        import numpy as np
-        import pandas as pd
-        from uni2ts.model.moirai import MoiraiForecast, MoiraiModule
-
-        # Load the pre-trained model module from the repository
-        module = MoiraiModule.from_pretrained(repository)
-
-        ## Inference
-        for bulk in bulk_iterator:
-            median = []
-            for series in bulk:
-                # Initialize the forecast model with the loaded module and given parameters
-                model = MoiraiForecast(
-                    module=module,
-                    prediction_length=prediction_length,
-                    context_length=len(series),
-                    patch_size=patch_size,
-                    num_samples=num_samples,
-                    target_dim=1,
-                    feat_dynamic_real_dim=0,
-                    past_feat_dynamic_real_dim=0,
-                )
-                # Prepare the past target tensor. Shape: (batch, time, variate)
-                past_target = rearrange(
-                    torch.as_tensor(series, dtype=torch.float32), "t -> 1 t 1"
-                )
-                # Create a tensor indicating observed values. Shape: (batch, time, variate)
-                past_observed_target = torch.ones_like(past_target, dtype=torch.bool)
-                # Create a tensor indicating padding values. Shape: (batch, time)
-                past_is_pad = torch.zeros_like(past_target, dtype=torch.bool).squeeze(-1)
-
-                # Generate the forecast
-                forecast = model(
-                    past_target=past_target,
-                    past_observed_target=past_observed_target,
-                    past_is_pad=past_is_pad,
-                )
-                # Take the median across samples as the point forecast for this series
-                median.append(np.median(forecast[0], axis=0))
-            # Yield the generated forecasts as a Pandas Series
-            yield pd.Series(median)
-
-    return forecast_udf
-
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC We specify the requirements of our forecasts.
-
-# COMMAND ----------
-
-model = "moirai-1.0-R-small"  # Alternatively: moirai-1.0-R-base, moirai-1.0-R-large
-prediction_length = 10  # Time horizon for forecasting
-num_samples = 10  # Number of forecasts to generate. We will take the median as our final forecast.
-patch_size = 32  # Patch size: choose from {"auto", 8, 16, 32, 64, 128}
-freq = "D"  # Frequency of the time series
-device_count = torch.cuda.device_count()  # Number of GPUs available
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC Let's generate the forecasts.
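Before running the distributed job, it may help to see the aggregation step inside `forecast_udf` in isolation: the model draws `num_samples` trajectories per series, and the UDF keeps the elementwise median as the point forecast. A toy numpy illustration follows; the shapes are made up and no model is involved.

```python
import numpy as np

rng = np.random.default_rng(0)

# Pretend forecast output for one series: 10 sampled trajectories over a
# 10-step horizon, i.e. shape (num_samples, prediction_length).
samples = rng.normal(size=(10, 10))

# The elementwise median across the sample dimension is the point forecast.
point_forecast = np.median(samples, axis=0)
print(point_forecast.shape)  # -> (10,)
```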
- -# COMMAND ---------- - -# Create the Pandas UDF for generating horizon timestamps using the specified frequency and prediction length -get_horizon_timestamps = create_get_horizon_timestamps(freq=freq, prediction_length=prediction_length) - -# Create the Pandas UDF for generating forecasts using the specified model repository and forecast parameters -forecast_udf = create_forecast_udf( - repository=f"Salesforce/{model}", # Path to the pre-trained model repository - prediction_length=prediction_length, # Length of the forecast horizon - patch_size=patch_size, # Size of the patches for the model input - num_samples=num_samples, # Number of samples to generate for each forecast -) - -# Repartition the DataFrame to match the number of devices (for parallel processing) and select the required columns -forecasts = df.repartition(device_count).select( - df.unique_id, # Select the unique identifier for each time series - get_horizon_timestamps(df.ds).alias("ds"), # Generate horizon timestamps and alias as 'ds' - forecast_udf(df.y).alias("forecast"), # Generate forecasts and alias as 'forecast' -) - -# Display the resulting DataFrame with unique_id, horizon timestamps, and forecasts -display(forecasts) - - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Register Model -# MAGIC We will package our model using [`mlflow.pyfunc.PythonModel`](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html) and register this in Unity Catalog. - -# COMMAND ---------- - -import mlflow -import torch -import numpy as np -from mlflow.models.signature import ModelSignature -from mlflow.types import DataType, Schema, TensorSpec - -# Set the MLflow registry URI to Databricks Unity Catalog -mlflow.set_registry_uri("databricks-uc") - -class MoiraiModel(mlflow.pyfunc.PythonModel): - def __init__(self, repository): - """ - Initialize the MoiraiModel class by loading the pre-trained model from the given repository. - - Parameters: - - repository (str): The path to the pre-trained model repository. - """ - import torch - from uni2ts.model.moirai import MoiraiForecast, MoiraiModule - - # Load the pre-trained model module from the repository - self.module = MoiraiModule.from_pretrained(repository) - - def predict(self, context, input_data, params=None): - """ - Generate forecasts using the loaded model. - - Parameters: - - context: The context in which the model is being run. - - input_data: The input data for prediction, expected to be a time series. - - params: Additional parameters for prediction (not used here). - - Returns: - - forecast: The median forecast result as a NumPy array. - """ - from uni2ts.model.moirai import MoiraiForecast, MoiraiModule - - # Initialize the forecast model with the loaded module and given parameters - model = MoiraiForecast( - module=self.module, - prediction_length=10, # Length of the forecast horizon - context_length=len(input_data), # Context length is the length of the input data - patch_size=32, # Size of the patches for the model input - num_samples=10, # Number of samples to generate for each forecast - target_dim=1, # Dimension of the target variable - feat_dynamic_real_dim=0, # No dynamic real features - past_feat_dynamic_real_dim=0, # No past dynamic real features - ) - - # Prepare the past target tensor. Shape: (batch, time, variate) - past_target = rearrange( - torch.as_tensor(input_data, dtype=torch.float32), "t -> 1 t 1" - ) - # Create a tensor indicating observed values. 
Shape: (batch, time, variate) - past_observed_target = torch.ones_like(past_target, dtype=torch.bool) - # Create a tensor indicating padding values. Shape: (batch, time) - past_is_pad = torch.zeros_like(past_target, dtype=torch.bool).squeeze(-1) - - # Generate the forecast - forecast = model( - past_target=past_target, - past_observed_target=past_observed_target, - past_is_pad=past_is_pad, - ) - - # Return the median forecast of the first sample - return np.median(forecast[0], axis=0) - -# Initialize the MoiraiModel with the specified model repository -pipeline = MoiraiModel(f"Salesforce/{model}") - -# Define the input and output schema for the model -input_schema = Schema([TensorSpec(np.dtype(np.double), (-1,))]) -output_schema = Schema([TensorSpec(np.dtype(np.uint8), (-1,))]) -signature = ModelSignature(inputs=input_schema, outputs=output_schema) - -# Example input data for model registration -input_example = np.random.rand(52) - -# Define the registered model name -registered_model_name = f"{catalog}.{db}.moirai-1-r-small" - -# Log and register the model with MLflow -with mlflow.start_run() as run: - mlflow.pyfunc.log_model( - "model", - python_model=pipeline, # The custom Python model - registered_model_name=registered_model_name, # The name under which to register the model - signature=signature, # The model signature - input_example=input_example, # An example of the input data - pip_requirements=[ - "git+https://github.com/SalesforceAIResearch/uni2ts.git", - ], - ) - - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Reload Model -# MAGIC Once the registration is complete, we will reload the model and generate forecasts. - -# COMMAND ---------- - -from mlflow import MlflowClient - -# Create an instance of the MlflowClient to interact with the MLflow tracking server -mlflow_client = MlflowClient() - -def get_latest_model_version(mlflow_client, registered_model_name): - """ - Retrieve the latest version number of a registered model. - - Parameters: - - mlflow_client (MlflowClient): The MLflow client instance. - - registered_model_name (str): The name of the registered model. - - Returns: - - latest_version (int): The latest version number of the registered model. - """ - # Initialize the latest version to 1 (assuming at least one version exists) - latest_version = 1 - - # Iterate over all model versions for the given registered model - for mv in mlflow_client.search_model_versions(f"name='{registered_model_name}'"): - # Convert the version to an integer - version_int = int(mv.version) - - # Update the latest version if a higher version is found - if version_int > latest_version: - latest_version = version_int - - # Return the latest version number - return latest_version - -# Get the latest version of the registered model -model_version = get_latest_model_version(mlflow_client, registered_model_name) - -# Construct the URI for the logged model using the registered model name and latest version -logged_model = f"models:/{registered_model_name}/{model_version}" - -# Load the model as a PyFuncModel from the logged model URI -loaded_model = mlflow.pyfunc.load_model(logged_model) - - -# Create random input data (52 data points) -input_data = np.random.rand(52) - -# Generate forecasts using the loaded model -loaded_model.predict(input_data) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Deploy Model -# MAGIC We will deploy our model behind a real-time endpoint of [Databricks Mosaic AI Model Serving](https://www.databricks.com/product/model-serving). 
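For orientation before the deployment plumbing below: once served, the endpoint accepts a JSON body whose `inputs` field carries the raw series, mirroring the `(-1,)` double `TensorSpec` registered above. A minimal sketch of building such a payload (the actual endpoint call comes later in the notebook):

```python
import json
import numpy as np

# The served pyfunc model expects the series under the "inputs" key,
# matching the input schema registered with the model above.
payload = json.dumps({"inputs": np.random.rand(52).tolist()})
print(payload[:80])
```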
-
-# COMMAND ----------
-
-# With the token, we can create the authorization header for our subsequent REST calls
-token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().getOrElse(None)
-headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-# Next we need an endpoint at which to execute the request, which we can get from the notebook's tags collection
-java_tags = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags()
-
-# This object comes from the Java CM - convert the Java Map object to a Python dictionary
-tags = sc._jvm.scala.collection.JavaConversions.mapAsJavaMap(java_tags)
-
-# Lastly, extract the Databricks instance (domain name) from the dictionary
-instance = tags["browserHostName"]
-
-# COMMAND ----------
-
-import requests
-
-model_serving_endpoint_name = "moirai-1-r-small"
-
-# auto_capture_config specifies where the inference logs should be written
-my_json = {
-    "name": model_serving_endpoint_name,
-    "config": {
-        "served_models": [
-            {
-                "model_name": registered_model_name,
-                "model_version": model_version,
-                "workload_type": "GPU_SMALL",
-                "workload_size": "Small",
-                "scale_to_zero_enabled": "true",
-            }
-        ],
-        "auto_capture_config": {
-            "catalog_name": catalog,
-            "schema_name": db,
-            "table_name_prefix": model_serving_endpoint_name,
-        },
-    },
-}
-
-# Make sure to drop the inference table if it exists
-_ = spark.sql(
-    f"DROP TABLE IF EXISTS {catalog}.{db}.`{model_serving_endpoint_name}_payload`"
-)
-
-# COMMAND ----------
-
-# Function to create an endpoint in Model Serving and deploy the model behind it
-def func_create_endpoint(model_serving_endpoint_name):
-    # get endpoint status
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-    url = f"{endpoint_url}/{model_serving_endpoint_name}"
-    r = requests.get(url, headers=headers)
-    if "RESOURCE_DOES_NOT_EXIST" in r.text:
-        print(
-            "Creating this new endpoint: ",
-            f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations",
-        )
-        re = requests.post(endpoint_url, headers=headers, json=my_json)
-    else:
-        new_model_version = (my_json["config"])["served_models"][0]["model_version"]
-        print(
-            "This endpoint existed previously! We are updating it to a new config with new model version: ",
-            new_model_version,
-        )
-        # update config
-        url = f"{endpoint_url}/{model_serving_endpoint_name}/config"
-        re = requests.put(url, headers=headers, json=my_json["config"])
-        # wait until the new config is in place
-        import time, json
-
-        # get endpoint status
-        url = f"https://{instance}/api/2.0/serving-endpoints/{model_serving_endpoint_name}"
-        retry = True
-        total_wait = 0
-        while retry:
-            r = requests.get(url, headers=headers)
-            assert (
-                r.status_code == 200
-            ), f"Expected an HTTP 200 response when accessing endpoint info, received {r.status_code}"
-            endpoint = json.loads(r.text)
-            if "pending_config" in endpoint.keys():
-                seconds = 10
-                print("New config still pending")
-                if total_wait < 6000:
-                    # keep waiting while under the 6000-second cap
                    print(f"Wait for {seconds} seconds")
-                    print(f"Total waiting time so far: {total_wait} seconds")
-                    time.sleep(10)
-                    total_wait += seconds
-                else:
-                    print(f"Stopping, waited for {total_wait} seconds")
-                    retry = False
-            else:
-                print("New config in place now!")
-                retry = False
-
-    assert (
-        re.status_code == 200
-    ), f"Expected an HTTP 200 response, received {re.status_code}"
-
-# Function to delete the endpoint from Model Serving
-def func_delete_model_serving_endpoint(model_serving_endpoint_name):
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-    url = f"{endpoint_url}/{model_serving_endpoint_name}"
-    response = requests.delete(url, headers=headers)
-    if response.status_code != 200:
-        raise Exception(
-            f"Request failed with status {response.status_code}, {response.text}"
-        )
-    else:
-        print(model_serving_endpoint_name, "endpoint is deleted!")
-    return response.json()
-
-# COMMAND ----------
-
-# Create an endpoint. This may take some time.
-func_create_endpoint(model_serving_endpoint_name)
-
-# COMMAND ----------
-
-import time
-import mlflow
-import requests
-
-def wait_for_endpoint():
-    """
-    Waits for a model serving endpoint to become ready.
-
-    This function continuously polls the serving endpoint's status and waits until the endpoint is ready.
-    """
-    # Construct the base URL for the serving endpoint API
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-
-    while True:
-        # Construct the full URL for the specific model serving endpoint
-        url = f"{endpoint_url}/{model_serving_endpoint_name}"
-
-        # Send a GET request to the endpoint URL with the required headers
-        response = requests.get(url, headers=headers)
-
-        # Assert that the response status code is 200 (OK)
-        assert (
-            response.status_code == 200
-        ), f"Expected an HTTP 200 response, received {response.status_code}\n{response.text}"
-
-        # Extract the 'ready' status from the JSON response
-        status = response.json().get("state", {}).get("ready", {})
-
-        # Check if the status is "READY"
-        if status == "READY":
-            # Print the status and a separator line, then exit the function
-            print(status)
-            print("-" * 80)
-            return
-        else:
-            # Print a message indicating the endpoint is not ready and wait for 5 minutes (300 seconds)
-            print(f"Endpoint not ready ({status}), waiting 5 minutes")
-            time.sleep(300)
-
-# Get the API URL for the current Databricks instance
-api_url = mlflow.utils.databricks_utils.get_webapp_url()
-
-# Call the function to wait for the endpoint to become ready
-wait_for_endpoint()
-
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Online Forecast
-# MAGIC Once the endpoint is ready, let's send a request to the model and generate an online forecast.
- -# COMMAND ---------- - -import os -import requests -import pandas as pd -import json -import matplotlib.pyplot as plt - -# Construct the endpoint URL for model invocation using the provided instance and model serving endpoint name. -# This URL is used to send data to the model and get predictions. -endpoint_url = f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations" - -# Retrieve the Databricks API token using dbutils (a utility available in Databricks notebooks). -# This token is used for authentication when making requests to the endpoint. -token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get() - -def forecast(input_data, url=endpoint_url, databricks_token=token): - """ - Send input data to the model serving endpoint and retrieve the forecast. - - Parameters: - - input_data (numpy.ndarray): The input data to be sent to the model. - - url (str): The endpoint URL for model invocation. - - databricks_token (str): The Databricks API token for authentication. - - Returns: - - dict: The JSON response from the model containing the forecast. - """ - # Set the request headers, including the authorization token and content type. - headers = { - "Authorization": f"Bearer {databricks_token}", - "Content-Type": "application/json", - } - - # Convert the input data to a list and create the request body. - body = {"inputs": input_data.tolist()} - - # Serialize the request body to a JSON formatted string. - data = json.dumps(body) - - # Send a POST request to the endpoint URL with the headers and serialized data. - response = requests.request(method="POST", headers=headers, url=url, data=data) - - # Check if the response status code is not 200 (OK), raise an exception if the request failed. - if response.status_code != 200: - raise Exception( - f"Request failed with status {response.status_code}, {response.text}" - ) - - # Return the JSON response from the model containing the forecast. - return response.json() - - -# COMMAND ---------- - -# Send request to the endpoint -input_data = np.random.rand(52) -forecast(input_data) - -# COMMAND ---------- - -# Delete the serving endpoint -func_delete_model_serving_endpoint(model_serving_endpoint_name) - -# COMMAND ---------- - - diff --git a/examples/foundation-model-examples/moirai/02_moirai_fine_tune.py b/examples/foundation-model-examples/moirai/02_moirai_fine_tune.py deleted file mode 100644 index 5a93c90..0000000 --- a/examples/foundation-model-examples/moirai/02_moirai_fine_tune.py +++ /dev/null @@ -1,271 +0,0 @@ -# Databricks notebook source -# MAGIC %md -# MAGIC This is an example notebook that shows how to use [Moirai](https://github.com/SalesforceAIResearch/uni2ts) models on Databricks. The notebook loads, fine-tunes, and registers the model. - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Cluster setup -# MAGIC We recommend using a cluster with [Databricks Runtime 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/14.3lts-ml.html) or above. The cluster can be single-node or multi-node with one or more GPU instances on each worker: e.g. [g5.12xlarge [A10G]](https://aws.amazon.com/ec2/instance-types/g5/) on AWS or [Standard_NV72ads_A10_v5](https://learn.microsoft.com/en-us/azure/virtual-machines/nva10v5-series) on Azure. 
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Install package
-
-# COMMAND ----------
-
-# MAGIC %pip install git+https://github.com/SalesforceAIResearch/uni2ts.git --quiet
-# MAGIC dbutils.library.restartPython()
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Prepare Data
-# MAGIC We use the [`datasetsforecast`](https://github.com/Nixtla/datasetsforecast/tree/main/) package to download M4 data. The M4 dataset contains a set of time series which we use for testing MMF. Below we have written a number of custom functions to convert M4 time series to an expected format.
-# MAGIC
-# MAGIC Make sure that the catalog and the schema already exist.
-
-# COMMAND ----------
-
-catalog = "mmf"  # Name of the catalog we use to manage our assets
-db = "random"  # Name of the schema we use to manage our assets (e.g. datasets)
-volume = "moirai_fine_tune"  # Name of the volume where we store the data and the weights
-model = "moirai-1.0-R-small"  # Alternatively: moirai-1.0-R-base, moirai-1.0-R-large
-n = 100  # Number of time series to sample
-
-# COMMAND ----------
-
-# Make sure that the database exists.
-_ = spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{db}")
-
-# Make sure that the volume exists. We store the fine-tuned weights here.
-_ = spark.sql(f"CREATE VOLUME IF NOT EXISTS {catalog}.{db}.{volume}")
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC We synthesize `n` time series (randomly sampled) at daily resolution and store them as a csv file in a UC Volume.
-
-# COMMAND ----------
-
-import pandas as pd
-import numpy as np
-
-df_dict = {}
-
-for i in range(n):
-
-    # Create a date range for the index
-    date_range = pd.date_range(start='2021-01-01', end='2023-12-31', freq='D')
-
-    # Create a DataFrame with a date range index and two columns: 'item_id' and 'target'
-    df = pd.DataFrame({
-        'item_id': str(f"item_{i}"),
-        'target': np.random.randn(len(date_range))
-    }, index=date_range)
-
-    # Set 'item_id' as the second level of the MultiIndex
-    df.set_index('item_id', append=True, inplace=True)
-
-    # Sort the index
-    df.sort_index(inplace=True)
-
-    df_dict[i] = df
-
-
-pdf = pd.concat([df_dict[i] for i in range(n)])
-pdf.to_csv(f"/Volumes/{catalog}/{db}/{volume}/random.csv", index=True)
-pdf
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC This dotenv file is needed to call the [`uni2ts.data.builder.simple`](https://github.com/SalesforceAIResearch/uni2ts/blob/main/src/uni2ts/data/builder/simple.py) function from the [`uni2ts`](https://github.com/SalesforceAIResearch/uni2ts) library to build a dataset.
-
-# COMMAND ----------
-
-import os
-import site
-
-# Construct the path to the 'uni2ts' directory within the site-packages directory.
-# site.getsitepackages()[0] returns the path to the first directory in the list of site-packages directories.
-uni2ts = os.path.join(site.getsitepackages()[0], "uni2ts")
-
-# Construct the path to the '.env' file within the 'uni2ts' directory.
-dotenv = os.path.join(uni2ts, ".env")
-
-# Set the 'DOTENV' environment variable to the path of the '.env' file.
-# This tells the system where to find the '.env' file.
-os.environ['DOTENV'] = dotenv
-
-# Set the 'CUSTOM_DATA_PATH' environment variable to a path constructed using the provided 'catalog', 'db', and 'volume'.
-# This sets a custom data path for the application to use.
-os.environ['CUSTOM_DATA_PATH'] = f"/Volumes/{catalog}/{db}/{volume}"
-
-
-# COMMAND ----------
-
-# MAGIC %sh
-# MAGIC rm -f $DOTENV
-# MAGIC touch $DOTENV
-# MAGIC echo "CUSTOM_DATA_PATH=$CUSTOM_DATA_PATH" >> $DOTENV
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC We convert the dataset into the Uni2TS format. `random` is the name we give to the training dataset, which we load from our volume's location. See the [README](https://github.com/SalesforceAIResearch/uni2ts/tree/main?tab=readme-ov-file#fine-tuning) of Uni2TS for more information on the parameters.
-
-# COMMAND ----------
-
-# MAGIC %sh python -m uni2ts.data.builder.simple random /Volumes/mmf/random/moirai_fine_tune/random.csv \
-# MAGIC     --dataset_type long \
-# MAGIC     --offset 640
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ##Run Fine-tuning
-# MAGIC
-# MAGIC In this example, we will fine-tune `moirai-1.0-R-small` for at most 100 epochs with early stopping (configured in [`examples/foundation-model-examples/moirai/conf/finetune/default.yaml`](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation-model-examples/moirai/conf/finetune/default.yaml)). The learning rate is set to 1e-3, which you can modify in the model-specific configuration file: [`examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_small.yaml`](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_small.yaml).
-# MAGIC
-# MAGIC Make sure that you have the configuration yaml files placed inside the [`conf`](examples/foundation-model-examples/moirai/conf) folder and the [`train.py`](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation-model-examples/moirai/train.py) script in the same directory. These two assets are taken directly from [cli/conf](https://github.com/SalesforceAIResearch/uni2ts/tree/main/cli/conf) and [cli/train.py](https://github.com/SalesforceAIResearch/uni2ts/blob/main/cli/train.py). They are subject to change as the Moirai team develops the framework further. Keep an eye on the latest changes (we will try to as well) and use the latest versions as needed.
-# MAGIC
-# MAGIC The key configuration files to be customized for your use case are [`examples/foundation-model-examples/moirai/conf/finetune/default.yaml`](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation-model-examples/moirai/conf/finetune/default.yaml), [`examples/foundation-model-examples/moirai/conf/finetune/data/random.yaml`](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation-model-examples/moirai/conf/finetune/data/random.yaml) and [`examples/foundation-model-examples/moirai/conf/finetune/val_data/random.yaml`](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation-model-examples/moirai/conf/finetune/val_data/random.yaml). Read through the Moirai [documentation](https://github.com/SalesforceAIResearch/uni2ts) for more detail.
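One parameter worth calling out from the builder invocation above is `--offset 640`: our understanding (see the Uni2TS README for the authoritative semantics) is that it reserves the first 640 observations of each series for the training split, leaving the remainder for validation. A rough pandas sketch of that cut on the synthetic csv written earlier, with `catalog`, `db`, and `volume` as defined above:

```python
import pandas as pd

# Read back the synthetic data written above; the csv was saved with a
# (date, item_id) MultiIndex.
pdf = pd.read_csv(f"/Volumes/{catalog}/{db}/{volume}/random.csv", index_col=[0, 1])

# Pick out a single series and split it at the builder's offset.
one_series = pdf[pdf.index.get_level_values(-1) == "item_0"]
train_part, holdout = one_series.iloc[:640], one_series.iloc[640:]
print(len(train_part), len(holdout))  # 640 observations vs. the remainder
```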
- -# COMMAND ---------- - -# MAGIC %sh python train.py \ -# MAGIC -cp conf/finetune \ -# MAGIC run_name=random_run \ -# MAGIC model=moirai_1.0_R_small \ -# MAGIC data=random \ -# MAGIC val_data=random - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Register Model -# MAGIC We get the fine-tuned weights from the run from the UC volume, wrap the pipeline with [`mlflow.pyfunc.PythonModel`](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html) and register this on Unity Catalog. - -# COMMAND ---------- - -import mlflow -import torch -import numpy as np -from mlflow.models.signature import ModelSignature # Used to define the model input and output schema. -from mlflow.types import DataType, Schema, TensorSpec # Used to define the data types and structure for model inputs and outputs. - -# Set the MLflow registry URI to Databricks Unity Catalog -mlflow.set_registry_uri("databricks-uc") - -# Define a custom MLflow Python model class -class FineTunedMoiraiModel(mlflow.pyfunc.PythonModel): - def predict(self, context, input_data, params=None): - from einops import rearrange # Einops is a library for tensor operations. - from uni2ts.model.moirai import MoiraiForecast, MoiraiModule # Import the required classes from the Moirai model. - - # Determine the device to run the model on (GPU if available, otherwise CPU) - device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - - # Load the pre-trained Moirai model from the checkpoint - model = MoiraiForecast.load_from_checkpoint( - prediction_length=10, - context_length=len(input_data), - patch_size=32, - num_samples=10, - target_dim=1, - feat_dynamic_real_dim=0, - past_feat_dynamic_real_dim=0, - checkpoint_path=context.artifacts["weights"], - ).to(device) - - # Prepare the input data for the model - # Time series values. Shape: (batch, time, variate) - past_target = rearrange( - torch.as_tensor(input_data, dtype=torch.float32), "t -> 1 t 1" - ) - # 1s if the value is observed, 0s otherwise. Shape: (batch, time, variate) - past_observed_target = torch.ones_like(past_target, dtype=torch.bool) - # 1s if the value is padding, 0s otherwise. 
Shape: (batch, time)
-        past_is_pad = torch.zeros_like(past_target, dtype=torch.bool).squeeze(-1)
-
-        # Generate the forecast using the model
-        forecast = model(
-            past_target=past_target.to(device),
-            past_observed_target=past_observed_target.to(device),
-            past_is_pad=past_is_pad.to(device),
-        )
-
-        # Return the median forecast
-        return np.median(forecast.cpu()[0], axis=0)
-
-# Define the input schema for the model
-input_schema = Schema([TensorSpec(np.dtype(np.double), (-1,))])
-# Define the output schema for the model
-output_schema = Schema([TensorSpec(np.dtype(np.uint8), (-1,))])
-# Create a ModelSignature object to represent the input and output schema
-signature = ModelSignature(inputs=input_schema, outputs=output_schema)
-# Create an example input to log with the model
-input_example = np.random.rand(52)
-
-# Define the registered model name using variables for catalog, database, and volume
-registered_model_name = f"{catalog}.{db}.moirai-1-r-small_finetuned"
-
-# Define the path to the model weights
-weights = f"/Volumes/{catalog}/{db}/{volume}/outputs/moirai_1.0_R_small/random/random_run/checkpoints/epoch=0-step=100.ckpt"
-
-# Log and register the model with MLflow
-with mlflow.start_run() as run:
-    mlflow.pyfunc.log_model(
-        "model",  # The artifact path where the model is logged
-        python_model=FineTunedMoiraiModel(),  # The custom Python model to log
-        registered_model_name=registered_model_name,  # The name to register the model under
-        artifacts={"weights": weights},  # The model artifacts to log
-        signature=signature,  # The model signature
-        input_example=input_example,  # An example input to log with the model
-        pip_requirements=[
-            "git+https://github.com/SalesforceAIResearch/uni2ts.git",
-        ],  # The Python packages required to run the model
-    )
-
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ##Reload Model
-# MAGIC We reload the model from the registry and perform forecasting on a randomly generated time series (for testing purposes). You can also go ahead and deploy this model behind a Model Serving real-time endpoint. See the previous notebook: [`01_moirai_load_inference`](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/foundation-model-examples/chronos/02_moirai_load_inference.py) for more information.
- -# COMMAND ---------- - -from mlflow import MlflowClient -client = MlflowClient() - -# Function to get the latest version number of a registered model -def get_latest_model_version(client, registered_model_name): - latest_version = 1 # Initialize the latest version number to 1 - # Iterate through all model versions of the specified registered model - for mv in client.search_model_versions(f"name='{registered_model_name}'"): - version_int = int(mv.version) # Convert the version number to an integer - if version_int > latest_version: # Check if the current version is greater than the latest version - latest_version = version_int # Update the latest version number - return latest_version # Return the latest version number - -# Get the latest version number of the specified registered model -model_version = get_latest_model_version(client, registered_model_name) -# Construct the model URI using the registered model name and the latest version number -logged_model = f"models:/{registered_model_name}/{model_version}" - -# Load the model as a PyFuncModel -loaded_model = mlflow.pyfunc.load_model(logged_model) - -# Create input data for the model -input_data = np.random.rand(52) # Generate random input data of shape (52,) - -# Generate forecasts using the loaded model -loaded_model.predict(input_data) # Use the loaded model to make predictions on the input data - - -# COMMAND ---------- - - diff --git a/examples/foundation-model-examples/moirai/conf/finetune/data/etth1.yaml b/examples/foundation-model-examples/moirai/conf/finetune/data/etth1.yaml deleted file mode 100644 index a5de611..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/data/etth1.yaml +++ /dev/null @@ -1,3 +0,0 @@ -_target_: uni2ts.data.builder.simple.SimpleDatasetBuilder -dataset: ETTh1 -weight: 1000 \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/data/random.yaml b/examples/foundation-model-examples/moirai/conf/finetune/data/random.yaml deleted file mode 100644 index ab8153e..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/data/random.yaml +++ /dev/null @@ -1,3 +0,0 @@ -_target_: uni2ts.data.builder.simple.SimpleDatasetBuilder -dataset: random -weight: 1000 \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/default.yaml b/examples/foundation-model-examples/moirai/conf/finetune/default.yaml deleted file mode 100644 index d8b1abd..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/default.yaml +++ /dev/null @@ -1,83 +0,0 @@ -hydra: - run: - dir: /Volumes/mmf/random/moirai_fine_tune/outputs/${hydra:runtime.choices.model}/${hydra:runtime.choices.data}/${run_name} -defaults: - - model: ??? - - data: ??? - - val_data: null - - _self_ -run_name: ??? 
-seed: 0 -tf32: true -compile: false # set to mode: default, reduce-overhead, max-autotune -trainer: - _target_: lightning.Trainer - accelerator: auto - strategy: auto - devices: auto - num_nodes: 1 - precision: 32 - logger: - _target_: lightning.pytorch.loggers.TensorBoardLogger - save_dir: ${hydra:runtime.output_dir} - name: logs - callbacks: - - _target_: lightning.pytorch.callbacks.LearningRateMonitor - logging_interval: epoch - - _target_: lightning.pytorch.callbacks.ModelCheckpoint - dirpath: ${hydra:runtime.output_dir}/checkpoints - monitor: val/PackedNLLLoss - save_weights_only: true - mode: min - save_top_k: 1 - every_n_epochs: 1 - - _target_: lightning.pytorch.callbacks.EarlyStopping - monitor: val/PackedNLLLoss - min_delta: 0.0 - patience: 3 - mode: min - strict: false - verbose: true - max_epochs: 100 - enable_progress_bar: true - accumulate_grad_batches: 1 - gradient_clip_val: 1.0 - gradient_clip_algorithm: norm -train_dataloader: - _target_: uni2ts.data.loader.DataLoader - batch_size: 128 - batch_size_factor: 2.0 - cycle: true - num_batches_per_epoch: 100 - shuffle: true - num_workers: 11 - collate_fn: - _target_: uni2ts.data.loader.PackCollate - max_length: ${model.module_kwargs.max_seq_len} - seq_fields: ${cls_getattr:${model._target_},seq_fields} - pad_func_map: ${cls_getattr:${model._target_},pad_func_map} - pin_memory: true - drop_last: false - fill_last: false - worker_init_fn: null - prefetch_factor: 2 - persistent_workers: true -val_dataloader: - _target_: uni2ts.data.loader.DataLoader - batch_size: 128 - batch_size_factor: 2.0 - cycle: false - num_batches_per_epoch: null - shuffle: false - num_workers: 11 - collate_fn: - _target_: uni2ts.data.loader.PackCollate - max_length: ${model.module_kwargs.max_seq_len} - seq_fields: ${cls_getattr:${model._target_},seq_fields} - pad_func_map: ${cls_getattr:${model._target_},pad_func_map} - pin_memory: false - drop_last: false - fill_last: true - worker_init_fn: null - prefetch_factor: 2 - persistent_workers: true \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_base.yaml b/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_base.yaml deleted file mode 100644 index 96e5c4e..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_base.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# load a pretrained checkpoint from huggingface hub -_target_: uni2ts.model.moirai.MoiraiFinetune -module: - _target_: uni2ts.model.moirai.MoiraiModule.from_pretrained - pretrained_model_name_or_path: Salesforce/moirai-1.0-R-base -module_kwargs: - _target_: builtins.dict - distr_output: - _target_: uni2ts.distribution.MixtureOutput - components: - - _target_: uni2ts.distribution.StudentTOutput - - _target_: uni2ts.distribution.NormalFixedScaleOutput - - _target_: uni2ts.distribution.NegativeBinomialOutput - - _target_: uni2ts.distribution.LogNormalOutput - d_model: 768 - num_layers: 12 - patch_sizes: ${as_tuple:[8, 16, 32, 64, 128]} - max_seq_len: 512 - attn_dropout_p: 0.0 - dropout_p: 0.0 - scaling: true -min_patches: 2 -min_mask_ratio: 0.15 -max_mask_ratio: 0.5 -max_dim: 128 -loss_func: - _target_: uni2ts.loss.packed.PackedNLLLoss -lr: 1e-3 -weight_decay: 1e-1 -beta1: 0.9 -beta2: 0.98 -num_training_steps: ${mul:${trainer.max_epochs},${train_dataloader.num_batches_per_epoch}} -num_warmup_steps: 0 \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_large.yaml 
b/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_large.yaml deleted file mode 100644 index 991ba8d..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_large.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# load a pretrained checkpoint from huggingface hub -_target_: uni2ts.model.moirai.MoiraiFinetune -module: - _target_: uni2ts.model.moirai.MoiraiModule.from_pretrained - pretrained_model_name_or_path: Salesforce/moirai-1.0-R-large -module_kwargs: - _target_: builtins.dict - distr_output: - _target_: uni2ts.distribution.MixtureOutput - components: - - _target_: uni2ts.distribution.StudentTOutput - - _target_: uni2ts.distribution.NormalFixedScaleOutput - - _target_: uni2ts.distribution.NegativeBinomialOutput - - _target_: uni2ts.distribution.LogNormalOutput - d_model: 1024 - num_layers: 24 - patch_sizes: ${as_tuple:[8, 16, 32, 64, 128]} - max_seq_len: 512 - attn_dropout_p: 0.0 - dropout_p: 0.0 - scaling: true -min_patches: 2 -min_mask_ratio: 0.15 -max_mask_ratio: 0.5 -max_dim: 128 -loss_func: - _target_: uni2ts.loss.packed.PackedNLLLoss -lr: 1e-3 -weight_decay: 1e-1 -beta1: 0.9 -beta2: 0.98 -num_training_steps: ${mul:${trainer.max_epochs},${train_dataloader.num_batches_per_epoch}} -num_warmup_steps: 0 \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_small.yaml b/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_small.yaml deleted file mode 100644 index 9f799d7..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_1.0_R_small.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# load a pretrained checkpoint from huggingface hub -_target_: uni2ts.model.moirai.MoiraiFinetune -module: - _target_: uni2ts.model.moirai.MoiraiModule.from_pretrained - pretrained_model_name_or_path: Salesforce/moirai-1.0-R-small -module_kwargs: - _target_: builtins.dict - distr_output: - _target_: uni2ts.distribution.MixtureOutput - components: - - _target_: uni2ts.distribution.StudentTOutput - - _target_: uni2ts.distribution.NormalFixedScaleOutput - - _target_: uni2ts.distribution.NegativeBinomialOutput - - _target_: uni2ts.distribution.LogNormalOutput - d_model: 384 - num_layers: 6 - patch_sizes: ${as_tuple:[8, 16, 32, 64, 128]} - max_seq_len: 512 - attn_dropout_p: 0.0 - dropout_p: 0.0 - scaling: true -min_patches: 2 -min_mask_ratio: 0.15 -max_mask_ratio: 0.5 -max_dim: 128 -loss_func: - _target_: uni2ts.loss.packed.PackedNLLLoss -val_metric: - - _target_: uni2ts.loss.packed.PackedMSELoss - - _target_: uni2ts.loss.packed.PackedNRMSELoss - normalize: absolute_target_squared -lr: 1e-3 -weight_decay: 1e-1 -beta1: 0.9 -beta2: 0.98 -num_training_steps: ${mul:${trainer.max_epochs},${train_dataloader.num_batches_per_epoch}} -num_warmup_steps: 0 \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_base.yaml b/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_base.yaml deleted file mode 100644 index 8962bb3..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_base.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# load a pytorch lightning checkpoint -_target_: uni2ts.model.moirai.MoiraiFinetune.load_from_checkpoint -module_kwargs: - _target_: builtins.dict - distr_output: - _target_: uni2ts.distribution.MixtureOutput - components: - - _target_: uni2ts.distribution.StudentTOutput - - _target_: uni2ts.distribution.NormalFixedScaleOutput - - _target_: 
uni2ts.distribution.NegativeBinomialOutput - - _target_: uni2ts.distribution.LogNormalOutput - d_model: 768 - num_layers: 12 - patch_sizes: ${as_tuple:[8, 16, 32, 64, 128]} - max_seq_len: 512 - attn_dropout_p: 0.0 - dropout_p: 0.0 - scaling: true -min_patches: 2 -min_mask_ratio: 0.15 -max_mask_ratio: 0.5 -max_dim: 128 -loss_func: - _target_: uni2ts.loss.packed.PackedNLLLoss -lr: 1e-3 -weight_decay: 1e-1 -beta1: 0.9 -beta2: 0.98 -num_training_steps: ${mul:${trainer.max_epochs},${train_dataloader.num_batches_per_epoch}} -num_warmup_steps: 0 -checkpoint_path: ... \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_large.yaml b/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_large.yaml deleted file mode 100644 index d52f67f..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_large.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# load a pytorch lightning checkpoint -_target_: uni2ts.model.moirai.MoiraiFinetune.load_from_checkpoint -module_kwargs: - _target_: builtins.dict - distr_output: - _target_: uni2ts.distribution.MixtureOutput - components: - - _target_: uni2ts.distribution.StudentTOutput - - _target_: uni2ts.distribution.NormalFixedScaleOutput - - _target_: uni2ts.distribution.NegativeBinomialOutput - - _target_: uni2ts.distribution.LogNormalOutput - d_model: 1024 - num_layers: 24 - patch_sizes: ${as_tuple:[8, 16, 32, 64, 128]} - max_seq_len: 512 - attn_dropout_p: 0.0 - dropout_p: 0.0 - scaling: true -min_patches: 2 -min_mask_ratio: 0.15 -max_mask_ratio: 0.5 -max_dim: 128 -loss_func: - _target_: uni2ts.loss.packed.PackedNLLLoss -lr: 1e-3 -weight_decay: 1e-1 -beta1: 0.9 -beta2: 0.98 -num_training_steps: ${mul:${trainer.max_epochs},${train_dataloader.num_batches_per_epoch}} -num_warmup_steps: 0 -checkpoint_path: ... \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_small.yaml b/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_small.yaml deleted file mode 100644 index 741e08f..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/model/moirai_small.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# load a pytorch lightning checkpoint -_target_: uni2ts.model.moirai.MoiraiFinetune.load_from_checkpoint -module_kwargs: - _target_: builtins.dict - distr_output: - _target_: uni2ts.distribution.MixtureOutput - components: - - _target_: uni2ts.distribution.StudentTOutput - - _target_: uni2ts.distribution.NormalFixedScaleOutput - - _target_: uni2ts.distribution.NegativeBinomialOutput - - _target_: uni2ts.distribution.LogNormalOutput - d_model: 384 - num_layers: 6 - patch_sizes: ${as_tuple:[8, 16, 32, 64, 128]} - max_seq_len: 512 - attn_dropout_p: 0.0 - dropout_p: 0.0 - scaling: true -min_patches: 2 -min_mask_ratio: 0.15 -max_mask_ratio: 0.5 -max_dim: 128 -loss_func: - _target_: uni2ts.loss.packed.PackedNLLLoss -val_metric: - - _target_: uni2ts.loss.packed.PackedMSELoss - - _target_: uni2ts.loss.packed.PackedNRMSELoss - normalize: absolute_target_squared -lr: 1e-3 -weight_decay: 1e-1 -beta1: 0.9 -beta2: 0.98 -num_training_steps: ${mul:${trainer.max_epochs},${train_dataloader.num_batches_per_epoch}} -num_warmup_steps: 0 -checkpoint_path: ... 
\ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/val_data/etth1.yaml b/examples/foundation-model-examples/moirai/conf/finetune/val_data/etth1.yaml deleted file mode 100644 index 00c462a..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/val_data/etth1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_target_: uni2ts.data.builder.ConcatDatasetBuilder -_args_: - _target_: uni2ts.data.builder.simple.generate_eval_builders - dataset: ETTh1_eval - offset: 11520 - eval_length: 2880 - prediction_lengths: [96, 192, 336, 720] - context_lengths: [1000, 2000, 3000, 4000, 5000] - patch_sizes: [32, 64] \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/val_data/etth1_multi.yaml b/examples/foundation-model-examples/moirai/conf/finetune/val_data/etth1_multi.yaml deleted file mode 100644 index 56e19ae..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/val_data/etth1_multi.yaml +++ /dev/null @@ -1,16 +0,0 @@ -- _target_: uni2ts.data.builder.simple.SimpleEvalDatasetBuilder - dataset: ETTh1_eval - offset: 11520 - windows: 10 - distance: 96 - prediction_length: 96 - context_length: 1000 - patch_size: 32 -- _target_: uni2ts.data.builder.simple.SimpleEvalDatasetBuilder - dataset: ETTh1_eval - offset: 11520 - windows: 10 - distance: 192 - prediction_length: 192 - context_length: 1000 - patch_size: 32 \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/conf/finetune/val_data/random.yaml b/examples/foundation-model-examples/moirai/conf/finetune/val_data/random.yaml deleted file mode 100644 index 261af17..0000000 --- a/examples/foundation-model-examples/moirai/conf/finetune/val_data/random.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_target_: uni2ts.data.builder.ConcatDatasetBuilder -_args_: - _target_: uni2ts.data.builder.simple.generate_eval_builders - dataset: random_eval - offset: 273 - eval_length: 10 - prediction_lengths: [10] - context_lengths: [270] - patch_sizes: [32, 64] \ No newline at end of file diff --git a/examples/foundation-model-examples/moirai/train.py b/examples/foundation-model-examples/moirai/train.py deleted file mode 100644 index ee2aa3b..0000000 --- a/examples/foundation-model-examples/moirai/train.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2024, Salesforce, Inc. -# SPDX-License-Identifier: Apache-2 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
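The etth1 val_data config above enumerates prediction_lengths, context_lengths, and patch_sizes; generate_eval_builders appears to expand these lists into one evaluation builder per combination. A minimal sketch of that fan-out, assuming a plain Cartesian product (the variable names here are illustrative, not part of uni2ts):

import itertools

# Illustration of how the etth1 val_data config above fans out: every
# combination of prediction length, context length, and patch size
# becomes one evaluation setting.
prediction_lengths = [96, 192, 336, 720]
context_lengths = [1000, 2000, 3000, 4000, 5000]
patch_sizes = [32, 64]

combos = list(itertools.product(prediction_lengths, context_lengths, patch_sizes))
print(len(combos))  # 4 * 5 * 2 = 40 evaluation settings
for pl, cl, ps in combos[:3]:
    print(f"prediction_length={pl}, context_length={cl}, patch_size={ps}")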
- -from functools import partial -from typing import Callable, Optional - -import hydra -import lightning as L -import torch -from hydra.utils import instantiate -from omegaconf import DictConfig -from torch.utils._pytree import tree_map -from torch.utils.data import Dataset, DistributedSampler - -from uni2ts.common import hydra_util # noqa: hydra resolvers -from uni2ts.data.loader import DataLoader - - -class DataModule(L.LightningDataModule): - def __init__( - self, - cfg: DictConfig, - train_dataset: Dataset, - val_dataset: Optional[Dataset | list[Dataset]], - ): - super().__init__() - self.cfg = cfg - self.train_dataset = train_dataset - - if val_dataset is not None: - self.val_dataset = val_dataset - self.val_dataloader = self._val_dataloader - - @staticmethod - def get_dataloader( - dataset: Dataset, - dataloader_func: Callable[..., DataLoader], - shuffle: bool, - world_size: int, - batch_size: int, - num_batches_per_epoch: Optional[int] = None, - ) -> DataLoader: - sampler = ( - DistributedSampler( - dataset, - num_replicas=None, - rank=None, - shuffle=shuffle, - seed=0, - drop_last=False, - ) - if world_size > 1 - else None - ) - return dataloader_func( - dataset=dataset, - shuffle=shuffle if sampler is None else None, - sampler=sampler, - batch_size=batch_size, - num_batches_per_epoch=num_batches_per_epoch, - ) - - def train_dataloader(self) -> DataLoader: - return self.get_dataloader( - self.train_dataset, - instantiate(self.cfg.train_dataloader, _partial_=True), - self.cfg.train_dataloader.shuffle, - self.trainer.world_size, - self.train_batch_size, - num_batches_per_epoch=self.train_num_batches_per_epoch, - ) - - def _val_dataloader(self) -> DataLoader | list[DataLoader]: - return tree_map( - partial( - self.get_dataloader, - dataloader_func=instantiate(self.cfg.val_dataloader, _partial_=True), - shuffle=self.cfg.val_dataloader.shuffle, - world_size=self.trainer.world_size, - batch_size=self.val_batch_size, - num_batches_per_epoch=None, - ), - self.val_dataset, - ) - - @property - def train_batch_size(self) -> int: - return self.cfg.train_dataloader.batch_size // ( - self.trainer.world_size * self.trainer.accumulate_grad_batches - ) - - @property - def val_batch_size(self) -> int: - return self.cfg.val_dataloader.batch_size // ( - self.trainer.world_size * self.trainer.accumulate_grad_batches - ) - - @property - def train_num_batches_per_epoch(self) -> int: - return ( - self.cfg.train_dataloader.num_batches_per_epoch - * self.trainer.accumulate_grad_batches - ) - - -@hydra.main(version_base="1.3", config_name="default.yaml") -def main(cfg: DictConfig): - if cfg.tf32: - assert cfg.trainer.precision == 32 - torch.backends.cuda.matmul.allow_tf32 = True - torch.backends.cudnn.allow_tf32 = True - - model: L.LightningModule = instantiate(cfg.model, _convert_="all") - - if cfg.compile: - model.module.compile(mode=cfg.compile) - trainer: L.Trainer = instantiate(cfg.trainer) - train_dataset: Dataset = instantiate(cfg.data).load_dataset( - model.train_transform_map - ) - val_dataset: Optional[Dataset | list[Dataset]] = ( - tree_map( - lambda ds: ds.load_dataset(model.val_transform_map), - instantiate(cfg.val_data, _convert_="all"), - ) - if "val_data" in cfg - else None - ) - L.seed_everything(cfg.seed + trainer.logger.version, workers=True) - trainer.fit( - model, - datamodule=DataModule(cfg, train_dataset, val_dataset), - ) - - -if __name__ == "__main__": - main() diff --git a/examples/foundation-model-examples/moment/01_moment_load_inference.py 
b/examples/foundation-model-examples/moment/01_moment_load_inference.py deleted file mode 100644 index 2f129e8..0000000 --- a/examples/foundation-model-examples/moment/01_moment_load_inference.py +++ /dev/null @@ -1,490 +0,0 @@ -# Databricks notebook source -# MAGIC %md -# MAGIC This is an example notebook that shows how to use the [moment](https://github.com/moment-timeseries-foundation-model/moment) model on Databricks. The notebook loads the model, distributes the inference, registers the model, deploys the model and makes online forecasts. - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Cluster setup -# MAGIC -# MAGIC We recommend using a cluster with [Databricks Runtime 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/14.3lts-ml.html) or above. The cluster can be single-node or multi-node with one or more GPU instances on each worker: e.g. [g5.12xlarge [A10G]](https://aws.amazon.com/ec2/instance-types/g5/) on AWS or [Standard_NV72ads_A10_v5](https://learn.microsoft.com/en-us/azure/virtual-machines/nva10v5-series) on Azure. This notebook will leverage [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) for distributing the inference tasks and utilizing all the available resources. - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Install package - -# COMMAND ---------- - -# MAGIC %pip install git+https://github.com/moment-timeseries-foundation-model/moment.git --quiet -# MAGIC dbutils.library.restartPython() - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Prepare data -# MAGIC We use the [`datasetsforecast`](https://github.com/Nixtla/datasetsforecast/tree/main/) package to download M4 data. The M4 dataset contains a set of time series which we use for testing MMF. Below we have written a number of custom functions to convert the M4 time series to the expected format. -# MAGIC -# MAGIC Make sure that the catalog and the schema already exist. - -# COMMAND ---------- - -catalog = "mmf" # Name of the catalog we use to manage our assets -db = "m4" # Name of the schema we use to manage our assets (e.g. datasets) -n = 100 # Number of time series to sample - -# COMMAND ---------- - -# This cell will create tables: -# 1. {catalog}.{db}.m4_daily_train -# 2. {catalog}.{db}.m4_monthly_train -dbutils.notebook.run("../data_preparation", timeout_seconds=0, arguments={"catalog": catalog, "db": db, "n": n}) - -# COMMAND ---------- - -from pyspark.sql.functions import collect_list - -# Make sure that the data exists -df = spark.table(f'{catalog}.{db}.m4_daily_train') -df = df.groupBy('unique_id').agg(collect_list('ds').alias('ds'), collect_list('y').alias('y')) -display(df) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Distribute Inference -# MAGIC We use [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html#iterator-of-series-to-iterator-of-series-udf) to distribute the inference.
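The UDFs below follow the iterator-of-series Pandas UDF pattern, which lets expensive per-task setup (such as loading a model) run once per batch iterator rather than once per row. A minimal sketch of the pattern, independent of MOMENT (the constant `state` stands in for a loaded model):

from typing import Iterator
import pandas as pd
from pyspark.sql.functions import pandas_udf

@pandas_udf('double')
def add_offset(batch_iterator: Iterator[pd.Series]) -> Iterator[pd.Series]:
    # Expensive initialization (e.g. loading model weights) would go here;
    # it runs once per task instead of once per row.
    state = 1.0  # stand-in for a loaded model
    for batch in batch_iterator:
        yield batch + state

Applied with something like df.select(add_offset("some_double_column")), Spark streams batches of the column through the iterator, so the setup cost is amortized across the whole partition.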
 - -# COMMAND ---------- - -import pandas as pd -import numpy as np -import torch -from typing import Iterator -from pyspark.sql.functions import pandas_udf - -# Function to create a UDF for generating horizon timestamps for a given frequency and prediction length -def create_get_horizon_timestamps(freq, prediction_length): - - # Define a Pandas UDF to generate horizon timestamps - @pandas_udf('array<timestamp>') - def get_horizon_timestamps(batch_iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: - # Define the offset based on the frequency - one_ts_offset = pd.offsets.MonthEnd(1) if freq == "M" else pd.DateOffset(days=1) - batch_horizon_timestamps = [] # Initialize a list to store horizon timestamps for each batch - - # Iterate over batches of time series - for batch in batch_iterator: - for series in batch: - timestamp = series.max() # Get the latest timestamp in the series - horizon_timestamps = [] # Initialize a list to store horizon timestamps for the series - for i in range(prediction_length): - timestamp = timestamp + one_ts_offset # Increment the timestamp by the offset - horizon_timestamps.append(timestamp.to_numpy()) # Convert timestamp to numpy format and add to list - batch_horizon_timestamps.append(np.array(horizon_timestamps)) # Add the list of horizon timestamps to the batch list - yield pd.Series(batch_horizon_timestamps) # Yield the batch of horizon timestamps as a Pandas Series - - return get_horizon_timestamps # Return the UDF - - -# Function to create a UDF for generating forecasts using a pre-trained model -def create_forecast_udf(repository, prediction_length): - - # Define a Pandas UDF to generate forecasts - @pandas_udf('array<double>') - def forecast_udf(batch_iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: - ## Initialization step - from momentfm import MOMENTPipeline # Import the MOMENTPipeline class from the momentfm library - - # Load the pre-trained model from the repository - model = MOMENTPipeline.from_pretrained( - repository, - model_kwargs={ - "task_name": "forecasting", - "forecast_horizon": prediction_length}, - ) - model.init() # Initialize the model - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Set the device to GPU if available, otherwise CPU - model = model.to(device) # Move the model to the selected device - - ## Inference step - for batch in batch_iterator: - batch_forecast = [] # Initialize a list to store forecasts for each batch - for series in batch: - # Prepare the input context and mask - context = list(series) - if len(context) < 512: - input_mask = [1] * len(context) + [0] * (512 - len(context)) # Create an input mask with padding - context = context + [0] * (512 - len(context)) # Pad the context to the required length - else: - input_mask = [1] * 512 # Create an input mask without padding - context = context[-512:] # Truncate the context to the required length - - # Convert context and input mask to PyTorch tensors and move them to the selected device - input_mask = torch.reshape(torch.tensor(input_mask), (1, 512)).to(device) - context = torch.reshape(torch.tensor(context), (1, 1, 512)).to(dtype=torch.float32).to(device) - - # Generate the forecast using the model - output = model(context, input_mask=input_mask) - forecast = output.forecast.squeeze().tolist() # Squeeze the output tensor and convert to a list - batch_forecast.append(forecast) # Add the forecast to the batch list - - yield pd.Series(batch_forecast) # Yield the batch of forecasts as a Pandas Series - - return forecast_udf # Return the UDF - -# 
COMMAND ---------- - -# MAGIC %md -# MAGIC We specify the requirements of our forecasts. - -# COMMAND ---------- - -moment_model = "MOMENT-1-large" -prediction_length = 10 # Time horizon for forecasting -freq = "D" # Frequency of the time series -device_count = torch.cuda.device_count() # Number of GPUs available - -# COMMAND ---------- - -# MAGIC %md -# MAGIC Let's generate the forecasts. - -# COMMAND ---------- - -# Create a UDF for generating horizon timestamps using the specified frequency and prediction length -get_horizon_timestamps = create_get_horizon_timestamps(freq=freq, prediction_length=prediction_length) - -# Create a UDF for generating forecasts using the specified model repository and prediction length -forecast_udf = create_forecast_udf( - repository=f"AutonLab/{moment_model}", # Repository where the pre-trained model is stored - prediction_length=prediction_length, # Length of the forecast horizon -) - -# Apply the UDFs to the DataFrame -forecasts = df.repartition(device_count).select( - df.unique_id, # Select the unique_id column from the DataFrame - get_horizon_timestamps(df.ds).alias("ds"), # Apply the horizon timestamps UDF to the ds column and alias the result as "ds" - forecast_udf(df.y).alias("forecast"), # Apply the forecast UDF to the y column and alias the result as "forecast" -) - -# Display the resulting DataFrame with the forecasts -display(forecasts) - - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Register Model -# MAGIC We will package our model using [`mlflow.pyfunc.PythonModel`](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html) and register this in Unity Catalog. - -# COMMAND ---------- - -import mlflow -import torch -import numpy as np -from mlflow.models import infer_signature -from mlflow.models.signature import ModelSignature -from mlflow.types import DataType, Schema, TensorSpec - -# Define a custom MLflow Python model class for MomentModel -class MomentModel(mlflow.pyfunc.PythonModel): - def __init__(self, repository): - from momentfm import MOMENTPipeline # Import the MOMENTPipeline class from the momentfm library - # Load the pre-trained model from the specified repository with the given task and forecast horizon - self.model = MOMENTPipeline.from_pretrained( - repository, - model_kwargs={ - "task_name": "forecasting", - "forecast_horizon": 10}, - ) - self.model.init() # Initialize the model - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Set the device to GPU if available, otherwise CPU - self.model = self.model.to(self.device) # Move the model to the selected device - - def predict(self, context, input_data, params=None): - series = list(input_data) # Convert input data to a list - if len(series) < 512: - # If the series is shorter than 512, pad with zeros - input_mask = [1] * len(series) + [0] * (512 - len(series)) - series = series + [0] * (512 - len(series)) - else: - # If the series is longer than or equal to 512, truncate to the last 512 values - input_mask = [1] * 512 - series = series[-512:] - # Convert input mask and series to PyTorch tensors and move them to the selected device - input_mask = torch.reshape(torch.tensor(input_mask), (1, 512)).to(self.device) - series = torch.reshape(torch.tensor(series), (1, 1, 512)).to(dtype=torch.float32).to(self.device) - # Generate the forecast using the model - output = self.model(series, input_mask=input_mask) - forecast = output.forecast.squeeze().tolist() # Squeeze the output tensor and convert to a list - return forecast # Return the forecast - -# 
Initialize the custom MomentModel with the specified repository ID -pipeline = MomentModel(f"AutonLab/{moment_model}") -# Define the input and output schema for the model -input_schema = Schema([TensorSpec(np.dtype(np.double), (-1,))]) -output_schema = Schema([TensorSpec(np.dtype(np.double), (-1,))]) # Forecasts are floating-point values -# Create a ModelSignature object to represent the input and output schema -signature = ModelSignature(inputs=input_schema, outputs=output_schema) -# Create an example input to log with the model -input_example = np.random.rand(52) - -# Define the registered model name using variables for catalog, database, and model -registered_model_name = f"{catalog}.{db}.{moment_model}" - -# Log and register the model with MLflow -with mlflow.start_run() as run: - mlflow.pyfunc.log_model( - "model", # The artifact path where the model is logged - python_model=pipeline, # The custom Python model to log - registered_model_name=registered_model_name, # The name to register the model under - signature=signature, # The model signature - input_example=input_example, # An example input to log with the model - pip_requirements=[ - "git+https://github.com/moment-timeseries-foundation-model/moment.git", # Python package requirements - ], - ) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Reload Model -# MAGIC Once the registration is complete, we will reload the model and generate forecasts. - -# COMMAND ---------- - -from mlflow import MlflowClient -mlflow_client = MlflowClient() - -# Define a function to get the latest version number of a registered model -def get_latest_model_version(mlflow_client, registered_model_name): - latest_version = 1 # Initialize the latest version number to 1 - # Iterate through all model versions of the specified registered model - for mv in mlflow_client.search_model_versions(f"name='{registered_model_name}'"): - version_int = int(mv.version) # Convert the version number to an integer - if version_int > latest_version: # Check if the current version is greater than the latest version - latest_version = version_int # Update the latest version number - return latest_version # Return the latest version number - -# Get the latest version number of the specified registered model -model_version = get_latest_model_version(mlflow_client, registered_model_name) -# Construct the model URI using the registered model name and the latest version number -logged_model = f"models:/{registered_model_name}/{model_version}" - -# Load the model as a PyFuncModel using the constructed model URI -loaded_model = mlflow.pyfunc.load_model(logged_model) - -# Create random input data (52 data points) -input_data = np.random.rand(52) - -# Generate forecasts using the loaded model -loaded_model.predict(input_data) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Deploy Model -# MAGIC We will deploy our model behind a real-time endpoint of [Databricks Mosaic AI Model Serving](https://www.databricks.com/product/model-serving). 
 - -# COMMAND ---------- - -# With the token, we can create the authorization header for our subsequent REST calls -token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().getOrElse(None) -headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} - -# Next, you need an endpoint at which to execute your request, which you can get from the notebook's tags collection -java_tags = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags() - -# This object comes from the Java CM - Convert the Java Map object to a Python dictionary -tags = sc._jvm.scala.collection.JavaConversions.mapAsJavaMap(java_tags) - -# Lastly, extract the Databricks instance (domain name) from the dictionary -instance = tags["browserHostName"] - -# COMMAND ---------- - -import requests - -model_serving_endpoint_name = moment_model - -my_json = { - "name": model_serving_endpoint_name, - "config": { - "served_models": [ - { - "model_name": registered_model_name, - "model_version": model_version, - "workload_type": "GPU_SMALL", - "workload_size": "Small", - "scale_to_zero_enabled": "true", - } - ], - "auto_capture_config": { - "catalog_name": catalog, - "schema_name": db, - "table_name_prefix": model_serving_endpoint_name, - }, - }, -} - -# Make sure to drop the inference table if it exists -_ = spark.sql( - f"DROP TABLE IF EXISTS {catalog}.{db}.`{model_serving_endpoint_name}_payload`" -) - -# COMMAND ---------- - -# Function to create an endpoint in Model Serving and deploy the model behind it -def func_create_endpoint(model_serving_endpoint_name): - # get endpoint status - endpoint_url = f"https://{instance}/api/2.0/serving-endpoints" - url = f"{endpoint_url}/{model_serving_endpoint_name}" - r = requests.get(url, headers=headers) - if "RESOURCE_DOES_NOT_EXIST" in r.text: - print( - "Creating this new endpoint: ", - f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations", - ) - re = requests.post(endpoint_url, headers=headers, json=my_json) - else: - new_model_version = (my_json["config"])["served_models"][0]["model_version"] - print( - "This endpoint existed previously! 
We are updating it to a new config with new model version: ", - new_model_version, - ) - # update config - url = f"{endpoint_url}/{model_serving_endpoint_name}/config" - re = requests.put(url, headers=headers, json=my_json["config"]) - # wait till the new config is in place - import time, json - - # get endpoint status - url = f"https://{instance}/api/2.0/serving-endpoints/{model_serving_endpoint_name}" - retry = True - total_wait = 0 - while retry: - r = requests.get(url, headers=headers) - assert ( - r.status_code == 200 - ), f"Expected an HTTP 200 response when accessing endpoint info, received {r.status_code}" - endpoint = json.loads(r.text) - if "pending_config" in endpoint.keys(): - seconds = 10 - print("New config still pending") - if total_wait < 6000: - # if we have waited less than 6000 seconds in total, keep waiting - print(f"Wait for {seconds} seconds") - print(f"Total waiting time so far: {total_wait} seconds") - time.sleep(seconds) - total_wait += seconds - else: - print(f"Stopping, waited for {total_wait} seconds") - retry = False - else: - print("New config in place now!") - retry = False - - assert ( - re.status_code == 200 - ), f"Expected an HTTP 200 response, received {re.status_code}" - -# Function to delete the endpoint from Model Serving -def func_delete_model_serving_endpoint(model_serving_endpoint_name): - endpoint_url = f"https://{instance}/api/2.0/serving-endpoints" - url = f"{endpoint_url}/{model_serving_endpoint_name}" - response = requests.delete(url, headers=headers) - if response.status_code != 200: - raise Exception( - f"Request failed with status {response.status_code}, {response.text}" - ) - else: - print(model_serving_endpoint_name, "endpoint is deleted!") - return response.json() - -# COMMAND ---------- - -# Create an endpoint. This may take some time. 
 -func_create_endpoint(model_serving_endpoint_name) - -# COMMAND ---------- - -import time, mlflow - -# Define a function to wait for a serving endpoint to be ready -def wait_for_endpoint(): - endpoint_url = f"https://{instance}/api/2.0/serving-endpoints" # Construct the base URL for the serving endpoints API - while True: # Infinite loop to repeatedly check the status of the endpoint - url = f"{endpoint_url}/{model_serving_endpoint_name}" # Construct the URL for the specific model serving endpoint - response = requests.get(url, headers=headers) # Send a GET request to the endpoint URL with the necessary headers - - # Ensure the response status code is 200 (OK) - assert ( - response.status_code == 200 - ), f"Expected an HTTP 200 response, received {response.status_code}\n{response.text}" - - # Extract the status of the endpoint from the response JSON - status = response.json().get("state", {}).get("ready", {}) - # print("status",status) # Optional: Print the status for debugging purposes - - # Check if the endpoint status is "READY" - if status == "READY": - print(status) # Print the status if the endpoint is ready - print("-" * 80) # Print a separator line for clarity - return # Exit the function when the endpoint is ready - else: - # Print a message indicating the endpoint is not ready and wait for 5 minutes - print(f"Endpoint not ready ({status}), waiting 5 minutes") - time.sleep(300) # Wait for 300 seconds before checking again - -# Get the Databricks web application URL using an MLflow utility function -api_url = mlflow.utils.databricks_utils.get_webapp_url() - -# Call the wait_for_endpoint function to wait for the serving endpoint to be ready -wait_for_endpoint() - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Online Forecast -# MAGIC Once the endpoint is ready, let's send a request to the model and generate an online forecast. - -# COMMAND ---------- - -import os -import requests -import pandas as pd -import json -import matplotlib.pyplot as plt - -# Replace the URL with the endpoint invocation URL you get from the Model Serving page. -endpoint_url = f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations" -token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get() -def forecast(input_data, url=endpoint_url, databricks_token=token): - headers = { - "Authorization": f"Bearer {databricks_token}", - "Content-Type": "application/json", - } - body = {"inputs": input_data.tolist()} - data = json.dumps(body) - response = requests.request(method="POST", headers=headers, url=url, data=data) - if response.status_code != 200: - raise Exception( - f"Request failed with status {response.status_code}, {response.text}" - ) - return response.json() - -# COMMAND ---------- - -# Send request to the endpoint -input_data = np.random.rand(52) -forecast(input_data) - -# COMMAND ---------- - -# Delete the serving endpoint -func_delete_model_serving_endpoint(model_serving_endpoint_name) - -# COMMAND ---------- - - diff --git a/examples/foundation-model-examples/timegpt/01_timegpt_load_inference.py b/examples/foundation-model-examples/timegpt/01_timegpt_load_inference.py deleted file mode 100644 index 3a7bdb2..0000000 --- a/examples/foundation-model-examples/timegpt/01_timegpt_load_inference.py +++ /dev/null @@ -1,536 +0,0 @@ -# Databricks notebook source -# MAGIC %md -# MAGIC This is an example notebook that shows how to use the time series foundation model [TimeGPT](https://docs.nixtla.io/) on Databricks. 
 -# MAGIC The notebook loads the model, distributes the inference, registers the model, deploys the model and makes online forecasts. - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ### Prerequisites: URL and API key for Azure AI -# MAGIC Here are the prerequisites: -# MAGIC 1. If you don’t have an Azure subscription, get one here: https://azure.microsoft.com/en-us/pricing/purchase-options/pay-as-you-go -# MAGIC 2. Create an Azure AI Studio hub and project. Supported regions are: East US 2, Sweden Central, North Central US, East US, West US, West US3, South Central US. Make sure you pick one of these as the Azure region for the hub. -# MAGIC Next, you need to create a deployment to obtain the inference API and key. -# MAGIC -# MAGIC 3. Open the TimeGEN-1 model card in the model catalog: https://aka.ms/aistudio/landing/nixtlatimegen1 -# MAGIC 4. Click on Deploy and select the Pay-as-you-go option. -# MAGIC 5. Subscribe to the Marketplace offer and deploy. You can also review the API pricing at this step. -# MAGIC 6. You should land on the deployment page that shows you the API key and URL in less than a minute. - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Cluster setup -# MAGIC -# MAGIC TimeGPT is accessible through an API as a service, so the actual compute for inference or fine-tuning will not take place on Databricks. For this reason, a GPU cluster is not necessary, and we recommend using a cluster with [Databricks Runtime 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/14.3lts-ml.html) or above with CPUs. This notebook will leverage [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) for distributing the inference tasks and utilizing all the available resources. - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Install package - -# COMMAND ---------- - -# DBTITLE 1,Import Libraries -# MAGIC %pip install nixtla --quiet -# MAGIC %pip install --upgrade mlflow --quiet -# MAGIC dbutils.library.restartPython() - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Add the API key as a secret - -# COMMAND ---------- - -key_name = f'api_key' -scope_name = f'time-gpt' - -# COMMAND ---------- - -# MAGIC %md -# MAGIC If this is your first time running the notebook and you still don't have your credential managed in a secret, uncomment and run the following cell. Read more about Databricks secrets management [here](https://docs.databricks.com/en/security/secrets/index.html). - -# COMMAND ---------- - -#import time -#from databricks.sdk import WorkspaceClient - -#w = WorkspaceClient() - -# put the key in secret -#w.secrets.create_scope(scope=scope_name) -#w.secrets.put_secret(scope=scope_name, key=key_name, string_value=f'') - -# cleanup -#w.secrets.delete_secret(scope=scope_name, key=key_name) -## w.secrets.delete_secret(scope=scope_name, key=key_name) -## w.secrets.delete_scope(scope=scope_name) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Prepare data -# MAGIC We use the [`datasetsforecast`](https://github.com/Nixtla/datasetsforecast/tree/main/) package to download M4 data. The M4 dataset contains a set of time series which we use for testing MMF. Below we have written a number of custom functions to convert the M4 time series to the expected format. -# MAGIC -# MAGIC Make sure that the catalog and the schema already exist. - -# COMMAND ---------- - -catalog = "mmf" # Name of the catalog we use to manage our assets -db = "m4" # Name of the schema we use to manage our assets (e.g. 
datasets) -n = 10 # Number of time series to sample - -# COMMAND ---------- - -# This cell runs the notebook ../data_preparation and creates the following tables with M4 data: -# 1. {catalog}.{db}.m4_daily_train, -# 2. {catalog}.{db}.m4_monthly_train -dbutils.notebook.run("../data_preparation", timeout_seconds=0, arguments={"catalog": catalog, "db": db, "n": n}) - -# COMMAND ---------- - -from pyspark.sql.functions import collect_list - -# Make sure that the data exists -df = spark.table(f'{catalog}.{db}.m4_daily_train') -df = df.groupBy('unique_id').agg(collect_list('ds').alias('ds'), collect_list('y').alias('y')) -display(df) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Distribute Inference -# MAGIC We use [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html#iterator-of-series-to-iterator-of-series-udf) to distribute the inference. - -# COMMAND ---------- - -import pandas as pd -import numpy as np -import torch -from typing import Iterator, Tuple -from pyspark.sql.functions import pandas_udf - - -## Function to select a single time series from the prepared dataset -def get_single_time_series(unique_id): - # Filter the DataFrame to get records with the specified unique_id and convert to a pandas DataFrame - pdf = df.filter(df.unique_id == unique_id).toPandas() - # Create a dictionary with timestamp and value columns - pdf = { - "timestamp" : list(pdf['ds'][0]), - "value" : list(pdf['y'][0]) - } - # Return a new pandas DataFrame created from the dictionary - return pd.DataFrame(pdf) - - -## Function to create a Pandas UDF to generate forecasts given a time series history -def create_forecast_udf(model_url, api_key, prediction_length=12): - - @pandas_udf('struct<timestamp:array<string>,forecast:array<double>>') - def forecast_udf(iterator: Iterator[Tuple[pd.Series, pd.Series]]) -> Iterator[pd.DataFrame]: - - ## Initialization step - import numpy as np - import pandas as pd - from nixtla import NixtlaClient # Import NixtlaClient from the nixtla library - - # Initialize the NixtlaClient with the provided model URL and API key - model = NixtlaClient( - base_url=model_url, - api_key=api_key) - - ## Inference step - for timeseries, past_values in iterator: - median = [] # Initialize a list to store the forecast results - for ts, y in zip(timeseries, past_values): - # Create a DataFrame from the time series and past values - tdf = pd.DataFrame({"timestamp": ts, - "value": y}) - # Generate a forecast using the NixtlaClient model - pred = model.forecast( - df=tdf, - h=prediction_length, - time_col="timestamp", - target_col="value") - - # Append the forecast results to the median list - median.append({'timestamp': list(pred['timestamp'].astype(str).values), - 'forecast': list(pred['TimeGPT'].values)}) - # Yield the results as a pandas DataFrame - yield pd.DataFrame(median) - - return forecast_udf # Return the forecast UDF - -# COMMAND ---------- - -# MAGIC %md -# MAGIC We specify the requirements of our forecasts. - -# COMMAND ---------- - -# DBTITLE 1,Forecasting with TimeGEN on Azure AI -model_url = "https://TimeGEN-1-pj-serverless.eastus2.inference.ai.azure.com" # Put your model url -prediction_length = 12 # Time horizon for forecasting -api_key = dbutils.secrets.get(scope=scope_name, key=key_name) # Get credential from secrets -freq = "D" # Frequency of the time series - -# COMMAND ---------- - -# MAGIC %md -# MAGIC Let's generate the forecasts. 
 - -# COMMAND ---------- - -# Create Pandas UDF -forecast_udf = create_forecast_udf( - model_url=model_url, - api_key=api_key, - ) - -# Apply Pandas UDF to the dataframe -forecasts = df.select( - df.unique_id, - forecast_udf("ds", "y").alias("forecast"), - ).select("unique_id", "forecast.timestamp", "forecast.forecast") - -display(forecasts) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Register Model -# MAGIC We will package our model using [`mlflow.pyfunc.PythonModel`](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html) and register this in Unity Catalog. - -# COMMAND ---------- - -import mlflow -import torch -import numpy as np -from mlflow.models.signature import ModelSignature -from mlflow.types import DataType, Schema, TensorSpec, ColSpec, ParamSpec, ParamSchema - -mlflow.set_registry_uri("databricks-uc") # Set the MLflow registry URI to Databricks Unity Catalog. - -# Define a custom MLflow Python model class for TimeGPTPipeline -class TimeGPTPipeline(mlflow.pyfunc.PythonModel): - def __init__(self, model_url, api_key): - import numpy as np - import pandas as pd - from nixtla import NixtlaClient # Import NixtlaClient from the nixtla library - self.model_url = model_url # Store the model URL - self.api_key = api_key # Store the API key - - def predict(self, context, input_data, params=None): - from nixtla import NixtlaClient # Import NixtlaClient from the nixtla library - model = NixtlaClient( - base_url=self.model_url, - api_key=self.api_key) # Initialize the NixtlaClient with the stored model URL and API key - - # Generate a forecast using the NixtlaClient model - pred = model.forecast( - df=input_data, - h=params['h'], # Use the horizon length from the params - time_col="timestamp", - target_col="value") - # Rename the forecast column to 'forecast' - pred.rename(columns={'TimeGPT': 'forecast'}, - inplace=True) - return pred # Return the prediction DataFrame - -# Initialize the custom TimeGPTPipeline with the specified model URL and API key -pipeline = TimeGPTPipeline(model_url=model_url, api_key=api_key) - -# Define the input and output schema for the model -input_schema = Schema([ColSpec.from_json_dict(**{"type": "datetime", "name": "timestamp", "required": True}), - ColSpec.from_json_dict(**{"type": "double", "name": "value", "required": True})]) -output_schema = Schema([ColSpec.from_json_dict(**{"type": "datetime", "name": "timestamp", "required": True}), - ColSpec.from_json_dict(**{"type": "double", "name": "forecast", "required": True})]) -param_schema = ParamSchema([ParamSpec.from_json_dict(**{"type": "integer", "name": "h", "default": 12})]) -# Create a ModelSignature object to represent the input, output, and parameter schema -signature = ModelSignature(inputs=input_schema, outputs=output_schema, params=param_schema) - -# Define the registered model name using variables for catalog and database -registered_model_name = f"{catalog}.{db}.time_gpt" - -# Get a single time series from the dataset to use as an input example -pdf = get_single_time_series('D4') - -# Log and register the model with MLflow -with mlflow.start_run() as run: - mlflow.pyfunc.log_model( - "model", # The artifact path where the model is logged - python_model=pipeline, # The custom Python model to log - registered_model_name=registered_model_name, # The name to register the model under - 
signature=signature, # The model signature - input_example=pdf[:10], # An example input to log with the model - pip_requirements=[ - "nixtla" # Python package requirements - ] - ) - - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Reload Model -# MAGIC Once the registration is complete, we will reload the model and generate forecasts. - -# COMMAND ---------- - -from mlflow import MlflowClient -mlflow_client = MlflowClient() - -# Define a function to get the latest version number of a registered model -def get_latest_model_version(mlflow_client, registered_model_name): - latest_version = 1 # Initialize the latest version number to 1 - # Iterate through all model versions of the specified registered model - for mv in mlflow_client.search_model_versions(f"name='{registered_model_name}'"): - version_int = int(mv.version) # Convert the version number to an integer - if version_int > latest_version: # Check if the current version is greater than the latest version - latest_version = version_int # Update the latest version number - return latest_version # Return the latest version number - -# Get the latest version number of the specified registered model -model_version = get_latest_model_version(mlflow_client, registered_model_name) -# Construct the model URI using the registered model name and the latest version number -logged_model = f"models:/{registered_model_name}/{model_version}" - -# Load the model as a PyFuncModel using the constructed model URI -loaded_model = mlflow.pyfunc.load_model(logged_model) - - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ###Test the model before deployment - -# COMMAND ---------- - -# Get a single time series from the dataset -pdf = get_single_time_series('D4') - -# Generate forecasts -loaded_model.predict(pdf, params={'h': 20}) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Deploy Model -# MAGIC We will deploy our model behind a real-time endpoint of [Databricks Mosaic AI Model Serving](https://www.databricks.com/product/model-serving). 
 - -# COMMAND ---------- - -# With the token, we can create the authorization header for our subsequent REST calls -token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().getOrElse(None) -headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} - -# Next, you need an endpoint at which to execute your request, which you can get from the notebook's tags collection -java_tags = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags() - -# This object comes from the Java CM - Convert the Java Map object to a Python dictionary -tags = sc._jvm.scala.collection.JavaConversions.mapAsJavaMap(java_tags) - -# Lastly, extract the Databricks instance (domain name) from the dictionary -instance = tags["browserHostName"] - -# COMMAND ---------- - -import requests - -model_serving_endpoint_name = "time-gpt" - -my_json = { - "name": model_serving_endpoint_name, - "config": { - "served_models": [ - { - "model_name": registered_model_name, - "model_version": model_version, - "workload_type": "CPU_SMALL", - "workload_size": "Small", - "scale_to_zero_enabled": "true", - } - ], - "auto_capture_config": { - "catalog_name": catalog, - "schema_name": db, - "table_name_prefix": model_serving_endpoint_name, - }, - }, -} - -# Make sure to drop the inference table if it exists -_ = spark.sql( - f"DROP TABLE IF EXISTS {catalog}.{db}.`{model_serving_endpoint_name}_payload`" -) - -# COMMAND ---------- - -# Function to create an endpoint in Model Serving and deploy the model behind it -def func_create_endpoint(model_serving_endpoint_name): - # get endpoint status - endpoint_url = f"https://{instance}/api/2.0/serving-endpoints" - url = f"{endpoint_url}/{model_serving_endpoint_name}" - r = requests.get(url, headers=headers) - if "RESOURCE_DOES_NOT_EXIST" in r.text: - print( - "Creating this new endpoint: ", - f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations", - ) - re = requests.post(endpoint_url, headers=headers, json=my_json) - else: - new_model_version = (my_json["config"])["served_models"][0]["model_version"] - print( - "This endpoint existed previously! 
We are updating it to a new config with new model version: ", - new_model_version, - ) - # update config - url = f"{endpoint_url}/{model_serving_endpoint_name}/config" - re = requests.put(url, headers=headers, json=my_json["config"]) - # wait till the new config is in place - import time, json - - # get endpoint status - url = f"https://{instance}/api/2.0/serving-endpoints/{model_serving_endpoint_name}" - retry = True - total_wait = 0 - while retry: - r = requests.get(url, headers=headers) - assert ( - r.status_code == 200 - ), f"Expected an HTTP 200 response when accessing endpoint info, received {r.status_code}" - endpoint = json.loads(r.text) - if "pending_config" in endpoint.keys(): - seconds = 10 - print("New config still pending") - if total_wait < 6000: - # if we have waited less than 6000 seconds in total, keep waiting - print(f"Wait for {seconds} seconds") - print(f"Total waiting time so far: {total_wait} seconds") - time.sleep(seconds) - total_wait += seconds - else: - print(f"Stopping, waited for {total_wait} seconds") - retry = False - else: - print("New config in place now!") - retry = False - - assert ( - re.status_code == 200 - ), f"Expected an HTTP 200 response, received {re.status_code}" - -# Function to delete the endpoint from Model Serving -def func_delete_model_serving_endpoint(model_serving_endpoint_name): - endpoint_url = f"https://{instance}/api/2.0/serving-endpoints" - url = f"{endpoint_url}/{model_serving_endpoint_name}" - response = requests.delete(url, headers=headers) - if response.status_code != 200: - raise Exception( - f"Request failed with status {response.status_code}, {response.text}" - ) - else: - print(model_serving_endpoint_name, "endpoint is deleted!") - return response.json() - -# COMMAND ---------- - -func_create_endpoint(model_serving_endpoint_name) - -# COMMAND ---------- - -import time -import mlflow - -# Define a function to wait for a serving endpoint to be ready -def wait_for_endpoint(): - endpoint_url = f"https://{instance}/api/2.0/serving-endpoints" # Construct the base URL for the serving endpoints API - while True: # Infinite loop to repeatedly check the status of the endpoint - url = f"{endpoint_url}/{model_serving_endpoint_name}" # Construct the URL for the specific model serving endpoint - response = requests.get(url, headers=headers) # Send a GET request to the endpoint URL with the necessary headers - - # Ensure the response status code is 200 (OK) - assert ( - response.status_code == 200 - ), f"Expected an HTTP 200 response, received {response.status_code}\n{response.text}" - - # Extract the status of the endpoint from the response JSON - status = response.json().get("state", {}).get("ready", {}) - # print("status",status) # Optional: Print the status for debugging purposes - - # Check if the endpoint status is "READY" - if status == "READY": - print(status) # Print the status if the endpoint is ready - print("-" * 80) # Print a separator line for clarity - return # Exit the function when the endpoint is ready - else: - # Print a message indicating the endpoint is not ready and wait for 5 minutes - print(f"Endpoint not ready ({status}), waiting 5 minutes") - time.sleep(300) # Wait for 300 seconds (5 minutes) before checking again - -# Get the Databricks web application URL using an MLflow utility function -api_url = mlflow.utils.databricks_utils.get_webapp_url() - -# Call the wait_for_endpoint function to wait for the serving endpoint to be ready -wait_for_endpoint() - - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Online Forecast -# MAGIC Once the endpoint 
is ready, let's send a request to the model and generate an online forecast. - -# COMMAND ---------- - -import os -import requests -import pandas as pd -import json -import matplotlib.pyplot as plt - -# Replace the URL with the endpoint invocation URL you get from the Model Serving page. -endpoint_url = f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations" -token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get() -def forecast(input_data, url=endpoint_url, databricks_token=token): - headers = { - "Authorization": f"Bearer {databricks_token}", - "Content-Type": "application/json", - } - body = {'dataframe_split': input_data.to_dict(orient='split'), "params": {'h': 20}} - data = json.dumps(body) - response = requests.request(method="POST", headers=headers, url=url, data=data) - if response.status_code != 200: - raise Exception( - f"Request failed with status {response.status_code}, {response.text}" - ) - return response.json() - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ### Test online forecast - -# COMMAND ---------- - -# Send request to the endpoint -pdf = get_single_time_series('D3') -pdf['timestamp'] = pdf['timestamp'].astype(str) -forecast(pdf) - -# COMMAND ---------- - -# Delete the serving endpoint -func_delete_model_serving_endpoint(model_serving_endpoint_name) - -# COMMAND ---------- - - diff --git a/examples/foundation-model-examples/timegpt/02_timegpt_fine_tune.py b/examples/foundation-model-examples/timegpt/02_timegpt_fine_tune.py deleted file mode 100644 index 0d59f21..0000000 --- a/examples/foundation-model-examples/timegpt/02_timegpt_fine_tune.py +++ /dev/null @@ -1,524 +0,0 @@ -# Databricks notebook source -# MAGIC %md -# MAGIC This is an example notebook that shows how to use the time series foundation model [TimeGPT](https://docs.nixtla.io/) on Databricks and fine-tune it on the fly. - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ### Prerequisites: URL and API key for Azure AI -# MAGIC Here are the prerequisites: -# MAGIC 1. If you don’t have an Azure subscription, get one here: https://azure.microsoft.com/en-us/pricing/purchase-options/pay-as-you-go -# MAGIC 2. Create an Azure AI Studio hub and project. Supported regions are: East US 2, Sweden Central, North Central US, East US, West US, West US3, South Central US. Make sure you pick one of these as the Azure region for the hub. -# MAGIC Next, you need to create a deployment to obtain the inference API and key: -# MAGIC -# MAGIC 3. Open the TimeGEN-1 model card in the model catalog: https://aka.ms/aistudio/landing/nixtlatimegen1 -# MAGIC 4. Click on Deploy and select the Pay-as-you-go option. -# MAGIC 5. Subscribe to the Marketplace offer and deploy. You can also review the API pricing at this step. -# MAGIC 6. You should land on the deployment page that shows you the API key and URL in less than a minute. - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Cluster setup -# MAGIC -# MAGIC TimeGPT is accessible through an API as a service, so the actual compute for inference or fine-tuning will not take place on Databricks. For this reason, a GPU cluster is not necessary, and we recommend using a cluster with [Databricks Runtime 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/14.3lts-ml.html) or above with CPUs. This notebook will leverage [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) for distributing the inference tasks and utilizing all the available resources. 
 - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Install package - -# COMMAND ---------- - -# DBTITLE 1,Import Libraries -# MAGIC %pip install nixtla --quiet -# MAGIC %pip install --upgrade mlflow --quiet -# MAGIC dbutils.library.restartPython() - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Add the API key as a secret - -# COMMAND ---------- - -key_name = f'api_key' -scope_name = f'time-gpt' - -# COMMAND ---------- - -# MAGIC %md -# MAGIC If this is your first time running the notebook and you still don't have your credential managed in a secret, uncomment and run the following cell. Read more about Databricks secrets management [here](https://docs.databricks.com/en/security/secrets/index.html). - -# COMMAND ---------- - -#import time -#from databricks.sdk import WorkspaceClient - -#w = WorkspaceClient() - -# put the key in secret -#w.secrets.create_scope(scope=scope_name) -#w.secrets.put_secret(scope=scope_name, key=key_name, string_value=f'') - -# cleanup -#w.secrets.delete_secret(scope=scope_name, key=key_name) -## w.secrets.delete_secret(scope=scope_name, key=key_name) -## w.secrets.delete_scope(scope=scope_name) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Prepare data -# MAGIC We use the [`datasetsforecast`](https://github.com/Nixtla/datasetsforecast/tree/main/) package to download M4 data. The M4 dataset contains a set of time series which we use for testing MMF. Below we have written a number of custom functions to convert the M4 time series to the expected format. -# MAGIC -# MAGIC Make sure that the catalog and the schema already exist. - -# COMMAND ---------- - -catalog = "mmf" # Name of the catalog we use to manage our assets -db = "m4" # Name of the schema we use to manage our assets (e.g. datasets) -n = 10 # Number of time series to sample - -# COMMAND ---------- - -# This cell runs the notebook ../data_preparation and creates the following tables with M4 data: -# 1. {catalog}.{db}.m4_daily_train, -# 2. {catalog}.{db}.m4_monthly_train -dbutils.notebook.run("../data_preparation", timeout_seconds=0, arguments={"catalog": catalog, "db": db, "n": n}) - -# COMMAND ---------- - -from pyspark.sql.functions import collect_list, size - -# Make sure that the data exists -df = spark.table(f'{catalog}.{db}.m4_daily_train') -df = df.groupBy('unique_id').agg(collect_list('ds').alias('ds'), collect_list('y').alias('y')) -df = df.filter(size(df.ds) >= 300) -display(df) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Distribute Fine-Tuning and Inference -# MAGIC We use [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html#iterator-of-series-to-iterator-of-series-udf) to distribute fine-tuning and inference. 
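Before wrapping the call in a UDF, it may help to see the per-series operation in isolation: NixtlaClient.forecast with finetune_steps runs a short fine-tuning pass on the provided history before forecasting. A standalone sketch of that call (the endpoint URL and API key are placeholders, and the synthetic series is purely illustrative):

import pandas as pd
from nixtla import NixtlaClient

# Hypothetical endpoint and key; in this notebook they come from the
# Azure AI deployment and Databricks secrets.
client = NixtlaClient(base_url="https://<your-timegen-endpoint>", api_key="<your-api-key>")

# A toy daily series standing in for one M4 series.
tdf = pd.DataFrame({
    "timestamp": pd.date_range("2024-01-01", periods=300, freq="D"),
    "value": [float(i) for i in range(300)],
})

# finetune_steps triggers an on-the-fly fine-tuning pass on this series
# before forecasting; this is the same call the UDF below makes per series.
pred = client.forecast(df=tdf, h=10, finetune_steps=10,
                       time_col="timestamp", target_col="value")
print(pred.head())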
 - -# COMMAND ---------- - -import pandas as pd -import numpy as np -import torch -from typing import Iterator, Tuple -from pyspark.sql.functions import pandas_udf - -## Function to create a Pandas UDF to fine-tune and generate forecasts given a time series history -def create_forecast_udf(model_url, api_key, prediction_length=12, ft_steps=10): - - # Define the Pandas UDF with the specified output schema - @pandas_udf('struct<timestamp:array<string>,forecast:array<double>>') - def forecast_udf(iterator: Iterator[Tuple[pd.Series, pd.Series]]) -> Iterator[pd.DataFrame]: - - ## Initialization step - import numpy as np - import pandas as pd - from nixtla import NixtlaClient # Import NixtlaClient from the nixtla library - - # Initialize the NixtlaClient with the provided model URL and API key - model = NixtlaClient( - base_url=model_url, - api_key=api_key) - - ## Inference step - for timeseries, past_values in iterator: - median = [] # Initialize a list to store the forecast results - for ts, y in zip(timeseries, past_values): - # Create a DataFrame from the time series and past values - tdf = pd.DataFrame({"timestamp": ts, - "value": y}) - - # Generate a forecast using the NixtlaClient model with fine-tuning - pred = model.forecast( - df=tdf, - h=prediction_length, # Horizon length for the forecast - finetune_steps=ft_steps, # Number of fine-tuning steps - time_col="timestamp", # Column name for timestamps - target_col="value") # Column name for target values - - # Append the forecast results to the median list - median.append({'timestamp': list(pred['timestamp'].astype(str).values), - 'forecast': list(pred['TimeGPT'].values)}) - # Yield the results as a pandas DataFrame - yield pd.DataFrame(median) - - return forecast_udf # Return the forecast UDF - - -# COMMAND ---------- - -# MAGIC %md -# MAGIC We specify the requirements of our forecasts. - -# COMMAND ---------- - -model_url = "https://TimeGEN-1-pj-serverless.eastus2.inference.ai.azure.com" -prediction_length = 10 # Time horizon for forecasting -ft_steps = 10 # Number of training iterations to perform for fine-tuning -api_key = dbutils.secrets.get(scope=scope_name, key=key_name) -freq = "D" # Frequency of the time series - -# COMMAND ---------- - -# MAGIC %md -# MAGIC Let's fine-tune and generate forecasts. - -# COMMAND ---------- - -# Create a forecast UDF using the specified model URL and API key -forecast_udf = create_forecast_udf( - model_url=model_url, # URL of the pre-trained model - api_key=api_key, # API key for authentication -) - -# Apply the forecast UDF to the DataFrame -forecasts = df.select( - df.unique_id, # Select the unique_id column from the DataFrame - forecast_udf("ds", "y").alias("forecast"), # Apply the forecast UDF to the ds (timestamp) and y (value) columns, alias the result as "forecast" -).select( - "unique_id", # Select the unique_id column - "forecast.timestamp", # Select the timestamp array from the forecast struct - "forecast.forecast" # Select the forecast array from the forecast struct -) - -# Display the resulting DataFrame with the forecasts -display(forecasts) - - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Register Model -# MAGIC We will package our model using [`mlflow.pyfunc.PythonModel`](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html) and register this in Unity Catalog. 
-
-# COMMAND ----------
-
-import mlflow
-import torch
-import numpy as np
-from mlflow.models.signature import ModelSignature
-from mlflow.types import DataType, Schema, TensorSpec, ColSpec, ParamSpec, ParamSchema
-
-mlflow.set_registry_uri("databricks-uc")  # Set the MLflow registry URI to Databricks Unity Catalog.
-
-# Define a custom MLflow Python model class for TimeGPTPipeline
-class TimeGPTPipeline(mlflow.pyfunc.PythonModel):
-    def __init__(self, model_url, api_key):
-        import numpy as np
-        import pandas as pd
-        from nixtla import NixtlaClient  # Import NixtlaClient from the nixtla library
-        self.model_url = model_url  # Store the model URL
-        self.api_key = api_key  # Store the API key
-
-    def predict(self, context, input_data, params=None):
-        from nixtla import NixtlaClient  # Import NixtlaClient from the nixtla library
-        # Initialize the NixtlaClient with the stored model URL and API key
-        model = NixtlaClient(
-            base_url=self.model_url,
-            api_key=self.api_key)
-
-        # Generate a forecast using the NixtlaClient model with fine-tuning steps
-        pred = model.forecast(
-            df=input_data,
-            h=params['h'],  # Use the horizon length from the params
-            finetune_steps=params['finetune_steps'],  # Use the fine-tuning steps from the params
-            time_col="timestamp",
-            target_col="value")
-        # Rename the forecast column to 'forecast'
-        pred.rename(columns={'TimeGPT': 'forecast'},
-                    inplace=True)
-        return pred  # Return the prediction DataFrame
-
-# Initialize the custom TimeGPTPipeline with the specified model URL and API key
-pipeline = TimeGPTPipeline(model_url=model_url, api_key=api_key)
-
-# Define the input and output schema for the model
-input_schema = Schema([ColSpec.from_json_dict(**{"type": "datetime", "name": "timestamp", "required": True}),
-                       ColSpec.from_json_dict(**{"type": "double", "name": "value", "required": True})])
-output_schema = Schema([ColSpec.from_json_dict(**{"type": "datetime", "name": "timestamp", "required": True}),
-                        ColSpec.from_json_dict(**{"type": "double", "name": "forecast", "required": True})])
-param_schema = ParamSchema([ParamSpec.from_json_dict(**{"type": "integer", "name": "h", "default": 12}),
-                            ParamSpec.from_json_dict(**{"type": "integer", "name": "finetune_steps", "default": 10})])
-# Create a ModelSignature object to represent the input, output, and parameter schema
-signature = ModelSignature(inputs=input_schema, outputs=output_schema, params=param_schema)
-
-# Define the registered model name using variables for catalog and database
-registered_model_name = f"{catalog}.{db}.time_gpt_ft"
-
-# Filter the DataFrame to get records with the specified unique_id and convert to a pandas DataFrame
-pdf = df.filter(df.unique_id == 'D7').toPandas()
-pdf = {
-    "timestamp": list(pdf['ds'][0]),
-    "value": list(pdf['y'][0])
-}
-pdf = pd.DataFrame(pdf)
-
-# Log and register the model with MLflow
-with mlflow.start_run() as run:
-    mlflow.pyfunc.log_model(
-        "model",  # The artifact path where the model is logged
-        python_model=pipeline,  # The custom Python model to log
-        registered_model_name=registered_model_name,  # The name to register the model under
-        signature=signature,  # The model signature
-        input_example=pdf,  # An example input to log with the model
-        pip_requirements=[
-            "nixtla"  # Python package requirements
-        ]
-    )
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Reload Model
-# MAGIC Once the registration is complete, we will reload the model and generate forecasts.
-
-# COMMAND ----------
-
-from mlflow import MlflowClient
-mlflow_client = MlflowClient()
-
-# Define a function to get the latest version number of a registered model
-def get_latest_model_version(mlflow_client, registered_model_name):
-    latest_version = 1  # Initialize the latest version number to 1
-    # Iterate through all model versions of the specified registered model
-    for mv in mlflow_client.search_model_versions(f"name='{registered_model_name}'"):
-        version_int = int(mv.version)  # Convert the version number to an integer
-        if version_int > latest_version:  # Check if the current version is greater than the latest version
-            latest_version = version_int  # Update the latest version number
-    return latest_version  # Return the latest version number
-
-# Get the latest version number of the specified registered model
-model_version = get_latest_model_version(mlflow_client, registered_model_name)
-# Construct the model URI using the registered model name and the latest version number
-logged_model = f"models:/{registered_model_name}/{model_version}"
-
-# Load the model as a PyFuncModel using the constructed model URI
-loaded_model = mlflow.pyfunc.load_model(logged_model)
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ### Test the model before deployment
-
-# COMMAND ----------
-
-# Test the model before deployment
-loaded_model.predict(pdf, params={'h': 20})
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Deploy Model
-# MAGIC We will deploy our model behind a real-time endpoint of [Databricks Mosaic AI Model Serving](https://www.databricks.com/product/model-serving).
-
-# COMMAND ----------
-
-# With the token, we can create the authorization header for our subsequent REST calls
-token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().getOrElse(None)
-headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-# Next we need an endpoint at which to execute our request, which we can get from the notebook's tags collection
-java_tags = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags()
-
-# This object comes from the Java CM - convert the Java Map object to a Python dictionary
-tags = sc._jvm.scala.collection.JavaConversions.mapAsJavaMap(java_tags)
-
-# Lastly, extract the Databricks instance (domain name) from the dictionary
-instance = tags["browserHostName"]
-
-# COMMAND ----------
-
-import requests
-
-model_serving_endpoint_name = "timegpt_ft"
-
-my_json = {
-    "name": model_serving_endpoint_name,
-    "config": {
-        "served_models": [
-            {
-                "model_name": registered_model_name,
-                "model_version": model_version,
-                "workload_type": "CPU_SMALL",
-                "workload_size": "Small",
-                "scale_to_zero_enabled": "true",
-            }
-        ],
-        "auto_capture_config": {
-            "catalog_name": catalog,
-            "schema_name": db,
-            "table_name_prefix": model_serving_endpoint_name,
-        },
-    },
-}
-
-# Make sure to drop the inference table if it exists
-_ = spark.sql(
-    f"DROP TABLE IF EXISTS {catalog}.{db}.`{model_serving_endpoint_name}_payload`"
-)
-
-# COMMAND ----------
-
-# Function to create an endpoint in Model Serving and deploy the model behind it
-def func_create_endpoint(model_serving_endpoint_name):
-    # get endpoint status
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-    url = f"{endpoint_url}/{model_serving_endpoint_name}"
-    r = requests.get(url, headers=headers)
-    if "RESOURCE_DOES_NOT_EXIST" in r.text:
-        print(
-            "Creating this new endpoint: ",
-            f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations",
-        )
-        re = requests.post(endpoint_url, headers=headers, json=my_json)
-    else:
-        new_model_version = (my_json["config"])["served_models"][0]["model_version"]
-        print(
-            "This endpoint existed previously! We are updating it to a new config with new model version: ",
-            new_model_version,
-        )
-        # update config
-        url = f"{endpoint_url}/{model_serving_endpoint_name}/config"
-        re = requests.put(url, headers=headers, json=my_json["config"])
-        # wait until the new config is in place
-        import time, json
-
-        # get endpoint status
-        url = f"https://{instance}/api/2.0/serving-endpoints/{model_serving_endpoint_name}"
-        retry = True
-        total_wait = 0
-        while retry:
-            r = requests.get(url, headers=headers)
-            assert (
-                r.status_code == 200
-            ), f"Expected an HTTP 200 response when accessing endpoint info, received {r.status_code}"
-            endpoint = json.loads(r.text)
-            if "pending_config" in endpoint.keys():
-                seconds = 10
-                print("New config still pending")
-                if total_wait < 6000:
-                    # if we have waited less than 6,000 seconds in total, keep waiting
-                    print(f"Wait for {seconds} seconds")
-                    print(f"Total waiting time so far: {total_wait} seconds")
-                    time.sleep(10)
-                    total_wait += seconds
-                else:
-                    print(f"Stopping, waited for {total_wait} seconds")
-                    retry = False
-            else:
-                print("New config in place now!")
-                retry = False
-
-    assert (
-        re.status_code == 200
-    ), f"Expected an HTTP 200 response, received {re.status_code}"
-
-# Function to delete the endpoint from Model Serving
-def func_delete_model_serving_endpoint(model_serving_endpoint_name):
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-    url = f"{endpoint_url}/{model_serving_endpoint_name}"
-    response = requests.delete(url, headers=headers)
-    if response.status_code != 200:
-        raise Exception(
-            f"Request failed with status {response.status_code}, {response.text}"
-        )
-    else:
-        print(model_serving_endpoint_name, "endpoint is deleted!")
-    return response.json()
-
-# COMMAND ----------
-
-func_create_endpoint(model_serving_endpoint_name)
-
-# COMMAND ----------
-
-import time
-import mlflow
-
-# Define a function to wait for a serving endpoint to be ready
-def wait_for_endpoint():
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"  # Construct the base URL for the serving endpoints API using the instance variable
-    while True:  # Loop until the endpoint is ready
-        url = f"{endpoint_url}/{model_serving_endpoint_name}"  # Construct the URL for the specific model serving endpoint
-        response = requests.get(url, headers=headers)  # Send a GET request to the endpoint URL with the necessary headers
-
-        # Ensure the response status code is 200 (OK)
-        assert (
-            response.status_code == 200
-        ), f"Expected an HTTP 200 response, received {response.status_code}\n{response.text}"
-
-        # Extract the status of the endpoint from the response JSON
-        status = response.json().get("state", {}).get("ready", {})
-        # print("status", status)  # Optional: print the status for debugging purposes
-
-        # Check if the endpoint status is "READY"
-        if status == "READY":
-            print(status)  # Print the status if the endpoint is ready
-            print("-" * 80)  # Print a separator line for clarity
-            return  # Exit the function when the endpoint is ready
-        else:
-            # Print a message indicating the endpoint is not ready and wait for 5 minutes
-            print(f"Endpoint not ready ({status}), waiting 5 minutes")
-            time.sleep(300)  # Wait for 300 seconds (5 minutes) before checking again
-
-# Get the Databricks web application URL using an MLflow utility function
-api_url = mlflow.utils.databricks_utils.get_webapp_url()
-
-# Call the wait_for_endpoint function to wait for the serving endpoint to be ready
-wait_for_endpoint()
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Online Forecast
-# MAGIC Once the endpoint is ready, let's send a request to the model and generate an online forecast.
-
-# COMMAND ----------
-
-import os
-import requests
-import pandas as pd
-import json
-import matplotlib.pyplot as plt
-
-# Replace the URL with the endpoint invocation URL you get from the Model Serving page.
-endpoint_url = f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations"
-token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
-def forecast(input_data, url=endpoint_url, databricks_token=token):
-    headers = {
-        "Authorization": f"Bearer {databricks_token}",
-        "Content-Type": "application/json",
-    }
-    body = {'dataframe_split': input_data.to_dict(orient='split'), 'params': {'h': 20}}
-    data = json.dumps(body)
-    response = requests.request(method="POST", headers=headers, url=url, data=data)
-    if response.status_code != 200:
-        raise Exception(
-            f"Request failed with status {response.status_code}, {response.text}"
-        )
-    return response.json()
-
-# COMMAND ----------
-
-# Send forecast requests
-pdf['timestamp'] = pdf['timestamp'].astype('str')
-forecast(pdf)
-
-# COMMAND ----------
-
-# Delete the serving endpoint
-func_delete_model_serving_endpoint(model_serving_endpoint_name)
-
-# COMMAND ----------
-
diff --git a/examples/foundation-model-examples/timesfm/01_timesfm_load_inference.py b/examples/foundation-model-examples/timesfm/01_timesfm_load_inference.py
deleted file mode 100644
index 018c2e7..0000000
--- a/examples/foundation-model-examples/timesfm/01_timesfm_load_inference.py
+++ /dev/null
@@ -1,429 +0,0 @@
-# Databricks notebook source
-# MAGIC %md
-# MAGIC This is an example notebook that shows how to use [TimesFM](https://github.com/google-research/timesfm) models on Databricks. The notebook loads the model, distributes the inference, registers the model, deploys the model and makes online forecasts.
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Cluster setup
-# MAGIC
-# MAGIC **As of June 5, 2024, TimesFM supports Python versions below 3.10, so make sure your cluster runs a DBR ML version whose Python version is below 3.10.**
-# MAGIC
-# MAGIC We recommend using a cluster with [Databricks Runtime 14.3 LTS for ML](https://docs.databricks.com/en/release-notes/runtime/14.3lts-ml.html). The cluster can be single-node or multi-node with one or more GPU instances on each worker: e.g. [g5.12xlarge [A10G]](https://aws.amazon.com/ec2/instance-types/g5/) on AWS or [Standard_NV72ads_A10_v5](https://learn.microsoft.com/en-us/azure/virtual-machines/nva10v5-series) on Azure. This notebook will leverage [Pandas UDF](https://docs.databricks.com/en/udf/pandas.html) for distributing the inference tasks and utilizing all the available resources.
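Given the hard Python-version constraint above, a small guard cell (our addition, not part of the original notebook) can fail fast on an unsupported runtime instead of failing deep inside the package install:

```python
import sys

# TimesFM (as of June 5, 2024) requires a Python version below 3.10;
# stop early with a clear message rather than debugging a broken install later.
assert sys.version_info < (3, 10), (
    f"Python {sys.version_info.major}.{sys.version_info.minor} detected; "
    "use a DBR ML runtime whose Python version is below 3.10."
)
```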
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Install package
-
-# COMMAND ----------
-
-# MAGIC %pip install jax[cuda12]==0.4.26 --quiet
-# MAGIC %pip install protobuf==3.20.* --quiet
-# MAGIC %pip install utilsforecast --quiet
-# MAGIC dbutils.library.restartPython()
-
-# COMMAND ----------
-
-import sys
-import subprocess
-package = "git+https://github.com/google-research/timesfm.git"
-subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Prepare data
-# MAGIC We use the [`datasetsforecast`](https://github.com/Nixtla/datasetsforecast/tree/main/) package to download M4 data. The M4 dataset contains a set of time series which we use for testing MMF. Below we have written a number of custom functions to convert M4 time series to an expected format.
-# MAGIC
-# MAGIC Make sure that the catalog and the schema already exist.
-
-# COMMAND ----------
-
-catalog = "mmf"  # Name of the catalog we use to manage our assets
-db = "m4"  # Name of the schema we use to manage our assets (e.g. datasets)
-n = 100  # Number of time series to sample
-
-# COMMAND ----------
-
-# This cell runs the notebook ../data_preparation and creates the following tables with M4 data:
-# 1. {catalog}.{db}.m4_daily_train
-# 2. {catalog}.{db}.m4_monthly_train
-dbutils.notebook.run("../data_preparation", timeout_seconds=0, arguments={"catalog": catalog, "db": db, "n": n})
-
-# COMMAND ----------
-
-# Make sure that the data exists
-df = spark.table(f'{catalog}.{db}.m4_daily_train').toPandas()
-display(df)
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Distribute Inference
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC Distribution of the inference is managed by TimesFM, so we don't need to use Pandas UDF. See the [github repository](https://github.com/google-research/timesfm/tree/master?tab=readme-ov-file#initialize-the-model-and-load-a-checkpoint) of TimesFM for a detailed description of the input parameters.
-
-# COMMAND ----------
-
-import timesfm
-
-# Initialize the TimesFm model with specified parameters.
-tfm = timesfm.TimesFm(
-    context_len=512,  # Max context length of the model. It must be a multiple of input_patch_len, which is 32.
-    horizon_len=10,  # Forecast horizon length. It can be set to any value, recommended to be the largest needed.
-    input_patch_len=32,  # Length of the input patch.
-    output_patch_len=128,  # Length of the output patch.
-    num_layers=20,
-    model_dims=1280,
-    backend="gpu",  # Backend for computation, set to use GPU for faster processing.
-)
-
-# Load the pre-trained model from the specified checkpoint.
-tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")
-
-# Generate forecasts on the input DataFrame.
-forecast_df = tfm.forecast_on_df(
-    inputs=df,  # The input DataFrame containing the time series data.
-    freq="D",  # Frequency of the time series data, set to daily.
-    value_name="y",  # Column name in the DataFrame containing the values to forecast.
-    num_jobs=-1,  # Number of parallel jobs to run, set to -1 to use all available processors.
-)
-
-# Display the forecast DataFrame.
-display(forecast_df)
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Register Model
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC We should ensure that any non-serializable attributes (like the timesfm model in the TimesFMModel class) are not included in the serialization process. One common approach is to override the __getstate__ and __setstate__ methods in the class to manage what gets pickled.
This modification ensures that the timesfm model is not included in the serialization process, thus avoiding the error. The load_model method is called to load the model when needed, such as during prediction or after deserialization. -# MAGIC -# MAGIC We will package our model using [`mlflow.pyfunc.PythonModel`](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html) and register this in Unity Catalog. - -# COMMAND ---------- - -import mlflow -import torch -import numpy as np -from mlflow.models import infer_signature -from mlflow.models.signature import ModelSignature -from mlflow.types import DataType, Schema, TensorSpec - -# Set the MLflow registry URI to Databricks Unity Catalog -mlflow.set_registry_uri("databricks-uc") - -# Define a custom MLflow Python model class for TimesFM -class TimesFMModel(mlflow.pyfunc.PythonModel): - def __init__(self, repository): - self.repository = repository # Store the repository ID for the model checkpoint - self.tfm = None # Initialize the model attribute to None - - def load_model(self): - import timesfm - # Initialize the TimesFm model with specified parameters - self.tfm = timesfm.TimesFm( - context_len=512, # Max context length of the model, must be a multiple of input_patch_len (32). - horizon_len=10, # Horizon length for the forecast. - input_patch_len=32, # Length of the input patch. - output_patch_len=128, # Length of the output patch. - num_layers=20, - model_dims=1280, - backend="gpu", # Backend for computation, set to GPU. - ) - # Load the pre-trained model from the specified checkpoint - self.tfm.load_from_checkpoint(repo_id=self.repository) - - def predict(self, context, input_df, params=None): - # Load the model if it hasn't been loaded yet - if self.tfm is None: - self.load_model() - # Generate forecasts on the input DataFrame - forecast_df = self.tfm.forecast_on_df( - inputs=input_df, # Input DataFrame containing the time series data. - freq="D", # Frequency of the time series data, set to daily. - value_name="y", # Column name in the DataFrame containing the values to forecast. - num_jobs=-1, # Number of parallel jobs to run, set to -1 to use all available processors. 
- ) - return forecast_df # Return the forecast DataFrame - - def __getstate__(self): - state = self.__dict__.copy() # Copy the instance's state - # Remove the tfm attribute from the state, as it's not serializable - del state['tfm'] - return state - - def __setstate__(self, state): - # Restore instance attributes - self.__dict__.update(state) - # Reload the model since it was not stored in the state - self.load_model() - -# Initialize the custom TimesFM model with the specified repository ID -pipeline = TimesFMModel("google/timesfm-1.0-200m") -# Infer the model signature based on input and output DataFrames -signature = infer_signature( - model_input=df, # Input DataFrame for the model - model_output=pipeline.predict(None, df), # Output DataFrame from the model -) - -# Define the registered model name using variables for catalog and database -registered_model_name = f"{catalog}.{db}.timesfm-1-200m" - -# Start an MLflow run to log and register the model -with mlflow.start_run() as run: - mlflow.pyfunc.log_model( - "model", # The artifact path where the model is logged - python_model=pipeline, # The custom Python model to log - registered_model_name=registered_model_name, # The name to register the model under - signature=signature, # The model signature - input_example=df, # An example input to log with the model - pip_requirements=[ - "jax[cuda12]==0.4.26", # Required Python packages - "utilsforecast==0.1.10", - "git+https://github.com/google-research/timesfm.git", - ], - ) - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ##Reload Model -# MAGIC Once the registration is complete, we will reload the model and generate forecasts. - -# COMMAND ---------- - -from mlflow import MlflowClient -client = MlflowClient() - -# Define a function to get the latest version number of a registered model -def get_latest_model_version(client, registered_model_name): - latest_version = 1 # Initialize the latest version number to 1 - # Iterate through all model versions of the specified registered model - for mv in client.search_model_versions(f"name='{registered_model_name}'"): - version_int = int(mv.version) # Convert the version number to an integer - if version_int > latest_version: # Check if the current version is greater than the latest version - latest_version = version_int # Update the latest version number - return latest_version # Return the latest version number - -# Get the latest version number of the specified registered model -model_version = get_latest_model_version(client, registered_model_name) -# Construct the model URI using the registered model name and the latest version number -logged_model = f"models:/{registered_model_name}/{model_version}" - -# Load the model as a PyFuncModel -loaded_model = mlflow.pyfunc.load_model(logged_model) - -# Generate forecasts using the loaded model on the input DataFrame -loaded_model.predict(df) # Use the loaded model to make predictions on the input DataFrame - -# COMMAND ---------- - -# MAGIC %md -# MAGIC ## Deploy Model -# MAGIC We will deploy our model behind a real-time endpoint of [Databricks Mosaic AI Model Serving](https://www.databricks.com/product/model-serving). 
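The next cells drive the Model Serving REST API directly with `requests`. For comparison, here is a sketch of the same deployment using the higher-level `databricks-sdk` client. This reflects our understanding of the SDK surface and is not what this notebook does; `registered_model_name` and `model_version` are the variables defined in the surrounding cells.

```python
# Sketch: creating the serving endpoint via the Databricks SDK instead of raw REST.
# Assumes databricks-sdk is installed and ambient notebook authentication.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedModelInput

w = WorkspaceClient()
w.serving_endpoints.create(
    name="timesfm-1-200m",  # same endpoint name used in the cells below
    config=EndpointCoreConfigInput(
        served_models=[
            ServedModelInput(
                model_name=registered_model_name,  # defined earlier in the notebook
                model_version=str(model_version),
                workload_type="GPU_SMALL",
                workload_size="Small",
                scale_to_zero_enabled=True,
            )
        ]
    ),
)
```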
-
-# COMMAND ----------
-
-# With the token, we can create the authorization header for our subsequent REST calls
-token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().getOrElse(None)
-headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-# Next we need an endpoint at which to execute our request, which we can get from the notebook's tags collection
-java_tags = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags()
-
-# This object comes from the Java CM - convert the Java Map object to a Python dictionary
-tags = sc._jvm.scala.collection.JavaConversions.mapAsJavaMap(java_tags)
-
-# Lastly, extract the Databricks instance (domain name) from the dictionary
-instance = tags["browserHostName"]
-
-# COMMAND ----------
-
-import requests
-
-model_serving_endpoint_name = "timesfm-1-200m"
-
-my_json = {
-    "name": model_serving_endpoint_name,
-    "config": {
-        "served_models": [
-            {
-                "model_name": registered_model_name,
-                "model_version": model_version,
-                "workload_type": "GPU_SMALL",
-                "workload_size": "Small",
-                "scale_to_zero_enabled": "true",
-            }
-        ],
-        "auto_capture_config": {
-            "catalog_name": catalog,
-            "schema_name": db,
-            "table_name_prefix": model_serving_endpoint_name,
-        },
-    },
-}
-
-# Make sure to drop the inference table if it exists
-_ = spark.sql(
-    f"DROP TABLE IF EXISTS {catalog}.{db}.`{model_serving_endpoint_name}_payload`"
-)
-
-# COMMAND ----------
-
-# Function to create an endpoint in Model Serving and deploy the model behind it
-def func_create_endpoint(model_serving_endpoint_name):
-    # get endpoint status
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-    url = f"{endpoint_url}/{model_serving_endpoint_name}"
-    r = requests.get(url, headers=headers)
-    if "RESOURCE_DOES_NOT_EXIST" in r.text:
-        print(
-            "Creating this new endpoint: ",
-            f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations",
-        )
-        re = requests.post(endpoint_url, headers=headers, json=my_json)
-    else:
-        new_model_version = (my_json["config"])["served_models"][0]["model_version"]
-        print(
-            "This endpoint existed previously! We are updating it to a new config with new model version: ",
-            new_model_version,
-        )
-        # update config
-        url = f"{endpoint_url}/{model_serving_endpoint_name}/config"
-        re = requests.put(url, headers=headers, json=my_json["config"])
-        # wait until the new config is in place
-        import time, json
-
-        # get endpoint status
-        url = f"https://{instance}/api/2.0/serving-endpoints/{model_serving_endpoint_name}"
-        retry = True
-        total_wait = 0
-        while retry:
-            r = requests.get(url, headers=headers)
-            assert (
-                r.status_code == 200
-            ), f"Expected an HTTP 200 response when accessing endpoint info, received {r.status_code}"
-            endpoint = json.loads(r.text)
-            if "pending_config" in endpoint.keys():
-                seconds = 10
-                print("New config still pending")
-                if total_wait < 6000:
-                    # if we have waited less than 6,000 seconds in total, keep waiting
-                    print(f"Wait for {seconds} seconds")
-                    print(f"Total waiting time so far: {total_wait} seconds")
-                    time.sleep(10)
-                    total_wait += seconds
-                else:
-                    print(f"Stopping, waited for {total_wait} seconds")
-                    retry = False
-            else:
-                print("New config in place now!")
-                retry = False
-
-    assert (
-        re.status_code == 200
-    ), f"Expected an HTTP 200 response, received {re.status_code}"
-
-# Function to delete the endpoint from Model Serving
-def func_delete_model_serving_endpoint(model_serving_endpoint_name):
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"
-    url = f"{endpoint_url}/{model_serving_endpoint_name}"
-    response = requests.delete(url, headers=headers)
-    if response.status_code != 200:
-        raise Exception(
-            f"Request failed with status {response.status_code}, {response.text}"
-        )
-    else:
-        print(model_serving_endpoint_name, "endpoint is deleted!")
-    return response.json()
-
-# COMMAND ----------
-
-# Create an endpoint. This may take some time.
-func_create_endpoint(model_serving_endpoint_name)
-
-# COMMAND ----------
-
-import time, mlflow
-
-# Define a function to wait for a serving endpoint to be ready
-def wait_for_endpoint():
-    endpoint_url = f"https://{instance}/api/2.0/serving-endpoints"  # Construct the base URL for the serving endpoints API
-    while True:  # Loop until the endpoint is ready
-        url = f"{endpoint_url}/{model_serving_endpoint_name}"  # Construct the URL for the specific model serving endpoint
-        response = requests.get(url, headers=headers)  # Send a GET request to the endpoint URL with the necessary headers
-
-        # Ensure the response status code is 200 (OK)
-        assert (
-            response.status_code == 200
-        ), f"Expected an HTTP 200 response, received {response.status_code}\n{response.text}"
-
-        # Extract the status of the endpoint from the response JSON
-        status = response.json().get("state", {}).get("ready", {})
-        # print("status", status)  # Optional: print the status for debugging purposes
-
-        # Check if the endpoint status is "READY"
-        if status == "READY":
-            print(status)  # Print the status if the endpoint is ready
-            print("-" * 80)  # Print a separator line for clarity
-            return  # Exit the function when the endpoint is ready
-        else:
-            # Print a message indicating the endpoint is not ready and wait for 5 minutes
-            print(f"Endpoint not ready ({status}), waiting 5 minutes")
-            time.sleep(300)  # Wait for 300 seconds before checking again
-
-# Get the Databricks web application URL using an MLflow utility function
-api_url = mlflow.utils.databricks_utils.get_webapp_url()
-
-# Call the wait_for_endpoint function to wait for the serving endpoint to be ready
-wait_for_endpoint()
-
-# COMMAND ----------
-
-# MAGIC %md
-# MAGIC ## Online Forecast
-# MAGIC Once the endpoint is ready, let's send a request to the model and generate an online forecast.
-
-# COMMAND ----------
-
-import os
-import requests
-import pandas as pd
-import json
-import matplotlib.pyplot as plt
-
-# Replace the URL with the endpoint invocation URL you get from the Model Serving page.
-endpoint_url = f"https://{instance}/serving-endpoints/{model_serving_endpoint_name}/invocations" -token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get() -def forecast(input_data, url=endpoint_url, databricks_token=token): - headers = { - "Authorization": f"Bearer {databricks_token}", - "Content-Type": "application/json", - } - body = {"inputs": input_data.tolist()} - data = json.dumps(body) - response = requests.request(method="POST", headers=headers, url=url, data=data) - if response.status_code != 200: - raise Exception( - f"Request failed with status {response.status_code}, {response.text}" - ) - return response.json() - -# COMMAND ---------- - -# Send request to the endpoint -forecast(df) - -# COMMAND ---------- - -# Delete the serving endpoint -func_delete_model_serving_endpoint(model_serving_endpoint_name) diff --git a/mmf_sa/models/chronosforecast/ChronosPipeline.py b/mmf_sa/models/chronosforecast/ChronosPipeline.py index b67db2c..9b9708a 100644 --- a/mmf_sa/models/chronosforecast/ChronosPipeline.py +++ b/mmf_sa/models/chronosforecast/ChronosPipeline.py @@ -1,6 +1,3 @@ -from abc import ABC -import sys -import subprocess import pandas as pd import numpy as np import torch @@ -24,17 +21,13 @@ def __init__(self, params): self.params = params self.device = None self.model = None - self.install("git+https://github.com/amazon-science/chronos-forecasting.git") - - @staticmethod - def install(package: str): - subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"]) def register(self, registered_model_name: str): pipeline = ChronosModel( self.repo, self.params["prediction_length"], self.params["num_samples"], + self.device, ) input_schema = Schema([TensorSpec(np.dtype(np.double), (-1, -1))]) output_schema = Schema([TensorSpec(np.dtype(np.uint8), (-1, -1, -1))]) @@ -46,8 +39,12 @@ def register(self, registered_model_name: str): registered_model_name=registered_model_name, signature=signature, input_example=input_example, - pip_requirements=[ - "git+https://github.com/amazon-science/chronos-forecasting.git", + pip_requirements=[ # List of pip requirements + "torch==2.3.1", + "torchvision==0.18.1", + "transformers==4.41.2", + "cloudpickle==2.2.1", + "chronos-forecasting", "git+https://github.com/databricks-industry-solutions/many-model-forecasting.git", "pyspark==3.5.0", ], @@ -169,12 +166,20 @@ def predict_udf(bulk_iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: import torch import numpy as np import pandas as pd - from chronos import ChronosPipeline - pipeline = ChronosPipeline.from_pretrained( - self.repo, - device_map="auto", - torch_dtype=torch.bfloat16, - ) + # Initialize the ChronosPipeline with a pretrained model from the specified repository + from chronos import BaseChronosPipeline, ChronosBoltPipeline + if "bolt" in self.repo: + pipeline = ChronosBoltPipeline.from_pretrained( + self.repo, + device_map=self.device, + torch_dtype=torch.bfloat16, + ) + else: + pipeline = BaseChronosPipeline.from_pretrained( + self.repo, + device_map=self.device, + torch_dtype=torch.bfloat16, + ) # inference for bulk in bulk_iterator: median = [] @@ -184,7 +189,7 @@ def predict_udf(bulk_iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: forecasts = pipeline.predict( context=contexts, prediction_length=self.params["prediction_length"], - num_samples=self.params["num_samples"], + #num_samples=self.params["num_samples"], ) median.extend([np.median(forecast, axis=0) for forecast in forecasts]) yield pd.Series(median) @@ -226,25 
+231,61 @@ def __init__(self, params): self.repo = "amazon/chronos-t5-large" +class ChronosBoltTiny(ChronosForecaster): + def __init__(self, params): + super().__init__(params) + self.params = params + self.repo = "amazon/chronos-bolt-tiny" + + +class ChronosBoltMini(ChronosForecaster): + def __init__(self, params): + super().__init__(params) + self.params = params + self.repo = "amazon/chronos-bolt-mini" + + +class ChronosBoltSmall(ChronosForecaster): + def __init__(self, params): + super().__init__(params) + self.params = params + self.repo = "amazon/chronos-bolt-small" + +class ChronosBoltBase(ChronosForecaster): + def __init__(self, params): + super().__init__(params) + self.params = params + self.repo = "amazon/chronos-bolt-base" + + class ChronosModel(mlflow.pyfunc.PythonModel): - def __init__(self, repository, prediction_length, num_samples): + def __init__(self, repository, prediction_length, num_samples, device): import torch - from chronos import ChronosPipeline self.repository = repository self.prediction_length = prediction_length self.num_samples = num_samples - self.pipeline = ChronosPipeline.from_pretrained( - self.repository, - device_map="cuda", - torch_dtype=torch.bfloat16, - ) + self.device = "cuda" if torch.cuda.is_available() else "cpu" + # Initialize the ChronosPipeline with a pretrained model from the specified repository + from chronos import BaseChronosPipeline, ChronosBoltPipeline + if "bolt" in self.repository: + self.pipeline = ChronosBoltPipeline.from_pretrained( + self.repository, + device_map=self.device, + torch_dtype=torch.bfloat16, + ) + else: + self.pipeline = BaseChronosPipeline.from_pretrained( + self.repository, + device_map=self.device, + torch_dtype=torch.bfloat16, + ) def predict(self, context, input_data, params=None): history = [torch.tensor(list(series)) for series in input_data] forecast = self.pipeline.predict( context=history, prediction_length=self.prediction_length, - num_samples=self.num_samples, + #num_samples=self.num_samples, ) return forecast.numpy() diff --git a/mmf_sa/models/models_conf.yaml b/mmf_sa/models/models_conf.yaml index ae08037..38de2d7 100644 --- a/mmf_sa/models/models_conf.yaml +++ b/mmf_sa/models/models_conf.yaml @@ -376,6 +376,38 @@ models: num_samples: 10 batch_size: 2 + ChronosBoltTiny: + module: mmf_sa.models.chronosforecast.ChronosPipeline + model_class: ChronosBoltTiny + framework: Chronos + model_type: foundation + num_samples: 10 + batch_size: 16 + + ChronosBoltMini: + module: mmf_sa.models.chronosforecast.ChronosPipeline + model_class: ChronosBoltMini + framework: Chronos + model_type: foundation + num_samples: 10 + batch_size: 16 + + ChronosBoltSmall: + module: mmf_sa.models.chronosforecast.ChronosPipeline + model_class: ChronosBoltSmall + framework: Chronos + model_type: foundation + num_samples: 10 + batch_size: 16 + + ChronosBoltBase: + module: mmf_sa.models.chronosforecast.ChronosPipeline + model_class: ChronosBoltBase + framework: Chronos + model_type: foundation + num_samples: 10 + batch_size: 16 + MoiraiBase: module: mmf_sa.models.moiraiforecast.MoiraiPipeline model_class: MoiraiBase From 733e1c32c3e799698b3f04f5b14ab9cdbfd31142 Mon Sep 17 00:00:00 2001 From: Ryuta Yoshimatsu Date: Wed, 15 Jan 2025 07:28:25 +0100 Subject: [PATCH 2/2] updated chronos pipeline --- examples/foundation_daily.py | 4 ++++ examples/foundation_monthly.py | 5 ++++- examples/m5-examples/foundation_daily_m5.py | 5 ++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/foundation_daily.py 
b/examples/foundation_daily.py index 61d056b..0e6c9c3 100644 --- a/examples/foundation_daily.py +++ b/examples/foundation_daily.py @@ -120,6 +120,10 @@ def transform_group(df): "ChronosT5Small", "ChronosT5Base", "ChronosT5Large", + "ChronosBoltTiny", + "ChronosBoltMini", + "ChronosBoltSmall", + "ChronosBoltBase", "MoiraiSmall", "MoiraiBase", "MoiraiLarge", diff --git a/examples/foundation_monthly.py b/examples/foundation_monthly.py index a2c4534..0b38369 100644 --- a/examples/foundation_monthly.py +++ b/examples/foundation_monthly.py @@ -126,12 +126,15 @@ def transform_group(df): "ChronosT5Small", "ChronosT5Base", "ChronosT5Large", + "ChronosBoltTiny", + "ChronosBoltMini", + "ChronosBoltSmall", + "ChronosBoltBase", "MoiraiSmall", "MoiraiBase", "MoiraiLarge", "TimesFM_1_0_200m", "TimesFM_2_0_500m", - "Moment1Large", ] # COMMAND ---------- diff --git a/examples/m5-examples/foundation_daily_m5.py b/examples/m5-examples/foundation_daily_m5.py index 4070e07..f1afa4d 100644 --- a/examples/m5-examples/foundation_daily_m5.py +++ b/examples/m5-examples/foundation_daily_m5.py @@ -26,12 +26,15 @@ "ChronosT5Small", "ChronosT5Base", "ChronosT5Large", + "ChronosBoltTiny", + "ChronosBoltMini", + "ChronosBoltSmall", + "ChronosBoltBase", "MoiraiSmall", "MoiraiBase", "MoiraiLarge", "TimesFM_1_0_200m", "TimesFM_2_0_500m", - #"Moment1Large", ] # COMMAND ----------
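With the `models_conf.yaml` entries above in place, the new Chronos Bolt checkpoints are selected purely by name in `active_models`. Below is a sketch of how an MMF run would pick them up, following the patterns in the repo's example notebooks; the table and experiment names are illustrative, and the exact set of `run_forecast` arguments should be checked against the examples.

```python
# Sketch (not part of the patch): running MMF with the new ChronosBolt models.
from mmf_sa import run_forecast

run_forecast(
    spark=spark,
    train_data="mmf.m4.m4_daily_train",              # illustrative Unity Catalog table
    scoring_output="mmf.m4.daily_scoring_output",    # illustrative output tables
    evaluation_output="mmf.m4.daily_evaluation_output",
    group_id="unique_id",
    date_col="ds",
    target="y",
    freq="D",
    prediction_length=10,
    backtest_months=1,
    stride=10,
    active_models=["ChronosBoltTiny", "ChronosBoltBase"],  # names from models_conf.yaml
    experiment_path="/Shared/mmf_experiment",
)
```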