From 635236e576bf3c1640d090238e678ca96799a556 Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Wed, 30 Oct 2024 17:17:35 -0500 Subject: [PATCH 01/12] fix: Remove update_infra from Expedia provider --- .../feast/expediagroup/provider/expedia.py | 24 ------------------- sdk/python/feast/repo_config.py | 2 ++ 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/sdk/python/feast/expediagroup/provider/expedia.py b/sdk/python/feast/expediagroup/provider/expedia.py index b242e00437..71cb0e6f2b 100644 --- a/sdk/python/feast/expediagroup/provider/expedia.py +++ b/sdk/python/feast/expediagroup/provider/expedia.py @@ -59,27 +59,3 @@ def ingest_df( ) super().ingest_df(feature_view, df.drop(drop_list, axis=1)) - - def update_infra( - self, - project: str, - tables_to_delete: Sequence[FeatureView], - tables_to_keep: Sequence[FeatureView], - entities_to_delete: Sequence[Entity], - entities_to_keep: Sequence[Entity], - partial: bool, - ): - if self.online_store: - if tables_to_delete: - logger.info( - f"Data associated to {[feature_view.name for feature_view in tables_to_delete]} feature views will be deleted from the online store based on ttl defined if the entities are not shared with other feature views" - ) - - if self.batch_engine: - self.batch_engine.update( - project, - tables_to_delete, - tables_to_keep, - entities_to_delete, - entities_to_keep, - ) diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 92f47b130a..f3ad938faa 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -243,6 +243,8 @@ def __init__(self, **data: Any): self._online_store = None if provider == "expedia": self.online_config = data.get("online_store", "redis") + if self.online_config["type"] == "redis": + self.online_config["full_scan_for_deletion"] = True else: self.online_config = data.get("online_store", "sqlite") From 2569f818e23de4547cf871982654f903e769495b Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Wed, 30 Oct 2024 17:22:54 -0500 Subject: [PATCH 02/12] fix: default full_scan_for_deletion to false --- sdk/python/feast/expediagroup/provider/expedia.py | 2 +- sdk/python/feast/infra/online_stores/redis.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/expediagroup/provider/expedia.py b/sdk/python/feast/expediagroup/provider/expedia.py index 71cb0e6f2b..9c10c4b7ad 100644 --- a/sdk/python/feast/expediagroup/provider/expedia.py +++ b/sdk/python/feast/expediagroup/provider/expedia.py @@ -1,5 +1,5 @@ import logging -from typing import List, Sequence, Set +from typing import List, Set import pandas as pd diff --git a/sdk/python/feast/infra/online_stores/redis.py b/sdk/python/feast/infra/online_stores/redis.py index 47c4ab49d8..9bde401783 100644 --- a/sdk/python/feast/infra/online_stores/redis.py +++ b/sdk/python/feast/infra/online_stores/redis.py @@ -76,7 +76,7 @@ class RedisOnlineStoreConfig(FeastConfigBaseModel): key_ttl_seconds: Optional[int] = None """(Optional) redis key bin ttl (in seconds) for expiring entities""" - full_scan_for_deletion: Optional[bool] = True + full_scan_for_deletion: Optional[bool] = False """(Optional) whether to scan for deletion of features""" From f6b1f9440459c8949ae0d125f4e81b3d0bfda6aa Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Wed, 30 Oct 2024 17:28:22 -0500 Subject: [PATCH 03/12] fix: lint error --- sdk/python/feast/expediagroup/provider/expedia.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdk/python/feast/expediagroup/provider/expedia.py b/sdk/python/feast/expediagroup/provider/expedia.py index 9c10c4b7ad..dc9fed9d49 100644 --- a/sdk/python/feast/expediagroup/provider/expedia.py +++ b/sdk/python/feast/expediagroup/provider/expedia.py @@ -3,7 +3,6 @@ import pandas as pd -from feast.entity import Entity from feast.feature_view import FeatureView from feast.infra.passthrough_provider import PassthroughProvider from feast.repo_config import RepoConfig From 38a37745f18367c750eb5d0e188c88bea110313d Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Wed, 30 Oct 2024 18:53:53 -0500 Subject: [PATCH 04/12] fix: default online_config --- sdk/python/feast/repo_config.py | 7 ++++++- .../tests/unit/infra/scaffolding/test_repo_config.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index f3ad938faa..4153808cf9 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -242,7 +242,12 @@ def __init__(self, **data: Any): self._online_store = None if provider == "expedia": - self.online_config = data.get("online_store", "redis") + self.online_config = data.get("online_store", { + "type": "redis", + "connection_string": "${REDIS_CONNECTION_STRING}", + "redis_type": "redis_cluster", + "key_ttl_seconds": 604800 + }) if self.online_config["type"] == "redis": self.online_config["full_scan_for_deletion"] = True else: diff --git a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py index 71f8602c78..decdf011e3 100644 --- a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py +++ b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py @@ -358,7 +358,7 @@ def test_repo_config_init_expedia_provider(): ) assert c.registry_config == "registry.db" assert c.offline_config["type"] == "spark" - assert c.online_config == "redis" + assert c.online_config["type"] == "redis" assert c.batch_engine_config == "spark.engine" assert isinstance(c.online_store, RedisOnlineStoreConfig) assert isinstance(c.batch_engine, SparkMaterializationEngineConfig) From e22c3276d16b98697649e97b7ade80fea49fd6b4 Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Wed, 30 Oct 2024 18:57:25 -0500 Subject: [PATCH 05/12] fix: lint formatting --- sdk/python/feast/repo_config.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 4153808cf9..cf1e082167 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -242,12 +242,15 @@ def __init__(self, **data: Any): self._online_store = None if provider == "expedia": - self.online_config = data.get("online_store", { + self.online_config = data.get( + "online_store", + { "type": "redis", "connection_string": "${REDIS_CONNECTION_STRING}", "redis_type": "redis_cluster", - "key_ttl_seconds": 604800 - }) + "key_ttl_seconds": 604800, + }, + ) if self.online_config["type"] == "redis": self.online_config["full_scan_for_deletion"] = True else: From 1a182c2cbef118c9017e47130a5dc0490036c5eb Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Thu, 31 Oct 2024 16:52:22 -0500 Subject: [PATCH 06/12] fix: address comments --- sdk/python/feast/feature_store.py | 6 +----- sdk/python/feast/infra/online_stores/redis.py | 2 +- sdk/python/feast/repo_config.py | 17 ++++++----------- .../unit/infra/scaffolding/test_repo_config.py | 2 +- 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 44d7242bd0..e1854a452e 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1283,11 +1283,7 @@ def materialize_incremental( ) start_date = _utc_now() - timedelta(weeks=52) provider = self._get_provider() - print( - f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}" - f" from {Style.BRIGHT + Fore.GREEN}{start_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}" - f" to {Style.BRIGHT + Fore.GREEN}{end_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}:" - ) + logger.info(f"{feature_view.name} from {start_date.replace(microsecond=0).astimezone()} to {end_date.replace(microsecond=0).astimezone()}:") def tqdm_builder(length): return tqdm(total=length, ncols=100) diff --git a/sdk/python/feast/infra/online_stores/redis.py b/sdk/python/feast/infra/online_stores/redis.py index 9bde401783..47c4ab49d8 100644 --- a/sdk/python/feast/infra/online_stores/redis.py +++ b/sdk/python/feast/infra/online_stores/redis.py @@ -76,7 +76,7 @@ class RedisOnlineStoreConfig(FeastConfigBaseModel): key_ttl_seconds: Optional[int] = None """(Optional) redis key bin ttl (in seconds) for expiring entities""" - full_scan_for_deletion: Optional[bool] = False + full_scan_for_deletion: Optional[bool] = True """(Optional) whether to scan for deletion of features""" diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index cf1e082167..b4b737688d 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -242,17 +242,12 @@ def __init__(self, **data: Any): self._online_store = None if provider == "expedia": - self.online_config = data.get( - "online_store", - { - "type": "redis", - "connection_string": "${REDIS_CONNECTION_STRING}", - "redis_type": "redis_cluster", - "key_ttl_seconds": 604800, - }, - ) - if self.online_config["type"] == "redis": - self.online_config["full_scan_for_deletion"] = True + self.online_config = data.get("online_store", "redis") + if ( + isinstance(self.online_config, Dict) + and self.online_config["type"] == "redis" + ): + self.online_config["full_scan_for_deletion"] = False else: self.online_config = data.get("online_store", "sqlite") diff --git a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py index decdf011e3..71f8602c78 100644 --- a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py +++ b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py @@ -358,7 +358,7 @@ def test_repo_config_init_expedia_provider(): ) assert c.registry_config == "registry.db" assert c.offline_config["type"] == "spark" - assert c.online_config["type"] == "redis" + assert c.online_config == "redis" assert c.batch_engine_config == "spark.engine" assert isinstance(c.online_store, RedisOnlineStoreConfig) assert isinstance(c.batch_engine, SparkMaterializationEngineConfig) From 422fcda636fab7baa1ab360a2b152359face1788 Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Thu, 31 Oct 2024 16:55:28 -0500 Subject: [PATCH 07/12] fix: lint formatting --- sdk/python/feast/feature_store.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index e1854a452e..2d5eb13bb9 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1283,7 +1283,9 @@ def materialize_incremental( ) start_date = _utc_now() - timedelta(weeks=52) provider = self._get_provider() - logger.info(f"{feature_view.name} from {start_date.replace(microsecond=0).astimezone()} to {end_date.replace(microsecond=0).astimezone()}:") + logger.info( + f"{feature_view.name} from {start_date.replace(microsecond=0).astimezone()} to {end_date.replace(microsecond=0).astimezone()}:" + ) def tqdm_builder(length): return tqdm(total=length, ncols=100) From d805f9c130cc4b4d0f605bcb3c35ecc72d9c6bf5 Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Thu, 31 Oct 2024 17:24:00 -0500 Subject: [PATCH 08/12] fix: docstring test --- sdk/python/feast/feature_store.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 2d5eb13bb9..5fe3ed24ac 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1251,7 +1251,6 @@ def materialize_incremental( >>> fs.materialize_incremental(end_date=_utc_now() - timedelta(minutes=5)) Materializing... - ... """ feature_views_to_materialize = self._get_feature_views_to_materialize( feature_views From 001667dd83692cc36d7a3e80d169eed63fee36c2 Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Fri, 1 Nov 2024 10:38:12 -0500 Subject: [PATCH 09/12] fix: remove broken coloring from logs --- sdk/python/feast/feature_store.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 5fe3ed24ac..86284107bc 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1339,7 +1339,6 @@ def materialize( ... ) Materializing... - ... """ if utils.make_tzaware(start_date) > utils.make_tzaware(end_date): raise ValueError( @@ -1359,7 +1358,9 @@ def materialize( # TODO paging large loads for feature_view in feature_views_to_materialize: provider = self._get_provider() - print(f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:") + logger.info( + f"{feature_view.name} from {start_date.replace(microsecond=0).astimezone()} to {end_date.replace(microsecond=0).astimezone()}:" + ) def tqdm_builder(length): return tqdm(total=length, ncols=100) From c3507e3e631d0b6982b18b3dadda19a56b01caad Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Fri, 1 Nov 2024 14:49:41 -0500 Subject: [PATCH 10/12] fix: add print statements back --- sdk/python/feast/feature_store.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 86284107bc..ef6acf0066 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1251,6 +1251,7 @@ def materialize_incremental( >>> fs.materialize_incremental(end_date=_utc_now() - timedelta(minutes=5)) Materializing... + ... """ feature_views_to_materialize = self._get_feature_views_to_materialize( feature_views @@ -1282,6 +1283,11 @@ def materialize_incremental( ) start_date = _utc_now() - timedelta(weeks=52) provider = self._get_provider() + print( + f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}" + f" from {Style.BRIGHT + Fore.GREEN}{start_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}" + f" to {Style.BRIGHT + Fore.GREEN}{end_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}:" + ) logger.info( f"{feature_view.name} from {start_date.replace(microsecond=0).astimezone()} to {end_date.replace(microsecond=0).astimezone()}:" ) @@ -1339,6 +1345,7 @@ def materialize( ... ) Materializing... + ... """ if utils.make_tzaware(start_date) > utils.make_tzaware(end_date): raise ValueError( @@ -1358,6 +1365,7 @@ def materialize( # TODO paging large loads for feature_view in feature_views_to_materialize: provider = self._get_provider() + print(f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:") logger.info( f"{feature_view.name} from {start_date.replace(microsecond=0).astimezone()} to {end_date.replace(microsecond=0).astimezone()}:" ) From 15bfc6cbe471bb6abe34cbcc48b54f1eca9ae318 Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Fri, 1 Nov 2024 15:24:21 -0500 Subject: [PATCH 11/12] fix: log microseconds during materialization --- sdk/python/feast/feature_store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index ef6acf0066..26f01e60d0 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1289,7 +1289,7 @@ def materialize_incremental( f" to {Style.BRIGHT + Fore.GREEN}{end_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}:" ) logger.info( - f"{feature_view.name} from {start_date.replace(microsecond=0).astimezone()} to {end_date.replace(microsecond=0).astimezone()}:" + f"{feature_view.name} from {start_date.astimezone()} to {end_date.astimezone()}:" ) def tqdm_builder(length): @@ -1367,7 +1367,7 @@ def materialize( provider = self._get_provider() print(f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:") logger.info( - f"{feature_view.name} from {start_date.replace(microsecond=0).astimezone()} to {end_date.replace(microsecond=0).astimezone()}:" + f"{feature_view.name} from {start_date.astimezone()} to {end_date.astimezone()}:" ) def tqdm_builder(length): From 86da8f0195abb211565abad7a191cce2ed70d400 Mon Sep 17 00:00:00 2001 From: Zach Barnett Date: Fri, 1 Nov 2024 16:57:23 -0500 Subject: [PATCH 12/12] fix: update logger message --- sdk/python/feast/feature_store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 26f01e60d0..225c9f5dea 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1289,7 +1289,7 @@ def materialize_incremental( f" to {Style.BRIGHT + Fore.GREEN}{end_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}:" ) logger.info( - f"{feature_view.name} from {start_date.astimezone()} to {end_date.astimezone()}:" + f"Materializing {feature_view.name} from {start_date.astimezone()} to {end_date.astimezone()}" ) def tqdm_builder(length): @@ -1367,7 +1367,7 @@ def materialize( provider = self._get_provider() print(f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:") logger.info( - f"{feature_view.name} from {start_date.astimezone()} to {end_date.astimezone()}:" + f"Materializing {feature_view.name} from {start_date.astimezone()} to {end_date.astimezone()}" ) def tqdm_builder(length):