From af3907644a2a19b4b5a8997fafd172dffed08be2 Mon Sep 17 00:00:00 2001 From: TheDude Date: Thu, 26 Dec 2024 14:28:22 +0530 Subject: [PATCH 1/3] Add secrets reload withs safe retry --- plugins/openai/superduper_openai/model.py | 11 +++++-- superduper/base/config.py | 1 + superduper/base/exceptions.py | 8 +++++ superduper/misc/files.py | 19 ++++++++++++ superduper/misc/retry.py | 38 +++++++++++++++++++++++ 5 files changed, 74 insertions(+), 3 deletions(-) diff --git a/plugins/openai/superduper_openai/model.py b/plugins/openai/superduper_openai/model.py index 73565e3a0..165f1ab1b 100644 --- a/plugins/openai/superduper_openai/model.py +++ b/plugins/openai/superduper_openai/model.py @@ -19,7 +19,8 @@ from superduper.base.datalayer import Datalayer from superduper.components.model import APIBaseModel, Inputs from superduper.misc.compat import cache -from superduper.misc.retry import Retry +from superduper.misc.retry import Retry, safe_retry +from superduper.base import exceptions retry = Retry( exception_types=( @@ -61,19 +62,23 @@ def __post_init__(self, db, example): self.client_kwargs['base_url'] = self.openai_api_base self.client_kwargs['default_headers'] = self.openai_api_base + @safe_retry(exceptions.MissingSecretsException) + def init(self, db=None): + """Initialize the model.""" + super().init() + # dall-e is not currently included in list returned by OpenAI model endpoint if self.model not in ( mo := _available_models(json.dumps(self.client_kwargs)) ) and self.model not in ('dall-e'): msg = f'model {self.model} not in OpenAI available models, {mo}' raise ValueError(msg) - self.syncClient = SyncOpenAI(**self.client_kwargs) if 'OPENAI_API_KEY' not in os.environ and ( 'api_key' not in self.client_kwargs.keys() and self.client_kwargs ): - raise ValueError( + raise exceptions.MissingSecretsException( 'OPENAI_API_KEY not available neither in environment vars ' 'nor in `client_kwargs`' ) diff --git a/superduper/base/config.py b/superduper/base/config.py index 7d39b748e..35088d282 100644 --- a/superduper/base/config.py +++ b/superduper/base/config.py @@ -174,6 +174,7 @@ class Config(BaseConfig): envs: dc.InitVar[t.Optional[t.Dict[str, str]]] = None data_backend: str = "mongodb://localhost:27017/test_db" + secrets_volume: str = os.path.join(".superduper", "/session/secrets") lance_home: str = os.path.join(".superduper", "vector_indices") diff --git a/superduper/base/exceptions.py b/superduper/base/exceptions.py index eeb0225ca..67801f131 100644 --- a/superduper/base/exceptions.py +++ b/superduper/base/exceptions.py @@ -89,3 +89,11 @@ class UnsupportedDatatype(BaseException): :param msg: msg for BaseException """ + + +class MissingSecretsException(BaseException): + """ + Missing secrets. + + :param msg: msg for BaseException + """ diff --git a/superduper/misc/files.py b/superduper/misc/files.py index 4ab053c0a..d7cdcfa85 100644 --- a/superduper/misc/files.py +++ b/superduper/misc/files.py @@ -1,8 +1,27 @@ import hashlib +import os from superduper import CFG +def load_secrets(): + secrets_dir = CFG.secrets_volume + if not os.path.isdir(secrets_dir): + raise ValueError(f"The path '{secrets_dir}' is not a valid directory.") + + for root, _, files in os.walk(secrets_dir): + for file_name in files: + file_path = os.path.join(root, file_name) + try: + with open(file_path, 'r') as file: + content = file.read().strip() + + key = file_name + os.environ[key] = content + except Exception as e: + print(f"Error reading file {file_path}: {e}") + + def get_file_from_uri(uri): """ Get file name from uri. diff --git a/superduper/misc/retry.py b/superduper/misc/retry.py index a8762427c..f087a07e7 100644 --- a/superduper/misc/retry.py +++ b/superduper/misc/retry.py @@ -1,3 +1,4 @@ +import time import dataclasses as dc import functools import typing as t @@ -5,6 +6,8 @@ import tenacity import superduper as s +from superduper import logging +from superduper.misc.files import load_secrets ExceptionTypes = t.Union[t.Type[BaseException], t.Tuple[t.Type[BaseException], ...]] @@ -40,6 +43,41 @@ def __call__(self, f: t.Callable) -> t.Any: return retrier(f) +def safe_retry(exception_to_check, retries=1, delay=1): + """ + A decorator that retries a function if a specified exception is raised. + + :param exception_to_check: The exception or tuple of exceptions to check. + :param retries: The maximum number of retries. + :param delay: Delay between retries in seconds. + :return: The result of the decorated function. + """ + + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + attempt = 0 + while attempt < retries: + try: + load_secrets() + return func(*args, **kwargs) + except exception_to_check as e: + attempt += 1 + if attempt >= retries: + logging.error( + f"Function {func.__name__} failed after {retries} retries." + ) + raise + logging.warn( + f"Retrying {func.__name__} due to {e}, attempt {attempt} of {retries}..." + ) + time.sleep(delay) + + return wrapper + + return decorator + + def db_retry(connector='databackend'): """Helper method to retry methods with database calls. From 96c16c1f8c914be4a78e91a7e1dea2a7f390b225 Mon Sep 17 00:00:00 2001 From: TheDude Date: Fri, 27 Dec 2024 14:09:20 +0530 Subject: [PATCH 2/3] Update changelog --- CHANGELOG.md | 1 + superduper/misc/files.py | 1 + superduper/misc/retry.py | 5 +++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 002817474..dbe6d294a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support `` in Template to enable apps to use files/folders included in the template - Add Data Component for storing data directly in the template - Add a standalone flag in Streamlit to mark the page as independent. +- Add secrets directory mount for loading secret env vars. #### Bug Fixes diff --git a/superduper/misc/files.py b/superduper/misc/files.py index d7cdcfa85..f970699f9 100644 --- a/superduper/misc/files.py +++ b/superduper/misc/files.py @@ -5,6 +5,7 @@ def load_secrets(): + """Help method to load secrets from directory.""" secrets_dir = CFG.secrets_volume if not os.path.isdir(secrets_dir): raise ValueError(f"The path '{secrets_dir}' is not a valid directory.") diff --git a/superduper/misc/retry.py b/superduper/misc/retry.py index f087a07e7..8e2968ab3 100644 --- a/superduper/misc/retry.py +++ b/superduper/misc/retry.py @@ -1,6 +1,6 @@ -import time import dataclasses as dc import functools +import time import typing as t import tenacity @@ -69,7 +69,8 @@ def wrapper(*args, **kwargs): ) raise logging.warn( - f"Retrying {func.__name__} due to {e}, attempt {attempt} of {retries}..." + f"Retrying {func.__name__} due to {e}" + ", attempt {attempt} of {retries}..." ) time.sleep(delay) From 2be19ea2605e69af9c877d5f064f1c0cf27ee9ea Mon Sep 17 00:00:00 2001 From: TheDude Date: Fri, 27 Dec 2024 14:33:17 +0530 Subject: [PATCH 3/3] [PLUGINS] Bump Version [openai] --- plugins/openai/pyproject.toml | 3 ++- plugins/openai/superduper_openai/__init__.py | 2 +- plugins/openai/superduper_openai/model.py | 23 ++++++++--------- superduper/base/config.py | 3 ++- superduper/misc/retry.py | 27 ++++++++++++-------- 5 files changed, 32 insertions(+), 26 deletions(-) diff --git a/plugins/openai/pyproject.toml b/plugins/openai/pyproject.toml index 1cc7508c5..ed22a766e 100644 --- a/plugins/openai/pyproject.toml +++ b/plugins/openai/pyproject.toml @@ -26,7 +26,8 @@ dependencies = [ [project.optional-dependencies] test = [ - "vcrpy>=5.1.0", + "vcrpy==5.1.0", + "urllib3==2.2.3", ] [project.urls] diff --git a/plugins/openai/superduper_openai/__init__.py b/plugins/openai/superduper_openai/__init__.py index 891e18817..8aa4cf285 100644 --- a/plugins/openai/superduper_openai/__init__.py +++ b/plugins/openai/superduper_openai/__init__.py @@ -1,5 +1,5 @@ from .model import OpenAIChatCompletion, OpenAIEmbedding -__version__ = "0.4.2" +__version__ = "0.4.3" __all__ = 'OpenAIChatCompletion', 'OpenAIEmbedding' diff --git a/plugins/openai/superduper_openai/model.py b/plugins/openai/superduper_openai/model.py index 165f1ab1b..0bf36dab0 100644 --- a/plugins/openai/superduper_openai/model.py +++ b/plugins/openai/superduper_openai/model.py @@ -16,11 +16,11 @@ ) from openai._types import NOT_GIVEN from superduper.backends.query_dataset import QueryDataset +from superduper.base import exceptions from superduper.base.datalayer import Datalayer from superduper.components.model import APIBaseModel, Inputs from superduper.misc.compat import cache from superduper.misc.retry import Retry, safe_retry -from superduper.base import exceptions retry = Retry( exception_types=( @@ -55,27 +55,19 @@ def __post_init__(self, db, example): super().__post_init__(db, example) assert isinstance(self.client_kwargs, dict) - if self.openai_api_key is not None: self.client_kwargs['api_key'] = self.openai_api_key if self.openai_api_base is not None: self.client_kwargs['base_url'] = self.openai_api_base self.client_kwargs['default_headers'] = self.openai_api_base - @safe_retry(exceptions.MissingSecretsException) - def init(self, db=None): + @safe_retry(exceptions.MissingSecretsException, verbose=0) + def init(self): """Initialize the model.""" super().init() # dall-e is not currently included in list returned by OpenAI model endpoint - if self.model not in ( - mo := _available_models(json.dumps(self.client_kwargs)) - ) and self.model not in ('dall-e'): - msg = f'model {self.model} not in OpenAI available models, {mo}' - raise ValueError(msg) - self.syncClient = SyncOpenAI(**self.client_kwargs) - - if 'OPENAI_API_KEY' not in os.environ and ( + if 'OPENAI_API_KEY' not in os.environ or ( 'api_key' not in self.client_kwargs.keys() and self.client_kwargs ): raise exceptions.MissingSecretsException( @@ -83,6 +75,13 @@ def init(self, db=None): 'nor in `client_kwargs`' ) + if self.model not in ( + mo := _available_models(json.dumps(self.client_kwargs)) + ) and self.model not in ('dall-e'): + msg = f'model {self.model} not in OpenAI available models, {mo}' + raise ValueError(msg) + self.syncClient = SyncOpenAI(**self.client_kwargs) + def predict_batches(self, dataset: t.Union[t.List, QueryDataset]) -> t.List: """Predict on a dataset. diff --git a/superduper/base/config.py b/superduper/base/config.py index 35088d282..55e72e19b 100644 --- a/superduper/base/config.py +++ b/superduper/base/config.py @@ -1,7 +1,7 @@ """Configuration variables for superduper.io. The classes in this file define the configuration variables for superduper.io, -which means that this file gets imported before alost anything else, and +hich means that this file gets imported before alost anything else, and canot contain any other imports from this project. """ @@ -148,6 +148,7 @@ class Config(BaseConfig): :param envs: The envs datas :param data_backend: The URI for the data backend + :param secrets_volume: The secrets volume mount for secrets env vars. :param lance_home: The home directory for the Lance vector indices, Default: .superduper/vector_indices :param artifact_store: The URI for the artifact store diff --git a/superduper/misc/retry.py b/superduper/misc/retry.py index 8e2968ab3..f294242df 100644 --- a/superduper/misc/retry.py +++ b/superduper/misc/retry.py @@ -43,13 +43,14 @@ def __call__(self, f: t.Callable) -> t.Any: return retrier(f) -def safe_retry(exception_to_check, retries=1, delay=1): +def safe_retry(exception_to_check, retries=1, delay=0.3, verbose=1): """ A decorator that retries a function if a specified exception is raised. :param exception_to_check: The exception or tuple of exceptions to check. :param retries: The maximum number of retries. :param delay: Delay between retries in seconds. + :param verbose: Verbose for logs. :return: The result of the decorated function. """ @@ -57,21 +58,25 @@ def decorator(func): @functools.wraps(func) def wrapper(*args, **kwargs): attempt = 0 - while attempt < retries: + while attempt <= retries: try: - load_secrets() + if attempt >= 1: + load_secrets() return func(*args, **kwargs) except exception_to_check as e: attempt += 1 - if attempt >= retries: - logging.error( - f"Function {func.__name__} failed after {retries} retries." - ) + if attempt > retries: + if verbose: + logging.error( + f"Function {func.__name__} failed ", + "after {retries} retries.", + ) raise - logging.warn( - f"Retrying {func.__name__} due to {e}" - ", attempt {attempt} of {retries}..." - ) + if verbose: + logging.warn( + f"Retrying {func.__name__} due to {e}" + ", attempt {attempt} of {retries}..." + ) time.sleep(delay) return wrapper