diff --git a/argilla-server/CHANGELOG.md b/argilla-server/CHANGELOG.md index 7d8daf02a9..e7b2afcbfd 100644 --- a/argilla-server/CHANGELOG.md +++ b/argilla-server/CHANGELOG.md @@ -16,6 +16,10 @@ These are the section headers that we use: ## [Unreleased]() +### Added + +- Added new `ARGILLA_DATABASE_SQLITE_TIMEOUT` environment variable allowing to set transactions timeout for SQLite. ([#5213](https://github.com/argilla-io/argilla/pull/5213)) + ### Fixed - Fixed SQLite connection settings not working correctly due to a outdated conditional. ([#5149](https://github.com/argilla-io/argilla/pull/5149)) diff --git a/argilla-server/src/argilla_server/constants.py b/argilla-server/src/argilla_server/constants.py index 6bda197c27..96d8b20de8 100644 --- a/argilla-server/src/argilla_server/constants.py +++ b/argilla-server/src/argilla_server/constants.py @@ -15,6 +15,8 @@ API_KEY_HEADER_NAME = "X-Argilla-Api-Key" WORKSPACE_HEADER_NAME = "X-Argilla-Workspace" +DATABASE_SQLITE = "sqlite" + SEARCH_ENGINE_ELASTICSEARCH = "elasticsearch" SEARCH_ENGINE_OPENSEARCH = "opensearch" @@ -22,6 +24,8 @@ DEFAULT_PASSWORD = "1234" DEFAULT_API_KEY = "argilla.apikey" +DEFAULT_DATABASE_SQLITE_TIMEOUT = 15 + DEFAULT_MAX_KEYWORD_LENGTH = 128 DEFAULT_TELEMETRY_KEY = "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" diff --git a/argilla-server/src/argilla_server/database.py b/argilla-server/src/argilla_server/database.py index e0bc4c4c95..1d52168c4c 100644 --- a/argilla-server/src/argilla_server/database.py +++ b/argilla-server/src/argilla_server/database.py @@ -51,7 +51,7 @@ def set_sqlite_pragma(dbapi_connection, connection_record): cursor.close() -async_engine = create_async_engine(settings.database_url) +async_engine = create_async_engine(settings.database_url, connect_args=settings.database_connect_args) AsyncSessionLocal = async_sessionmaker(autocommit=False, expire_on_commit=False, bind=async_engine) diff --git a/argilla-server/src/argilla_server/settings.py b/argilla-server/src/argilla_server/settings.py index d0900896fa..fc78a2cbc0 100644 --- a/argilla-server/src/argilla_server/settings.py +++ b/argilla-server/src/argilla_server/settings.py @@ -22,14 +22,16 @@ import re import warnings from pathlib import Path -from typing import List, Optional +from typing import Dict, List, Optional from urllib.parse import urlparse from argilla_server.constants import ( + DATABASE_SQLITE, DEFAULT_LABEL_SELECTION_OPTIONS_MAX_ITEMS, DEFAULT_MAX_KEYWORD_LENGTH, DEFAULT_SPAN_OPTIONS_MAX_ITEMS, DEFAULT_TELEMETRY_KEY, + DEFAULT_DATABASE_SQLITE_TIMEOUT, SEARCH_ENGINE_ELASTICSEARCH, SEARCH_ENGINE_OPENSEARCH, ) @@ -74,6 +76,10 @@ class Settings(BaseSettings): home_path: Optional[str] = Field(description="The home path where argilla related files will be stored") base_url: Optional[str] = Field(description="The default base url where server will be deployed") database_url: Optional[str] = Field(description="The database url that argilla will use as data store") + database_sqlite_timeout: Optional[int] = Field( + default=DEFAULT_DATABASE_SQLITE_TIMEOUT, + description="SQLite database connection timeout in seconds", + ) elasticsearch: str = "http://localhost:9200" elasticsearch_ssl_verify: bool = True @@ -220,6 +226,20 @@ def old_dataset_records_index_name(self) -> str: return index_name.replace("", "") return index_name.replace("", f".{ns}") + @property + def database_connect_args(self) -> Dict: + if self.database_is_sqlite: + return {"timeout": self.database_sqlite_timeout} + + return {} + + @property + def database_is_sqlite(self) -> bool: + if self.database_url is None: + return False + + return self.database_url.lower().startswith(DATABASE_SQLITE) + @property def search_engine_is_elasticsearch(self) -> bool: return self.search_engine == SEARCH_ENGINE_ELASTICSEARCH diff --git a/argilla-server/tests/unit/commons/test_settings.py b/argilla-server/tests/unit/commons/test_settings.py index f6aea4c669..3063f4c27e 100644 --- a/argilla-server/tests/unit/commons/test_settings.py +++ b/argilla-server/tests/unit/commons/test_settings.py @@ -27,6 +27,7 @@ def test_wrong_settings_namespace(monkeypatch, bad_namespace): def test_settings_namespace(monkeypatch): monkeypatch.setenv("ARGILLA_NAMESPACE", "namespace") + settings = Settings() assert settings.namespace == "namespace" @@ -38,12 +39,12 @@ def test_settings_index_replicas_with_shards_defined(monkeypatch): monkeypatch.setenv("ARGILLA_ES_RECORDS_INDEX_SHARDS", "100") monkeypatch.setenv("ARGILLA_ES_RECORDS_INDEX_REPLICAS", "2") - settings = Settings() - assert settings.es_records_index_replicas == 2 + assert Settings().es_records_index_replicas == 2 def test_settings_default_index_replicas_with_shards_defined(monkeypatch): monkeypatch.setenv("ARGILLA_ES_RECORDS_INDEX_SHARDS", "100") + settings = Settings() assert settings.es_records_index_shards == 100 @@ -54,9 +55,20 @@ def test_settings_default_database_url(monkeypatch): monkeypatch.setenv("ARGILLA_DATABASE_URL", "") settings = Settings() + assert settings.database_url == f"sqlite+aiosqlite:///{settings.home_path}/argilla.db?check_same_thread=False" +def test_settings_database_sqlite_timeout(monkeypatch): + monkeypatch.setenv("ARGILLA_DATABASE_SQLITE_TIMEOUT", "3") + + assert Settings().database_sqlite_timeout == 3 + + +def test_settings_default_database_sqlite_timeout(): + assert Settings().database_sqlite_timeout == 15 + + @pytest.mark.parametrize( "url, expected_url", [ @@ -68,5 +80,5 @@ def test_settings_default_database_url(monkeypatch): ) def test_settings_database_url(url: str, expected_url: str, monkeypatch): monkeypatch.setenv("ARGILLA_DATABASE_URL", url) - settings = Settings() - assert settings.database_url == expected_url + + assert Settings().database_url == expected_url diff --git a/docs/_source/getting_started/installation/configurations/server_configuration.md b/docs/_source/getting_started/installation/configurations/server_configuration.md index 96378dd7d0..6889aeba63 100644 --- a/docs/_source/getting_started/installation/configurations/server_configuration.md +++ b/docs/_source/getting_started/installation/configurations/server_configuration.md @@ -67,6 +67,12 @@ You can set the following environment variables to further configure your server - `ARGILLA_DATABASE_URL`: A URL string that contains the necessary information to connect to a database. Argilla uses SQLite by default, PostgreSQL is also officially supported (Default: `sqlite:///$ARGILLA_HOME_PATH/argilla.db?check_same_thread=False`). +#### SQLite + +The following environment variables are useful only when SQLite is used: + +- `ARGILLA_DATABASE_SQLITE_TIMEOUT`: How many seconds the connection should wait before raising an `OperationalError` when a table is locked. If another connection opens a transaction to modify a table, that table will be locked until the transaction is committed. (Defaut: `15` seconds). + #### Elasticsearch and Opensearch - `ARGILLA_ELASTICSEARCH`: URL of the connection endpoint of the Elasticsearch instance (Default: `http://localhost:9200`).