From 83996fe0db274b2dc516a163b0e17c30ee1ef6e9 Mon Sep 17 00:00:00 2001 From: David Berenstein Date: Mon, 7 Oct 2024 12:33:06 +0200 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20set=20CREATOR=5FUSER=5FID=20to=20av?= =?UTF-8?q?oid=20difficulties=20with=20creation=20in=20orga=E2=80=A6=20(#5?= =?UTF-8?q?556)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …nisation # Description User `CREATOR_USER_ID` with this API https://huggingface.co/api/users/davidberenstein1957/overview **Type of change** - Improvement (change adding some improvement to an existing functionality) **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --------- Co-authored-by: Paco Aranda --- .../docker/argilla-hf-spaces/Dockerfile | 4 +++- .../docker/argilla-hf-spaces/scripts/start.sh | 7 +++++-- .../how-to-configure-argilla-on-huggingface.md | 17 ++--------------- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/argilla-server/docker/argilla-hf-spaces/Dockerfile b/argilla-server/docker/argilla-hf-spaces/Dockerfile index 1fb2419e81..131b0a9e6b 100644 --- a/argilla-server/docker/argilla-hf-spaces/Dockerfile +++ b/argilla-server/docker/argilla-hf-spaces/Dockerfile @@ -44,7 +44,9 @@ RUN \ apt-get remove -y wget gnupg && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - rm -rf /packages + rm -rf /packages && \ + # Install pwgen curl and jq + apt-get update && apt-get install -y curl jq pwgen COPY config/elasticsearch.yml /etc/elasticsearch/elasticsearch.yml diff --git a/argilla-server/docker/argilla-hf-spaces/scripts/start.sh b/argilla-server/docker/argilla-hf-spaces/scripts/start.sh index 28b5e2a1e1..cc29f104d3 100644 --- a/argilla-server/docker/argilla-hf-spaces/scripts/start.sh +++ b/argilla-server/docker/argilla-hf-spaces/scripts/start.sh @@ -8,8 +8,11 @@ export OAUTH2_HUGGINGFACE_CLIENT_ID=$OAUTH_CLIENT_ID export OAUTH2_HUGGINGFACE_CLIENT_SECRET=$OAUTH_CLIENT_SECRET export OAUTH2_HUGGINGFACE_SCOPE=$OAUTH_SCOPES -# Set the space author name as username if no provided. +# Set the space creator name as username if no name is provided, if the user is not found, use the provided space author name # See https://huggingface.co/docs/hub/en/spaces-overview#helper-environment-variables for more details -export USERNAME="${USERNAME:-$SPACE_AUTHOR_NAME}" +DEFAULT_USERNAME=$(curl -L -s https://huggingface.co/api/users/${SPACES_CREATOR_USER_ID}/overview | jq -r '.user' || echo "${SPACE_AUTHOR_NAME}") +export USERNAME="${USERNAME:-$DEFAULT_USERNAME}" +DEFAULT_PASSWORD=$(pwgen -s 16 1) +export PASSWORD="${PASSWORD:-$DEFAULT_PASSWORD}" honcho start diff --git a/argilla/docs/getting_started/how-to-configure-argilla-on-huggingface.md b/argilla/docs/getting_started/how-to-configure-argilla-on-huggingface.md index a256f5d15c..7a0c46516a 100644 --- a/argilla/docs/getting_started/how-to-configure-argilla-on-huggingface.md +++ b/argilla/docs/getting_started/how-to-configure-argilla-on-huggingface.md @@ -82,10 +82,7 @@ Creating an Argilla Space within an organization is useful for several scenarios - **You want manage the Space together with other users** (e.g., Space settings, etc.). Note that if you just want to manage your Argilla datasets, workspaces, you can achieve this by adding other Argilla `owner` roles to your Argilla Server. - **More generally, you want to make available your space under an organization/community umbrella**. -The steps are very similar the [Quickstart guide](quickstart.md) with two important differences: - -!!! tip "Setup USERNAME" - You need to **set up the `USERNAME` Space Secret with your Hugging Face username**. This way, the first time you enter with the `Hugging Face Sign in` button, you'll be granted the `owner` role. +The steps are very similar the [Quickstart guide](quickstart.md) with one important difference: !!! tip "Enable Persistent Storage `SMALL`" Not setting persistent storage to `Small` means that **you will loose your data when the Space restarts**. @@ -118,16 +115,6 @@ client = rg.Argilla( ## Space Secrets overview -There's two optional secrets to set up the `USERNAME` and `PASSWORD` of the `owner` of the Argilla Space. Remember that, by default Argilla Spaces are configured with a *Sign in with Hugging Face* button, which is also used to grant an `owner` to the creator of the Space for personal spaces. - -The `USERNAME` and `PASSWORD` are only useful in a couple of scenarios: - -- You have [disabled Hugging Face OAuth](#how-to-configure-and-disable-oauth-access). -- You want to [set up Argilla under an organization](#how-to-deploy-argilla-under-a-hugging-face-organization) and want your Hugging Face username to be granted the `owner` role. +Remember that, by default, Argilla Spaces are configured with a *Sign in with Hugging Face* button, which is also used to grant an `owner` to the creator of the Space. There are two optional secrets to set up the `USERNAME` and `PASSWORD` of the `owner` of Argilla Space. Those are useful when you want to create a different owner user to login into Argilla. -In summary, when setting up a Space: -!!! info "Creating a Space under your personal account" - If you are creating the Space under your personal account, **don't insert any value for `USERNAME` and `PASSWORD`**. Once you launch the Space you will be able to Sign in with your Hugging Face username and the `owner` role. -!!! info "Creating a Space under an organization" - If you are creating the Space under an organization **make sure to insert your Hugging Face username in the secret `USERNAME`**. In this way, you'll be able to Sign in with your Hugging Face user. \ No newline at end of file From 485f3ff4b5fcb3b654e00c11e4cd6cd03666f556 Mon Sep 17 00:00:00 2001 From: Paco Aranda Date: Mon, 7 Oct 2024 13:22:25 +0200 Subject: [PATCH 2/2] [Refactor] remove name validations for dataset workspaces and usernames (#5575) # Description This PR removes name pattern validations for datasets, workspaces, and users. **Type of change** - Refactor (change restructuring the codebase without changing functionality) - Improvement (change adding some improvement to an existing functionality) **How Has This Been Tested** **Checklist** - I added relevant documentation - I followed the style guidelines of this project - I did a self-review of my code - I made corresponding changes to the documentation - I confirm My changes generate no new warnings - I have added tests that prove my fix is effective or that my feature works - I have added relevant notes to the CHANGELOG.md file (See https://keepachangelog.com/) --- argilla-server/CHANGELOG.md | 4 +++ .../argilla_server/api/schemas/v1/datasets.py | 6 +++-- .../argilla_server/api/schemas/v1/users.py | 3 +-- .../api/schemas/v1/workspaces.py | 5 +--- .../cli/database/users/migrate.py | 6 ++--- .../src/argilla_server/constants.py | 2 -- .../unit/api/handlers/v1/test_datasets.py | 8 ------ .../api/handlers/v1/users/test_create_user.py | 17 ------------ .../v1/workspaces/test_create_workspace.py | 12 --------- .../unit/cli/database/users/test_create.py | 27 ++----------------- .../unit/cli/database/users/test_migrate.py | 24 ----------------- .../tests/unit/security/test_model.py | 27 +------------------ 12 files changed, 15 insertions(+), 126 deletions(-) diff --git a/argilla-server/CHANGELOG.md b/argilla-server/CHANGELOG.md index 4567fac5ab..0289a4b3fa 100644 --- a/argilla-server/CHANGELOG.md +++ b/argilla-server/CHANGELOG.md @@ -16,6 +16,10 @@ These are the section headers that we use: ## [Unreleased]() +### Removed + +- Removed name pattern validation for Workspaces, Datasets, and Users. ([#5575](https://github.com/argilla-io/argilla/pull/5575)) + ## [2.3.0](https://github.com/argilla-io/argilla/compare/v2.2.0...v2.3.0) ### Added diff --git a/argilla-server/src/argilla_server/api/schemas/v1/datasets.py b/argilla-server/src/argilla_server/api/schemas/v1/datasets.py index 054f8a346b..2becb6c1f2 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/datasets.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/datasets.py @@ -25,14 +25,16 @@ except ImportError: from typing_extensions import Annotated -DATASET_NAME_REGEX = r"^(?!-|_)[a-zA-Z0-9-_ ]+$" DATASET_NAME_MIN_LENGTH = 1 DATASET_NAME_MAX_LENGTH = 200 DATASET_GUIDELINES_MIN_LENGTH = 1 DATASET_GUIDELINES_MAX_LENGTH = 10000 DatasetName = Annotated[ - constr(regex=DATASET_NAME_REGEX, min_length=DATASET_NAME_MIN_LENGTH, max_length=DATASET_NAME_MAX_LENGTH), + constr( + min_length=DATASET_NAME_MIN_LENGTH, + max_length=DATASET_NAME_MAX_LENGTH, + ), Field(..., description="Dataset name"), ] diff --git a/argilla-server/src/argilla_server/api/schemas/v1/users.py b/argilla-server/src/argilla_server/api/schemas/v1/users.py index 1b93d7f6e2..1480001552 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/users.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/users.py @@ -19,7 +19,6 @@ from argilla_server.enums import UserRole from argilla_server.pydantic_v1 import BaseModel, Field, constr -USER_USERNAME_REGEX = "^(?!-|_)[A-za-z0-9-_]+$" USER_PASSWORD_MIN_LENGTH = 8 USER_PASSWORD_MAX_LENGTH = 100 @@ -43,7 +42,7 @@ class Config: class UserCreate(BaseModel): first_name: constr(min_length=1, strip_whitespace=True) last_name: Optional[constr(min_length=1, strip_whitespace=True)] - username: str = Field(regex=USER_USERNAME_REGEX, min_length=1) + username: str = Field(..., min_length=1) role: Optional[UserRole] password: str = Field(min_length=USER_PASSWORD_MIN_LENGTH, max_length=USER_PASSWORD_MAX_LENGTH) diff --git a/argilla-server/src/argilla_server/api/schemas/v1/workspaces.py b/argilla-server/src/argilla_server/api/schemas/v1/workspaces.py index 071aeaf7b6..1b6215123c 100644 --- a/argilla-server/src/argilla_server/api/schemas/v1/workspaces.py +++ b/argilla-server/src/argilla_server/api/schemas/v1/workspaces.py @@ -16,11 +16,8 @@ from typing import List from uuid import UUID -from argilla_server.constants import ES_INDEX_REGEX_PATTERN from argilla_server.pydantic_v1 import BaseModel, Field -WORKSPACE_NAME_REGEX = ES_INDEX_REGEX_PATTERN - class Workspace(BaseModel): id: UUID @@ -33,7 +30,7 @@ class Config: class WorkspaceCreate(BaseModel): - name: str = Field(regex=WORKSPACE_NAME_REGEX, min_length=1) + name: str = Field(min_length=1) class Workspaces(BaseModel): diff --git a/argilla-server/src/argilla_server/cli/database/users/migrate.py b/argilla-server/src/argilla_server/cli/database/users/migrate.py index 1de6126ee0..beb6c32d84 100644 --- a/argilla-server/src/argilla_server/cli/database/users/migrate.py +++ b/argilla-server/src/argilla_server/cli/database/users/migrate.py @@ -18,8 +18,6 @@ import typer import yaml -from argilla_server.api.schemas.v1.users import USER_USERNAME_REGEX -from argilla_server.api.schemas.v1.workspaces import WORKSPACE_NAME_REGEX from argilla_server.database import AsyncSessionLocal from argilla_server.models import User, UserRole from argilla_server.pydantic_v1 import BaseModel, Field, constr @@ -31,12 +29,12 @@ class WorkspaceCreate(BaseModel): - name: str = Field(..., regex=WORKSPACE_NAME_REGEX, min_length=1) + name: str = Field(..., min_length=1) class UserCreate(BaseModel): first_name: constr(strip_whitespace=True) - username: str = Field(..., regex=USER_USERNAME_REGEX, min_length=1) + username: str = Field(..., min_length=1) role: UserRole api_key: constr(min_length=1) password_hash: constr(min_length=1) diff --git a/argilla-server/src/argilla_server/constants.py b/argilla-server/src/argilla_server/constants.py index 7814d86af7..768193095f 100644 --- a/argilla-server/src/argilla_server/constants.py +++ b/argilla-server/src/argilla_server/constants.py @@ -39,6 +39,4 @@ # The metadata field name prefix defined for protected (non-searchable) values PROTECTED_METADATA_FIELD_PREFIX = "_" -ES_INDEX_REGEX_PATTERN = r"^(?!-|_)[a-z0-9-_]+$" - JS_MAX_SAFE_INTEGER = 9007199254740991 diff --git a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py index 95da034f73..e86ff36662 100644 --- a/argilla-server/tests/unit/api/handlers/v1/test_datasets.py +++ b/argilla-server/tests/unit/api/handlers/v1/test_datasets.py @@ -916,10 +916,6 @@ async def test_create_dataset_with_invalid_length_guidelines( "dataset_json", [ {"name": ""}, - {"name": "123$abc"}, - {"name": "unit@test"}, - {"name": "-test-dataset"}, - {"name": "_test-dataset"}, {"name": "a" * (DATASET_NAME_MAX_LENGTH + 1)}, {"name": "test-dataset", "guidelines": ""}, {"name": "test-dataset", "guidelines": "a" * (DATASET_GUIDELINES_MAX_LENGTH + 1)}, @@ -4777,10 +4773,6 @@ async def test_update_dataset(self, async_client: "AsyncClient", db: "AsyncSessi [ {"name": None}, {"name": ""}, - {"name": "123$abc"}, - {"name": "unit@test"}, - {"name": "-test-dataset"}, - {"name": "_test-dataset"}, {"name": "a" * (DATASET_NAME_MAX_LENGTH + 1)}, {"name": "test-dataset", "guidelines": ""}, {"name": "test-dataset", "guidelines": "a" * (DATASET_GUIDELINES_MAX_LENGTH + 1)}, diff --git a/argilla-server/tests/unit/api/handlers/v1/users/test_create_user.py b/argilla-server/tests/unit/api/handlers/v1/users/test_create_user.py index 059f863830..ebe95aa6b7 100644 --- a/argilla-server/tests/unit/api/handlers/v1/users/test_create_user.py +++ b/argilla-server/tests/unit/api/handlers/v1/users/test_create_user.py @@ -201,23 +201,6 @@ async def test_create_user_with_existent_username( assert (await db.execute(select(func.count(User.id)))).scalar() == 2 - async def test_create_user_with_invalid_username( - self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict - ): - response = await async_client.post( - self.url(), - headers=owner_auth_header, - json={ - "first_name": "First name", - "last_name": "Last name", - "username": "invalid username", - "password": "12345678", - }, - ) - - assert response.status_code == 422 - assert (await db.execute(select(func.count(User.id)))).scalar() == 1 - async def test_create_user_with_invalid_min_length_first_name( self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict ): diff --git a/argilla-server/tests/unit/api/handlers/v1/workspaces/test_create_workspace.py b/argilla-server/tests/unit/api/handlers/v1/workspaces/test_create_workspace.py index c8de03b874..d2bb2b4e40 100644 --- a/argilla-server/tests/unit/api/handlers/v1/workspaces/test_create_workspace.py +++ b/argilla-server/tests/unit/api/handlers/v1/workspaces/test_create_workspace.py @@ -87,18 +87,6 @@ async def test_create_workspace_with_existent_name( assert (await db.execute(select(func.count(Workspace.id)))).scalar() == 1 - async def test_create_workspace_with_invalid_name( - self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict - ): - response = await async_client.post( - self.url(), - headers=owner_auth_header, - json={"name": "invalid name"}, - ) - - assert response.status_code == 422 - assert (await db.execute(select(func.count(Workspace.id)))).scalar() == 0 - async def test_create_workspace_with_invalid_min_length_name( self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict ): diff --git a/argilla-server/tests/unit/cli/database/users/test_create.py b/argilla-server/tests/unit/cli/database/users/test_create.py index d39f5ab56a..7869592b0b 100644 --- a/argilla-server/tests/unit/cli/database/users/test_create.py +++ b/argilla-server/tests/unit/cli/database/users/test_create.py @@ -15,11 +15,11 @@ from typing import TYPE_CHECKING import pytest -from argilla_server.contexts import accounts -from argilla_server.models import User, UserRole, Workspace from click.testing import CliRunner from typer import Typer +from argilla_server.contexts import accounts +from argilla_server.models import User, UserRole, Workspace from tests.factories import UserSyncFactory, WorkspaceSyncFactory if TYPE_CHECKING: @@ -131,17 +131,6 @@ def test_create_with_input_username(sync_db: "Session", cli_runner: CliRunner, c assert sync_db.query(User).filter_by(username="username").first() -def test_create_with_invalid_username(sync_db: "Session", cli_runner: CliRunner, cli: Typer): - result = cli_runner.invoke( - cli, - "database users create --first-name first-name --username -Invalid-Username --password 12345678 --role owner", - ) - - assert result.exit_code == 1 - assert sync_db.query(User).count() == 0 - assert sync_db.query(Workspace).count() == 0 - - def test_create_with_existing_username(sync_db: "Session", cli_runner: CliRunner, cli: Typer): UserSyncFactory.create(username="username") @@ -243,15 +232,3 @@ def test_create_with_existent_workspaces(sync_db: "Session", cli_runner: CliRunn user = sync_db.query(User).filter_by(username="username").first() assert user assert [ws.name for ws in user.workspaces] == ["workspace-a", "workspace-b", "workspace-c"] - - -def test_create_with_invalid_workspaces(sync_db: "Session", cli_runner: CliRunner, cli: Typer): - result = cli_runner.invoke( - cli, - "database users create --first-name first-name --username username --role owner --password 12345678 " - "--workspace workspace-a --workspace 'invalid workspace' --workspace workspace-c", - ) - - assert result.exit_code == 1 - assert sync_db.query(User).count() == 0 - assert sync_db.query(Workspace).count() == 0 diff --git a/argilla-server/tests/unit/cli/database/users/test_migrate.py b/argilla-server/tests/unit/cli/database/users/test_migrate.py index f35c7ba07f..45e8071541 100644 --- a/argilla-server/tests/unit/cli/database/users/test_migrate.py +++ b/argilla-server/tests/unit/cli/database/users/test_migrate.py @@ -96,30 +96,6 @@ def test_migrate_with_one_user_file(monkeypatch, sync_db: "Session", cli_runner: assert [ws.name for ws in user.workspaces] == ["john", "argilla", "team"] -def test_migrate_with_invalid_user(monkeypatch, sync_db: "Session", cli_runner: CliRunner, cli: Typer): - mock_users_file = os.path.join(os.path.dirname(__file__), "test_user_files", "users_invalid_user.yml") - - with mock.patch.dict(os.environ, {"ARGILLA_LOCAL_AUTH_USERS_DB_FILE": mock_users_file}): - result = cli_runner.invoke(cli, "database users migrate") - - assert result.exit_code == 1 - assert sync_db.query(User).count() == 0 - assert sync_db.query(Workspace).count() == 0 - assert sync_db.query(WorkspaceUser).count() == 0 - - -def test_migrate_with_invalid_workspace(monkeypatch, sync_db: "Session", cli_runner: CliRunner, cli: Typer): - mock_users_file = os.path.join(os.path.dirname(__file__), "test_user_files", "users_invalid_workspace.yml") - - with mock.patch.dict(os.environ, {"ARGILLA_LOCAL_AUTH_USERS_DB_FILE": mock_users_file}): - result = cli_runner.invoke(cli, "database users migrate") - - assert result.exit_code == 1 - assert sync_db.query(User).count() == 0 - assert sync_db.query(Workspace).count() == 0 - assert sync_db.query(WorkspaceUser).count() == 0 - - def test_migrate_with_nonexistent_file(monkeypatch, sync_db: "Session", cli_runner: CliRunner, cli: Typer): with mock.patch.dict(os.environ, {"ARGILLA_LOCAL_AUTH_USERS_DB_FILE": "nonexistent.yml"}): result = cli_runner.invoke(cli, "database users migrate") diff --git a/argilla-server/tests/unit/security/test_model.py b/argilla-server/tests/unit/security/test_model.py index 1b3e7a875c..fa8092a775 100644 --- a/argilla-server/tests/unit/security/test_model.py +++ b/argilla-server/tests/unit/security/test_model.py @@ -14,19 +14,13 @@ import pytest + from argilla_server.api.schemas.v1.users import User, UserCreate from argilla_server.api.schemas.v1.workspaces import WorkspaceCreate - from tests.factories import UserFactory from tests.pydantic_v1 import ValidationError -@pytest.mark.parametrize("invalid_name", ["work space", "work/space", "work.space", "_", "-"]) -def test_workspace_create_invalid_name(invalid_name: str): - with pytest.raises(ValidationError): - WorkspaceCreate(name=invalid_name) - - @pytest.mark.parametrize( "username", [ @@ -49,25 +43,6 @@ def test_user_create(username: str): assert UserCreate(first_name="first-name", username=username, password="12345678") -@pytest.mark.parametrize( - "invalid_username", - [ - "user name", - "user/name", - "user.name", - "_", - "-", - "-1234", - "_1234", - "_mark", - "-mark", - ], -) -def test_user_create_invalid_username(invalid_username: str): - with pytest.raises(ValidationError): - UserCreate(first_name="first-name", username=invalid_username, password="12345678") - - @pytest.mark.asyncio async def test_user_first_name(): user = await UserFactory.create(first_name="first-name", workspaces=[])