Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENHANCEMENT / BUGFIX] argilla: manage responses for deleted users #5070

Closed
2 changes: 1 addition & 1 deletion argilla/docs/how_to_guides/record.md
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ dataset.records.log(records=updated_data)
```

=== "Update vectors"
When a new vector field is added to the dataset settings, or some value for the existing record vectors must be updated, you can iterate over the records and update the vectors in the same way as the metadata.
When a new vector field is added to the dataset settings, or some value for the existing record vectors must be updated, you can iterate over the records and update the vectors in the same way as the metadata.

```python
updated_records = []
Expand Down
1 change: 1 addition & 0 deletions argilla/src/argilla/_models/_record/_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

FieldValue = Union[str, None]


class RecordModel(ResourceModel):
"""Schema for the records of a `Dataset`"""

Expand Down
21 changes: 5 additions & 16 deletions argilla/src/argilla/_models/_record/_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings
from enum import Enum
from typing import Dict, Optional, Union, Any
from uuid import UUID

from pydantic import BaseModel, field_serializer, field_validator, Field
from pydantic import BaseModel, field_serializer, Field, ConfigDict


class ResponseStatus(str, Enum):
Expand All @@ -33,20 +32,10 @@ class UserResponseModel(BaseModel):
status: ResponseStatus
user_id: Optional[UUID] = Field(None, validate_default=True)

class Config:
validate_assignment = True

@field_validator("user_id")
@classmethod
def user_id_must_have_value(cls, user_id: Optional[UUID]):
if not user_id:
warnings.warn(
"`user_id` not provided, so it will be set to `None`. Which is not an"
" issue, unless you're planning to log the response in Argilla, as"
" it will be automatically set to the active `user_id`.",
)
return user_id
model_config = ConfigDict(
validate_assignment=True,
)

@field_serializer("user_id", when_used="always")
def serialize_user_id(value: UUID) -> str:
def serialize_user_id(self, value: UUID) -> str:
return str(value)
1 change: 0 additions & 1 deletion argilla/src/argilla/records/_dataset_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from argilla.settings._metadata import MetadataPropertyBase
from argilla.settings._question import QuestionPropertyBase
from argilla.suggestions import Suggestion
from argilla.vectors import Vector

if TYPE_CHECKING:
from argilla.datasets import Dataset
Expand Down
7 changes: 6 additions & 1 deletion argilla/src/argilla/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from argilla._models import UserResponseModel, ResponseStatus as ResponseStatusModel
from argilla._resource import Resource
from argilla.settings import RankingQuestion
from argilla.users import DELETED_USER

if TYPE_CHECKING:
from argilla import Argilla, Dataset, Record
Expand Down Expand Up @@ -143,7 +144,7 @@ def status(self, status: ResponseStatus) -> None:
self._model.status = status

@property
def user_id(self) -> UUID:
def user_id(self) -> Optional[UUID]:
"""Returns the user_id of the UserResponse"""
return self._model.user_id

Expand All @@ -160,6 +161,10 @@ def answers(self) -> List[Response]:
@classmethod
def from_model(cls, model: UserResponseModel, dataset: "Dataset") -> "UserResponse":
"""Creates a UserResponse from a ResponseModel"""

if model.user_id is None:
model.user_id = DELETED_USER.id

answers = cls.__model_as_response_list(model)
for answer in answers:
question = dataset.settings.question_by_name(answer.question_name)
Expand Down
6 changes: 5 additions & 1 deletion argilla/src/argilla/users/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from uuid import UUID

from argilla.users._resource import User

__all__ = ["User"]
__all__ = ["User", "DELETED_USER"]

# Placeholder user that stands in for a deleted user. Used when records contain responses from a user that has been deleted.
DELETED_USER = User(id=UUID("00000000-0000-0000-0000-000000000000"), username="deleted")
2 changes: 1 addition & 1 deletion argilla/src/argilla/users/_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from typing import Optional
from uuid import UUID

from argilla import Workspace
from argilla.workspaces import Workspace
from argilla._api import UsersAPI
from argilla._models import UserModel, Role
from argilla._resource import Resource
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

@pytest.fixture
def record():

return rg.Record(
id=uuid.uuid4(),
fields={"text": "Hello World, how are you?"},
Expand All @@ -34,7 +33,6 @@ def record():
responses=[rg.Response("label", "positive", user_id=uuid.uuid4())],
metadata={"source": "twitter", "language": "en"},
vectors={"text": [0, 0, 0]},

)


Expand Down
11 changes: 5 additions & 6 deletions argilla/tests/unit/test_resources/test_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ def test_record_repr(self):
responses=[Response(question_name="question", value="answer", user_id=user_id)],
)
assert (
record.__repr__() == f"Record(id={record_id},"
"fields={'name': 'John', 'age': '30'},"
"metadata={'key': 'value'},"
"suggestions={'question': {'value': 'answer', 'score': None, 'agent': None}},"
f"responses={{'question': [{{'value': 'answer'}}]}})"
record.__repr__() == f"Record(id={record_id},"
"fields={'name': 'John', 'age': '30'},"
"metadata={'key': 'value'},"
"suggestions={'question': {'value': 'answer', 'score': None, 'agent': None}},"
f"responses={{'question': [{{'value': 'answer'}}]}})"
)

def test_update_record_metadata_by_key(self):
Expand Down Expand Up @@ -62,4 +62,3 @@ def test_update_record_vectors(self):

record.vectors["new-vector"] = [1.0, 2.0, 3.0]
assert record.vectors == {"vector": [1.0, 2.0, 3.0], "new-vector": [1.0, 2.0, 3.0]}

15 changes: 14 additions & 1 deletion argilla/tests/unit/test_resources/test_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

import pytest

from argilla import UserResponse, Response
from argilla import Response, Dataset, Settings, TextQuestion, Workspace
from argilla._models import UserResponseModel
from argilla.responses import ResponseStatus, UserResponse
from argilla.users import DELETED_USER


class TestResponses:
Expand Down Expand Up @@ -85,3 +88,13 @@ def test_create_user_response_with_multiple_user_id(self):
Response(question_name="other-question", value="answer", user_id=other_user_id),
],
)

def test_create_user_response_from_model_without_user_id(self):
mock_workspace = Workspace(id=uuid.uuid4(), name="workspace")
dataset = Dataset(settings=Settings(questions=[TextQuestion(name="question")]), workspace=mock_workspace)
response = UserResponse.from_model(
UserResponseModel(values={"question": {"value": "answer"}}, user_id=None, status=ResponseStatus.draft),
dataset=dataset,
)

assert response.user_id == DELETED_USER.id
Loading