Skip to content

Commit

Permalink
Merge branch 'develop' into feat/add-webhooks-feature-branch
Browse files Browse the repository at this point in the history
  • Loading branch information
frascuchon authored Sep 16, 2024
2 parents a55e884 + bafd92f commit 935a299
Show file tree
Hide file tree
Showing 17 changed files with 429 additions and 41 deletions.
10 changes: 1 addition & 9 deletions .github/workflows/argilla.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,15 @@ jobs:
build:
services:
argilla-server:
image: argilladev/argilla-server:develop
image: argilladev/argilla-hf-spaces:develop
ports:
- 6900:6900
env:
ARGILLA_ENABLE_TELEMETRY: 0
ARGILLA_ELASTICSEARCH: http://elasticsearch:9200
# Set credentials
USERNAME: argilla
PASSWORD: 12345678
API_KEY: argilla.apikey
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:8.8.2
ports:
- 9200:9200
env:
discovery.type: single-node
xpack.security.enabled: false
runs-on: ubuntu-latest
defaults:
run:
Expand Down
4 changes: 4 additions & 0 deletions argilla-server/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ These are the section headers that we use:
- Added new webhook events when responses are created, updated, deleted or upserted. ([#5468](https://github.com/argilla-io/argilla/pull/5468))
- Added new webhook events when datasets are created, updated, deleted or published. ([#5468](https://github.com/argilla-io/argilla/pull/5468))

### Fixed

- Fixed error when computing dataset progress by users without responses related to pending or completed records. ([#5484](https://github.com/argilla-io/argilla/pull/5484))

## [2.1.0](https://github.com/argilla-io/argilla/compare/v2.0.0...v2.1.0)

### Added
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ async def get_dataset_progress(
return await datasets.get_dataset_progress(db, dataset.id)


@router.get("/datasets/{dataset_id}/users/progress", response_model=UsersProgress, response_model_exclude_unset=True)
@router.get("/datasets/{dataset_id}/users/progress", response_model=UsersProgress)
async def get_dataset_users_progress(
*,
current_user: User = Security(auth.get_current_user),
Expand Down
10 changes: 5 additions & 5 deletions argilla-server/src/argilla_server/api/schemas/v1/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,15 @@ class DatasetProgress(BaseModel):


class RecordResponseDistribution(BaseModel):
submitted: Optional[int]
discarded: Optional[int]
draft: Optional[int]
submitted: int = 0
discarded: int = 0
draft: int = 0


class UserProgress(BaseModel):
username: str
completed: RecordResponseDistribution
pending: RecordResponseDistribution
completed: RecordResponseDistribution = RecordResponseDistribution()
pending: RecordResponseDistribution = RecordResponseDistribution()


class UsersProgress(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,96 @@ async def test_get_dataset_users_progress(self, async_client: AsyncClient, owner
"users": [
{
"username": user_with_submitted.username,
"completed": {"submitted": 3},
"pending": {"submitted": 2},
"completed": {"submitted": 3, "discarded": 0, "draft": 0},
"pending": {"submitted": 2, "discarded": 0, "draft": 0},
},
{
"username": user_with_draft.username,
"completed": {"draft": 3},
"pending": {"draft": 2},
"completed": {"submitted": 0, "discarded": 0, "draft": 3},
"pending": {"submitted": 0, "discarded": 0, "draft": 2},
},
{
"username": user_with_discarded.username,
"completed": {"discarded": 3},
"pending": {"discarded": 2},
"completed": {"submitted": 0, "discarded": 3, "draft": 0},
"pending": {"submitted": 0, "discarded": 2, "draft": 0},
},
]
}

async def test_get_dataset_users_progress_only_with_pending(
    self, async_client: AsyncClient, owner_auth_header: dict
):
    """Users progress is fully populated when the dataset has only pending records.

    Creates two pending records and gives each of three annotators one
    response per record (submitted, draft and discarded respectively). The
    endpoint must report those counts under ``pending`` and an all-zero
    distribution under ``completed``.
    """
    dataset = await DatasetFactory.create()

    user_with_submitted = await AnnotatorFactory.create()
    user_with_draft = await AnnotatorFactory.create()
    user_with_discarded = await AnnotatorFactory.create()

    # No completed records are created on purpose: only the pending bucket has data.
    records_pending = await RecordFactory.create_batch(2, status=RecordStatus.pending, dataset=dataset)

    for record in records_pending:
        await ResponseFactory.create(record=record, user=user_with_submitted, status=ResponseStatus.submitted)
        await ResponseFactory.create(record=record, user=user_with_draft, status=ResponseStatus.draft)
        await ResponseFactory.create(record=record, user=user_with_discarded, status=ResponseStatus.discarded)

    response = await async_client.get(self.url(dataset.id), headers=owner_auth_header)

    assert response.status_code == 200, response.json()
    # Every status key is expected in both buckets, with 0 where the user has no responses.
    assert response.json() == {
        "users": [
            {
                "username": user_with_submitted.username,
                "completed": {"submitted": 0, "discarded": 0, "draft": 0},
                "pending": {"submitted": 2, "discarded": 0, "draft": 0},
            },
            {
                "username": user_with_draft.username,
                "completed": {"submitted": 0, "discarded": 0, "draft": 0},
                "pending": {"submitted": 0, "discarded": 0, "draft": 2},
            },
            {
                "username": user_with_discarded.username,
                "completed": {"submitted": 0, "discarded": 0, "draft": 0},
                "pending": {"submitted": 0, "discarded": 2, "draft": 0},
            },
        ]
    }

async def test_get_dataset_users_progress_only_with_completed(
    self, async_client: AsyncClient, owner_auth_header: dict
):
    """Users progress is fully populated when the dataset has only completed records.

    Creates three completed records and gives each of three annotators one
    response per record (submitted, draft and discarded respectively). The
    endpoint must report those counts under ``completed`` and an all-zero
    distribution under ``pending``.
    """
    dataset = await DatasetFactory.create()

    user_with_submitted = await AnnotatorFactory.create()
    user_with_draft = await AnnotatorFactory.create()
    user_with_discarded = await AnnotatorFactory.create()

    # No pending records are created on purpose: only the completed bucket has data.
    records_completed = await RecordFactory.create_batch(3, status=RecordStatus.completed, dataset=dataset)

    for record in records_completed:
        await ResponseFactory.create(record=record, user=user_with_submitted, status=ResponseStatus.submitted)
        await ResponseFactory.create(record=record, user=user_with_draft, status=ResponseStatus.draft)
        await ResponseFactory.create(record=record, user=user_with_discarded, status=ResponseStatus.discarded)

    response = await async_client.get(self.url(dataset.id), headers=owner_auth_header)

    assert response.status_code == 200, response.json()
    # Every status key is expected in both buckets, with 0 where the user has no responses.
    assert response.json() == {
        "users": [
            {
                "username": user_with_submitted.username,
                "completed": {"submitted": 3, "discarded": 0, "draft": 0},
                "pending": {"submitted": 0, "discarded": 0, "draft": 0},
            },
            {
                "username": user_with_draft.username,
                "completed": {"submitted": 0, "discarded": 0, "draft": 3},
                "pending": {"submitted": 0, "discarded": 0, "draft": 0},
            },
            {
                "username": user_with_discarded.username,
                "completed": {"submitted": 0, "discarded": 3, "draft": 0},
                "pending": {"submitted": 0, "discarded": 0, "draft": 0},
            },
        ]
    }
Expand Down
3 changes: 3 additions & 0 deletions argilla/docs/how_to_guides/annotate.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ You can track the progress of an annotation task in the progress bar shown in th

You can also track your own progress in real time by expanding the bottom-right panel inside the dataset page. There you can see the number of records for which you have `Pending`, `Draft`, `Submitted` and `Discarded` responses.

!!! note
    You can also explore the dataset progress from the SDK. See the [Track your team's progress](./distribution.md#track-your-teams-progress) section to learn more.

## Use search, filters, and sort

The UI offers various features designed for data exploration and understanding. Combining these features with bulk labelling can save you and your team hours of time.
Expand Down
59 changes: 58 additions & 1 deletion argilla/docs/how_to_guides/distribution.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,61 @@ dataset = client.datasets("my_dataset")
dataset.settings.distribution.min_submitted = 4

dataset.update()
```
```

## Track your team's progress

You can check the progress of the annotation task by using the `dataset.progress` method.
This method returns the number of records with the status `completed` and `pending`, as well as the
total number of records in the dataset.

```python
import argilla as rg

client = rg.Argilla(api_url="<api_url>", api_key="<api_key>")

dataset = client.datasets("my_dataset")

progress = dataset.progress()
```
```json
{
"total": 100,
"completed": 10,
"pending": 90
}
```

You can also include the distribution of responses per user in the progress by setting the `with_users_distribution` parameter to `True`.
This will return the number of records that have the status `completed` and `pending`, and the total number of records in the dataset,
as well as, for each user, the number of `submitted`, `draft` and `discarded` responses on completed and pending records. You can visit the [Annotation Progress](../how_to_guides/annotate.md#annotation-progress) section for more information.

```python
import argilla as rg

client = rg.Argilla(api_url="<api_url>", api_key="<api_key>")

dataset = client.datasets("my_dataset")

progress = dataset.progress(with_users_distribution=True)
```
```json
{
    "total": 100,
    "completed": 50,
    "pending": 50,
    "users": {
        "user1": {
            "completed": { "submitted": 10, "draft": 5, "discarded": 5},
            "pending": { "submitted": 5, "draft": 10, "discarded": 10}
        },
        "user2": {
            "completed": { "submitted": 20, "draft": 10, "discarded": 5},
            "pending": { "submitted": 2, "draft": 25, "discarded": 0}
        },
        ...
    }
}
```

!!! note
Since the completed records can contain submissions from multiple users, the number of completed submissions per user may not match the total number of completed records.
4 changes: 3 additions & 1 deletion argilla/src/argilla/_api/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,9 @@ class APIClient:
def __init__(
self,
api_url: Optional[str] = DEFAULT_HTTP_CONFIG.api_url,
api_key: str = DEFAULT_HTTP_CONFIG.api_key,
api_key: Optional[str] = DEFAULT_HTTP_CONFIG.api_key,
timeout: int = DEFAULT_HTTP_CONFIG.timeout,
retries: int = DEFAULT_HTTP_CONFIG.retries,
**http_client_args,
):
if not api_url:
Expand All @@ -120,6 +121,7 @@ def __init__(

http_client_args = http_client_args or {}
http_client_args["timeout"] = timeout
http_client_args["retries"] = retries

self.http_client = create_http_client(
api_url=self.api_url, # type: ignore
Expand Down
20 changes: 20 additions & 0 deletions argilla/src/argilla/_api/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

__all__ = ["DatasetsAPI"]

from argilla._models._dataset_progress import UserProgressModel, DatasetProgressModel


class DatasetsAPI(ResourceAPI[DatasetModel]):
"""Manage datasets via the API"""
Expand Down Expand Up @@ -80,6 +82,24 @@ def exists(self, dataset_id: UUID) -> bool:
# Utility methods #
####################

@api_error_handler
def get_progress(self, dataset_id: UUID) -> DatasetProgressModel:
    """Fetch the overall progress of a dataset.

    Sends a GET request to the dataset's ``progress`` endpoint and validates
    the JSON body into a ``DatasetProgressModel``.

    Args:
        dataset_id: Identifier of the dataset to query.

    Returns:
        The progress model built from the response payload.

    Raises:
        Whatever ``raise_for_status()`` raises for an error status
        (presumably translated by ``api_error_handler`` - confirm).
    """
    progress_url = f"{self.url_stub}/{dataset_id}/progress"
    resp = self.http_client.get(progress_url)
    resp.raise_for_status()
    # Decode the body before logging so a malformed payload is not reported as success.
    payload = resp.json()

    self._log_message(message=f"Got progress for dataset {dataset_id}")
    return DatasetProgressModel.model_validate(payload)

@api_error_handler
def list_users_progress(self, dataset_id: UUID) -> List[UserProgressModel]:
    """Fetch the per-user progress of a dataset.

    Sends a GET request to the dataset's ``users/progress`` endpoint and
    validates each entry under the ``"users"`` key of the JSON body into a
    ``UserProgressModel``.

    Args:
        dataset_id: Identifier of the dataset to query.

    Returns:
        One ``UserProgressModel`` per entry in the response's ``"users"`` list.

    Raises:
        Whatever ``raise_for_status()`` raises for an error status
        (presumably translated by ``api_error_handler`` - confirm).
    """
    users_progress_url = f"{self.url_stub}/{dataset_id}/users/progress"
    resp = self.http_client.get(users_progress_url)
    resp.raise_for_status()
    # Decode the body before logging so a malformed payload is not reported as success.
    payload = resp.json()

    self._log_message(message=f"Got users progress for dataset {dataset_id}")
    user_entries = payload["users"]
    return [UserProgressModel.model_validate(entry) for entry in user_entries]

@api_error_handler
def publish(self, dataset_id: UUID) -> "DatasetModel":
response = self.http_client.put(url=f"{self.url_stub}/{dataset_id}/publish")
Expand Down
18 changes: 10 additions & 8 deletions argilla/src/argilla/_api/_http/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,8 @@ class HTTPClientConfig:

api_url: str
api_key: str
timeout: int = None

def __post_init__(self):
self.api_url = self.api_url
self.api_key = self.api_key
self.timeout = self.timeout or 60
timeout: int = 60
retries: int = 5


def create_http_client(api_url: str, api_key: str, **client_args) -> httpx.Client:
Expand All @@ -37,5 +33,11 @@ def create_http_client(api_url: str, api_key: str, **client_args) -> httpx.Clien

headers = client_args.pop("headers", {})
headers["X-Argilla-Api-Key"] = api_key

return httpx.Client(base_url=api_url, headers=headers, **client_args)
retries = client_args.pop("retries", 0)

return httpx.Client(
base_url=api_url,
headers=headers,
transport=httpx.HTTPTransport(retries=retries),
**client_args,
)
39 changes: 39 additions & 0 deletions argilla/src/argilla/_models/_dataset_progress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright 2024-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pydantic import BaseModel


class DatasetProgressModel(BaseModel):
    """Dataset progress model.

    Holds the record counters of a dataset's progress payload. Every counter
    defaults to 0 when the field is absent from the validated data.
    """

    # Total number of records in the dataset.
    total: int = 0
    # Number of records with status `completed`.
    completed: int = 0
    # Number of records with status `pending`.
    pending: int = 0


class RecordResponseDistributionModel(BaseModel):
    """Response distribution model.

    Counts a user's responses by response status. Every counter defaults to 0
    when the field is absent from the validated data.
    """

    # Number of responses with status `submitted`.
    submitted: int = 0
    # Number of responses with status `draft`.
    draft: int = 0
    # Number of responses with status `discarded`.
    discarded: int = 0


class UserProgressModel(BaseModel):
    """User progress model.

    Describes one user's responses in a dataset, split by the status of the
    records they belong to (`completed` or `pending`).
    """

    # Username of the user this progress entry belongs to (required).
    username: str
    # Response distribution over records with status `completed`; all zeros when absent.
    # NOTE(review): the default is a model instance built at class-definition time;
    # pydantic is expected to copy it per instance - confirm.
    completed: RecordResponseDistributionModel = RecordResponseDistributionModel()
    # Response distribution over records with status `pending`; all zeros when absent.
    pending: RecordResponseDistributionModel = RecordResponseDistributionModel()
Loading

0 comments on commit 935a299

Please sign in to comment.