Merge branch 'develop' into docs/5260-docs-add-server-configuration-a…

…nd-database-migration-docs-to-v2
argilla-io · Jul 29, 2024 · c6e593a · c6e593a
2 parents 6886891 + ad31d60
commit c6e593a
Show file tree

Hide file tree

Showing 83 changed files with 1,057 additions and 603 deletions.
diff --git a/.github/workflows/argilla-server.build-docker-images.yml b/.github/workflows/argilla-server.build-docker-images.yml
@@ -49,14 +49,14 @@ jobs:
             echo "PLATFORMS=linux/amd64,linux/arm64" >> $GITHUB_ENV
             echo "IMAGE_TAG=v$PACKAGE_VERSION" >> $GITHUB_ENV
             echo "SERVER_DOCKER_IMAGE=argilla/argilla-server" >> $GITHUB_ENV
-            echo "QUICKSTART_DOCKER_IMAGE=argilla/argilla-quickstart" >> $GITHUB_ENV
+            echo "HF_SPACES_DOCKER_IMAGE=argilla/argilla-hf-spaces" >> $GITHUB_ENV
             echo "DOCKER_USERNAME=$DOCKER_USERNAME" >> $GITHUB_ENV
             echo "DOCKER_PASSWORD=$DOCKER_PASSWORD" >> $GITHUB_ENV
           else
             echo "PLATFORMS=linux/amd64" >> $GITHUB_ENV
             echo "IMAGE_TAG=$DOCKER_IMAGE_TAG" >> $GITHUB_ENV
             echo "SERVER_DOCKER_IMAGE=argilladev/argilla-server" >> $GITHUB_ENV
-            echo "QUICKSTART_DOCKER_IMAGE=argilladev/argilla-quickstart" >> $GITHUB_ENV
+            echo "HF_SPACES_DOCKER_IMAGE=argilladev/argilla-hf-spaces" >> $GITHUB_ENV
             echo "DOCKER_USERNAME=$DOCKER_USERNAME_DEV" >> $GITHUB_ENV
             echo "DOCKER_PASSWORD=$DOCKER_PASSWORD_DEV" >> $GITHUB_ENV
           fi
@@ -92,7 +92,6 @@ jobs:
         uses: docker/build-push-action@v5
         with:
           context: argilla-server/docker/server
-          file: argilla-server/docker/server/Dockerfile
           platforms: ${{ env.PLATFORMS }}
           tags: ${{ env.SERVER_DOCKER_IMAGE }}:${{ env.IMAGE_TAG }}
           labels: ${{ steps.meta.outputs.labels }}
@@ -103,35 +102,33 @@ jobs:
         uses: docker/build-push-action@v5
         with:
           context: argilla-server/docker/server
-          file: argilla-server/docker/server/Dockerfile
           platforms: ${{ env.PLATFORMS }}
           tags: ${{ env.SERVER_DOCKER_IMAGE }}:latest
           labels: ${{ steps.meta.outputs.labels }}
           push: true
 
-      - name: Build and push `argilla-quickstart` image
+      - name: Build and push `argilla-hf-spaces` image
         uses: docker/build-push-action@v5
         with:
-          context: argilla-server/docker/quickstart
-          file: argilla-server/docker/quickstart/Dockerfile
+          context: argilla-server/docker/argilla-hf-spaces
           platforms: ${{ env.PLATFORMS }}
-          tags: ${{ env.QUICKSTART_DOCKER_IMAGE }}:${{ env.IMAGE_TAG }}
+          tags: ${{ env.HF_SPACES_DOCKER_IMAGE }}:${{ env.IMAGE_TAG }}
           labels: ${{ steps.meta.outputs.labels }}
           build-args: |
             ARGILLA_SERVER_IMAGE=${{ env.SERVER_DOCKER_IMAGE }}
             ARGILLA_VERSION=${{ env.IMAGE_TAG }}
           push: true
 
-      - name: Push latest `argilla-quickstart` image
+      - name: Push latest `argilla-hf-spaces` image
         if: ${{ inputs.is_release && inputs.publish_latest }}
         uses: docker/build-push-action@v5
         with:
-          context: argilla-server/docker/quickstart
-          file: argilla-server/docker/quickstart/Dockerfile
+          context: argilla-server/docker/argilla-hf-spaces
           platforms: ${{ env.PLATFORMS }}
-          tags: ${{ env.QUICKSTART_DOCKER_IMAGE }}:latest
+          tags: ${{ env.HF_SPACES_DOCKER_IMAGE }}:latest
           labels: ${{ steps.meta.outputs.labels }}
           build-args: |
+            ARGILLA_SERVER_IMAGE=${{ env.SERVER_DOCKER_IMAGE }}
             ARGILLA_VERSION=${{ env.IMAGE_TAG }}
           push: true
 
@@ -141,14 +138,14 @@ jobs:
         with:
           username: ${{ env.DOCKER_USERNAME }}
           password: ${{ env.DOCKER_PASSWORD }}
-          repository: argilla/argilla-server
+          repository: ${{ env.SERVER_DOCKER_IMAGE }}  # single "$": a doubled one would leave a literal "$" in front of the repository name
           readme-filepath: argilla-server/README.md
 
-      - name: Docker Hub Description for `argilla-quickstart`
+      - name: Docker Hub Description for `argilla-hf-spaces`
         uses: peter-evans/dockerhub-description@v4
         if: ${{ inputs.is_release && inputs.publish_latest }}
         with:
           username: ${{ secrets.AR_DOCKER_USERNAME }}
           password: ${{ secrets.AR_DOCKER_PASSWORD }}
-          repository: argilla/argilla-quickstart
-          readme-filepath: argilla-server/docker/quickstart/README.md
+          repository: ${{ env.HF_SPACES_DOCKER_IMAGE }}  # single "$": a doubled one would leave a literal "$" in front of the repository name
+          readme-filepath: argilla-server/docker/argilla-hf-spaces/README.md
diff --git a/.github/workflows/argilla.yml b/.github/workflows/argilla.yml
@@ -20,16 +20,21 @@ on:
 jobs:
   build:
     services:
-      argilla-quickstart:
-        image: argilladev/argilla-quickstart:develop
+      argilla-server:
+        image: argilladev/argilla-server:develop
         ports:
           - 6900:6900
         env:
-          ANNOTATOR_USERNAME: annotator
-          OWNER_USERNAME: argilla
-          OWNER_API_KEY: argilla.apikey
-          ADMIN_USERNAME: admin
-          ADMIN_API_KEY: admin.apikey
+          ARGILLA_ENABLE_TELEMETRY: 0
+          ARGILLA_ELASTICSEARCH: http://elasticsearch:9200
+          DEFAULT_USER_ENABLED: 1
+      elasticsearch:
+        image: docker.elastic.co/elasticsearch/elasticsearch:8.8.2
+        ports:
+          - 9200:9200
+        env:
+          discovery.type: single-node
+          xpack.security.enabled: false
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -50,7 +55,7 @@ jobs:
       - name: Install dependencies
         run: |
           pdm install
-      - name: Wait for argilla-quickstart to start
+      - name: Wait for argilla server to start
         run: |
           while ! curl -XGET http://localhost:6900/api/_status; do sleep 5; done
       - name: Set huggingface hub credentials

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -13,7 +13,7 @@ However you choose to contribute, please be mindful and respect our
 
 ## Need some help?
 
-We understand that getting started might be a bit difficult, therefore, you can join our discord channel [#argilla-general](https://discord.gg/hugging-face-879548962464493619) by selecting *Argilla* in *Channels and Roles* after joining the Discord.
+We understand that getting started might be a bit difficult. If you need help, you can join our Discord channel [#argilla-distilabel-general](https://discord.gg/hugging-face-879548962464493619) by selecting *Argilla* in *Channels and Roles* after joining the Discord.
 
 ## Want to work on your own?
 

diff --git a/README.md b/README.md
@@ -32,52 +32,52 @@
 </a>
 </p>
 
-Argilla is a **collaboration platform for AI engineers and domain experts** that require **high-quality outputs, full data ownership, and overall efficiency**.
+Argilla is a collaboration tool for AI engineers and domain experts who need to build high-quality datasets for their projects.
 
 If you just want to get started, we recommend our [UI demo](https://demo.argilla.io/sign-in?auth=ZGVtbzoxMjM0NTY3OA%3D%3D) or our [free Hugging Face Spaces deployment integration](https://huggingface.co/new-space?template=argilla/argilla-template-space). Curious, and want to know more? Read our [documentation](https://argilla-io.github.io/argilla/latest/).
 
 ## Why use Argilla?
 
-Whether you are working on monitoring and improving complex **generative tasks** involving LLM pipelines with RAG, or you are working on a **predictive task** for things like AB-testing of span- and text-classification models. Our versatile platform helps you ensure **your data work pays off**.
+Argilla can be used for collecting human feedback for a wide variety of AI projects like traditional NLP (text classification, NER, etc.), LLMs (RAG, preference tuning, etc.), or multimodal models (text to image, etc.). Argilla's programmatic approach lets you build workflows for continuous evaluation and model improvement. The goal of Argilla is to ensure your data work pays off by quickly iterating on the right data and models.
 
 ### Improve your AI output quality through data quality
 
 Compute is expensive and output quality is important. We help you focus on data, which tackles the root cause of both of these problems at once. Argilla helps you to **achieve and keep high-quality standards** for your data. This means you can improve the quality of your AI output.
 
 ### Take control of your data and models
 
-Most AI platforms are black boxes. Argilla is different. We believe that you should be the owner of both your data and your models. That's why we provide you with all the tools your team needs to **manage your data and models in a way that suits you best**.
+Most AI tools are black boxes. Argilla is different. We believe that you should be the owner of both your data and your models. That's why we provide you with all the tools your team needs to **manage your data and models in a way that suits you best**.
 
 ### Improve efficiency by quickly iterating on the right data and models
 
-Gathering data is a time-consuming process. Argilla helps by providing a platform that allows you to **interact with your data in a more engaging way**. This means you can quickly and easily label your data with filters, AI feedback suggestions and semantic search. So you can focus on training your models and monitoring their performance.
+Gathering data is a time-consuming process. Argilla helps by providing a tool that allows you to **interact with your data in a more engaging way**. This means you can quickly and easily label your data with filters, AI feedback suggestions and semantic search. So you can focus on training your models and monitoring their performance.
 
 ## 🏘️ Community
 
 We are an open-source community-driven project and we love to hear from you. Here are some ways to get involved:
 
 - [Community Meetup](https://lu.ma/embed-checkout/evt-IQtRiSuXZCIW6FB): listen in or present during one of our bi-weekly events.
 
-- [Discord](http://hf.co/join/discord): get direct support from the community in #argilla-general and #argilla-help.
+- [Discord](http://hf.co/join/discord): get direct support from the community in #argilla-distilabel-general and #argilla-distilabel-help.
 
 - [Roadmap](https://github.com/orgs/argilla-io/projects/10/views/1): plans change but we love to discuss those with our community so feel encouraged to participate.
 
 ## What do people build with Argilla?
 
 ### Open-source datasets and models
 
-Argilla is a tool that can be used to achieve and keep **high-quality data standards** with a **focus on NLP and LLMs**. Our community uses Argilla to create amazing open-source [datasets](https://huggingface.co/datasets?other=argilla) and [models](https://huggingface.co/models?other=distilabel), and **we love contributions to open-source** ourselves too.
+The community uses Argilla to create amazing open-source [datasets](https://huggingface.co/datasets?library=library:argilla&sort=trending) and [models](https://huggingface.co/models?other=distilabel).
 
-- Our [cleaned UltraFeedback dataset](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences-cleaned) and the [Notus](https://huggingface.co/argilla/notus-7b-v1) and [Notux](https://huggingface.co/argilla/notux-8x7b-v1) models, where we improved benchmark and empirical human judgment for the Mistral and Mixtral models with cleaner data using **human feedback**.
-- Our [distilabeled Intel Orca DPO dataset](https://huggingface.co/datasets/argilla/distilabel-intel-orca-dpo-pairs) and the [improved OpenHermes model](https://huggingface.co/argilla/distilabeled-OpenHermes-2.5-Mistral-7B), show how we improve model performance by filtering out 50% of the original dataset through **human and AI feedback**.
+- [Cleaned UltraFeedback dataset](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences-cleaned) used to fine-tune the [Notus](https://huggingface.co/argilla/notus-7b-v1) and [Notux](https://huggingface.co/argilla/notux-8x7b-v1) models. The original UltraFeedback dataset was curated using Argilla UI filters to find and report a bug in the original data generation code. Based on this data curation process, Argilla built this new version of the UltraFeedback dataset and fine-tuned Notus, outperforming Zephyr on several benchmarks.
+- [distilabeled Intel Orca DPO dataset](https://huggingface.co/datasets/argilla/distilabel-intel-orca-dpo-pairs) used to fine-tune the [improved OpenHermes model](https://huggingface.co/argilla/distilabeled-OpenHermes-2.5-Mistral-7B). This dataset was built by combining human curation in Argilla with AI feedback from distilabel, leading to an improved version of the Intel Orca dataset and outperforming models fine-tuned on the original dataset.
 
-### Internal Use cases
+### Example use cases
 
-AI teams from companies like [the Red Cross](https://510.global/), [Loris.ai](https://loris.ai/) and [Prolific](https://www.prolific.com/) use Argilla to **improve the quality and efficiency of AI** projects. They shared their experiences in our [AI community meetup](https://lu.ma/embed-checkout/evt-IQtRiSuXZCIW6FB).
+AI teams from companies like [the Red Cross](https://510.global/), [Loris.ai](https://loris.ai/) and [Prolific](https://www.prolific.com/) use Argilla to improve the quality and efficiency of AI projects. They shared their experiences in our [AI community meetup](https://lu.ma/embed-checkout/evt-IQtRiSuXZCIW6FB).
 
-- AI for good: [the Red Cross presentation](https://youtu.be/ZsCqrAhzkFU?feature=shared) showcases **how their experts and AI team collaborate** by classifying and redirecting requests from refugees of the Ukrainian crisis to streamline the support processes of the Red Cross.
-- Customer support: during [the Loris meetup](https://youtu.be/jWrtgf2w4VU?feature=shared) they showed how their AI team uses unsupervised and few-shot contrastive learning to help them **quickly validate and gain labelled samples for a huge amount of multi-label classifiers**.
-- Research studies: [the showcase from Prolific](https://youtu.be/ePDlhIxnuAs?feature=shared) announced their integration with our platform. They use it to actively **distribute data collection projects** among their annotating workforce. This allows them to quickly and **efficiently collect high-quality data** for their research studies.
+- AI for good: [the Red Cross presentation](https://youtu.be/ZsCqrAhzkFU?feature=shared) showcases how the Red Cross domain experts and AI team collaborated by classifying and redirecting requests from refugees of the Ukrainian crisis to streamline the support processes of the Red Cross.
+- Customer support: during [the Loris meetup](https://youtu.be/jWrtgf2w4VU?feature=shared) they showed how their AI team uses unsupervised and few-shot contrastive learning to help them quickly validate and gain labelled samples for a huge number of multi-label classifiers.
+- Research studies: [the showcase from Prolific](https://youtu.be/ePDlhIxnuAs?feature=shared) announced their integration with our platform. They use it to actively distribute data collection projects among their annotating workforce. This allows Prolific to quickly and efficiently collect high-quality data for research studies.
 
 ## 👨‍💻 Getting started
 

diff --git a/argilla-frontend/README.md b/argilla-frontend/README.md
@@ -31,8 +31,7 @@
 </a>
 </p>
 
-Argilla is a **collaboration platform for AI engineers and domain experts** that require **high-quality outputs, full data ownership, and overall efficiency**.
-
+Argilla is a collaboration tool for AI engineers and domain experts who need to build high-quality datasets for their projects.
 If you just want to get started, we recommend our [UI demo](https://demo.argilla.io/sign-in?auth=ZGVtbzoxMjM0NTY3OA%3D%3D) or our [free Hugging Face Spaces deployment integration](https://huggingface.co/new-space?template=argilla/argilla-template-space). Curious, and want to know more? Read our [documentation](https://argilla-io.github.io/argilla/latest/).
 
 This repository only contains developer info about the front end. If you want to get started, we recommend taking a

diff --git a/argilla-frontend/components/features/annotation/container/fields/Record.vue b/argilla-frontend/components/features/annotation/container/fields/Record.vue
@@ -62,7 +62,7 @@ export default {
         if (
           this.record?.questions
             .filter((q) => q.isSpanType)
-            .some((q) => q.isModified)
+            .some((q) => q.isAnswerModified)
         ) {
           this.onSelectedRecord(true);
         }

diff --git a/argilla-frontend/components/features/annotation/container/mode/useBulkAnnotationViewModel.ts b/argilla-frontend/components/features/annotation/container/mode/useBulkAnnotationViewModel.ts
@@ -7,7 +7,6 @@ import {
   AvailableStatus,
   BulkAnnotationUseCase,
 } from "~/v1/domain/usecases/bulk-annotation-use-case";
-import { useDebounce } from "~/v1/infrastructure/services/useDebounce";
 import { useNotifications } from "~/v1/infrastructure/services/useNotifications";
 import { useTranslate } from "~/v1/infrastructure/services/useTranslate";
 
@@ -17,7 +16,6 @@ export const useBulkAnnotationViewModel = ({
   records: Records;
 }) => {
   const notification = useNotifications();
-  const debounceForSubmit = useDebounce(300);
 
   const affectAllRecords = ref(false);
   const progress = ref(0);
@@ -44,11 +42,12 @@ export const useBulkAnnotationViewModel = ({
     recordReference: Record,
     selectedRecords: Record[]
   ) => {
+    let allSuccessful = false;
     try {
       const totalRecords = records.total;
       const isAffectingAllRecords = affectAllRecords.value;
 
-      const allSuccessful = await bulkAnnotationUseCase.execute(
+      allSuccessful = await bulkAnnotationUseCase.execute(
         status,
         criteria,
         recordReference,
@@ -73,19 +72,13 @@ export const useBulkAnnotationViewModel = ({
           type: "info",
         });
       }
-
-      progress.value = 0;
-
-      await debounceForSubmit.wait();
-
-      return allSuccessful;
     } catch {
     } finally {
       affectAllRecords.value = false;
       progress.value = 0;
     }
 
-    return false;
+    return allSuccessful;
   };
 
   const discard = async (

diff --git a/...lla-frontend/components/features/annotation/container/mode/useFocusAnnotationViewModel.ts b/...lla-frontend/components/features/annotation/container/mode/useFocusAnnotationViewModel.ts
@@ -4,11 +4,8 @@ import { Record } from "~/v1/domain/entities/record/Record";
 import { DiscardRecordUseCase } from "~/v1/domain/usecases/discard-record-use-case";
 import { SubmitRecordUseCase } from "~/v1/domain/usecases/submit-record-use-case";
 import { SaveDraftUseCase } from "~/v1/domain/usecases/save-draft-use-case";
-import { useDebounce } from "~/v1/infrastructure/services/useDebounce";
 
 export const useFocusAnnotationViewModel = () => {
-  const debounceForSubmit = useDebounce(300);
-
   const isDraftSaving = ref(false);
   const isDiscarding = ref(false);
   const isSubmitting = ref(false);
@@ -17,33 +14,36 @@ export const useFocusAnnotationViewModel = () => {
   const saveDraftUseCase = useResolve(SaveDraftUseCase);
 
   const discard = async (record: Record) => {
-    isDiscarding.value = true;
-
-    await discardUseCase.execute(record);
-
-    await debounceForSubmit.wait();
-
-    isDiscarding.value = false;
+    try {
+      isDiscarding.value = true;
+
+      await discardUseCase.execute(record);
+    } catch {
+    } finally {
+      isDiscarding.value = false;
+    }
   };
 
   const submit = async (record: Record) => {
-    isSubmitting.value = true;
-
-    await submitUseCase.execute(record);
-
-    await debounceForSubmit.wait();
-
-    isSubmitting.value = false;
+    try {
+      isSubmitting.value = true;
+
+      await submitUseCase.execute(record);
+    } catch {
+    } finally {
+      isSubmitting.value = false;
+    }
   };
 
   const saveAsDraft = async (record: Record) => {
-    isDraftSaving.value = true;
-
-    await saveDraftUseCase.execute(record);
-
-    await debounceForSubmit.wait();
-
-    isDraftSaving.value = false;
+    try {
+      isDraftSaving.value = true;
+
+      await saveDraftUseCase.execute(record);
+    } catch {
+    } finally {
+      isDraftSaving.value = false;
+    }
   };
 
   return {

diff --git a/argilla-frontend/components/features/annotation/settings/SettingsInfo.vue b/argilla-frontend/components/features/annotation/settings/SettingsInfo.vue
@@ -56,7 +56,7 @@
             <BaseButton
               type="button"
               class="secondary light small"
-              @on-click="settings.dataset.restoreDistribution()"
+              @on-click="settings.dataset.restore('distribution')"
               :disabled="!settings.dataset.isModifiedTaskDistribution"
             >
               <span v-text="$t('cancel')" />
@@ -91,7 +91,7 @@
             <BaseButton
               type="button"
               class="secondary light small"
-              @on-click="settings.dataset.restoreGuidelines()"
+              @on-click="settings.dataset.restore('guidelines')"
               :disabled="!settings.dataset.isModifiedGuidelines"
             >
               <span v-text="$t('cancel')" />