From fa7e84109c68471f3ecb319e618ef1900bfaf025 Mon Sep 17 00:00:00 2001
From: davidberenstein1957
-First things first! You can install Argilla from pypi. +First things first! You can install Argilla from PyPI. ```bash pip install argilla @@ -97,7 +97,7 @@ pip install argilla
```bash @@ -108,20 +108,20 @@ docker run -d --name argilla -p 6900:6900 argilla/argilla-quickstart:latest
HuggingFace Spaces now have persistent storage and this is supported from Argilla 1.11.0 onwards, but you will need to manually activate it via the HuggingFace Spaces settings. Otherwise, unless you're on a paid space upgrade, after 48 hours of inactivity the space will be shut off and you will lose all the data. To avoid losing data, we highly recommend using the persistent storage layer offered by HuggingFace. After this, we can connect to our server. - +
Once you have deployed Argilla, we will connect to the server. @@ -135,13 +135,13 @@ workspace = "argilla-workspace" # e.g. "admin" ) ``` -After this, you can start using Argilla, so you can create a dataset and add records to it. We use the FeedbackDataset as an example, but you can use any of the other datasets available in Argilla. You can find more information about the different datasets here. +After this, you can start using Argilla, so you can create a dataset and add records to it. We use the FeedbackDataset as an example, but you can use any of the other datasets available in Argilla. You can find more information about the different datasets here.
Once you have connected to the server, we will create a workspace for datasets. @@ -161,7 +161,7 @@ for user in users:
```python @@ -189,13 +189,13 @@ dataset = rg.FeedbackDataset( remote_dataset = dataset.push_to_argilla(name="my-dataset", workspace="my-workspace") ``` - +
```python @@ -214,14 +214,14 @@ remote_dataset.add_records(record) And that's it, you now have your first dataset ready. You can begin annotating it or embark on other related tasks. - +
```python @@ -230,13 +230,13 @@ import argilla_v1 as rg filtered_dataset = dataset.filter_by(response_status="submitted") ``` - +
```python @@ -257,13 +257,13 @@ similar_records = ds.find_similar_records( ) ``` - +
```python
@@ -273,7 +273,7 @@ rule = Rule(query="positive impact", label="optimism")
add_rules(dataset="go_emotion", rules=[rule])
```
-
+
# Model Card for *{{ model_name | default("Model ID", true) }}*
-This model has been created with [Argilla](https://docs.argilla.io), trained with *{{ library_name }}*.
+This model has been created with [Argilla](https://docs.v1.argilla.io), trained with *{{ library_name }}*.
diff --git a/argilla-v1/src/argilla_v1/client/models.py b/argilla-v1/src/argilla_v1/client/models.py
index f4e0d8d875..242434784d 100644
--- a/argilla-v1/src/argilla_v1/client/models.py
+++ b/argilla-v1/src/argilla_v1/client/models.py
@@ -135,7 +135,7 @@ def _normalize_id(cls, v):
message = (
"Integer ids won't be supported in future versions. We recommend to start using strings instead. "
"For datasets already containing integer values we recommend migrating them to avoid deprecation issues. "
- "See https://docs.argilla.io/en/latest/getting_started/installation/configurations"
+ "See https://docs.v1.argilla.io/en/latest/getting_started/installation/configurations"
"/database_migrations.html#elasticsearch"
)
warnings.warn(message, DeprecationWarning, stacklevel=2)
diff --git a/argilla-v1/src/argilla_v1/labeling/text_classification/rule.py b/argilla-v1/src/argilla_v1/labeling/text_classification/rule.py
index 37b3f376a2..15b377e26d 100644
--- a/argilla-v1/src/argilla_v1/labeling/text_classification/rule.py
+++ b/argilla-v1/src/argilla_v1/labeling/text_classification/rule.py
@@ -24,7 +24,7 @@ class Rule:
"""A rule (labeling function) in form of an ElasticSearch query.
Args:
- query: An ElasticSearch query with the `query string syntax
📚 Argilla documentation for more guides and +
📚 Argilla documentation for more guides and tutorials.
\ No newline at end of file diff --git a/docs/_source/_common/snippets/training/feedback-task/question-answering.md b/docs/_source/_common/snippets/training/feedback-task/question-answering.md index 1c34cf616a..ac8822b7eb 100644 --- a/docs/_source/_common/snippets/training/feedback-task/question-answering.md +++ b/docs/_source/_common/snippets/training/feedback-task/question-answering.md @@ -3,7 +3,7 @@ title: Question Answering description: When two TextFields and a TextQuestion are present in the datasets, we can define a TrainingTaskForQuestionAnswering to use our ArgillaTrainer integration for fine-tuning with "transformers". links: - linkText: Practical guide to Question Answering - linkLink: https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#question-answering + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#question-answering --- ```python diff --git a/docs/_source/_common/snippets/training/feedback-task/rlhf.md b/docs/_source/_common/snippets/training/feedback-task/rlhf.md index 357f2c5358..341590b118 100644 --- a/docs/_source/_common/snippets/training/feedback-task/rlhf.md +++ b/docs/_source/_common/snippets/training/feedback-task/rlhf.md @@ -3,11 +3,11 @@ title: RLHF description: Reinforcement Learning with Human Feedback (RLHF) proved to be the driving force behind the power of ChatGPT and other LLMs. Argilla does provides an integration for Reinforcement Learning with Human Feedback (RLHF) with the ArgillaTrainer class. Generally, this is done in three steps (1) SFT, (2) Reward Modeling and (3) PPO. 
links: - linkText: Practical guide to SFT - linkLink: https://docs.argilla.io/en/latest/guides/llms/practical_guides/fine_tune.html#supervised-finetuning + linkLink: https://docs.v1.argilla.io/en/latest/guides/llms/practical_guides/fine_tune.html#supervised-finetuning - linkText: Practical Guide to Reward Modeling - linkLink: https://docs.argilla.io/en/latest/guides/llms/practical_guides/fine_tune.html#reward-modeling + linkLink: https://docs.v1.argilla.io/en/latest/guides/llms/practical_guides/fine_tune.html#reward-modeling - linkText: Practical Guide to PPO - linkLink: https://docs.argilla.io/en/latest/guides/llms/practical_guides/fine_tune.html#proximal-policy-optimization + linkLink: https://docs.v1.argilla.io/en/latest/guides/llms/practical_guides/fine_tune.html#proximal-policy-optimization --- ```python diff --git a/docs/_source/_common/snippets/training/feedback-task/sentence-similarity.md b/docs/_source/_common/snippets/training/feedback-task/sentence-similarity.md index b80d523e6a..a30305b3fd 100644 --- a/docs/_source/_common/snippets/training/feedback-task/sentence-similarity.md +++ b/docs/_source/_common/snippets/training/feedback-task/sentence-similarity.md @@ -3,7 +3,7 @@ title: Sentence Similarity description: When we have two TextFields in the datasets and potentially a LabelQuestion or RankingQuestion, we can define a TrainingTaskForSentenceSimilarity to use our ArgillaTrainer integration for fine-tuning with "sentence-transformers" to train a model for sentence similarity to optimize Retrieval Augmented Generation tasks (RAG) with better retrieval and reranking. 
links: - linkText: Practical guide to Sentence Similarity - linkLink: https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#sentence-similarity + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#sentence-similarity --- ```python diff --git a/docs/_source/_common/snippets/training/feedback-task/text-classification.md b/docs/_source/_common/snippets/training/feedback-task/text-classification.md index 61c15dd7c4..fc90098de3 100644 --- a/docs/_source/_common/snippets/training/feedback-task/text-classification.md +++ b/docs/_source/_common/snippets/training/feedback-task/text-classification.md @@ -3,9 +3,9 @@ title: Text classification description: When a RatingQuestion, LabelQuestion or MultiLabelQuestion is present in the datasets, we can define a TrainingTaskForTextClassification to use our ArgillaTrainer integration for fine-tuning with "openai", "setfit", "peft", "spacy" and "transformers". links: - linkText: Argilla unification docs - linkLink: https://docs.argilla.io/en/latest/practical_guides/collect_responses.html#solve-disagreements + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/collect_responses.html#solve-disagreements - linkText: Practical guide to Text Classification - linkLink: https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification --- ```python diff --git a/docs/_source/_common/snippets/training/text-classification/openai.md b/docs/_source/_common/snippets/training/text-classification/openai.md index 38184edb82..3d601be485 100644 --- a/docs/_source/_common/snippets/training/text-classification/openai.md +++ b/docs/_source/_common/snippets/training/text-classification/openai.md @@ -3,7 +3,7 @@ title: OpenAI description: The ArgillaOpenAITrainer leverages the features of OpenAI to fine-tune programmatically with Argilla. 
links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification - linkText: OpenAI docs linkLink: https://platform.openai.com/docs/guides/fine-tuning --- diff --git a/docs/_source/_common/snippets/training/text-classification/peft.md b/docs/_source/_common/snippets/training/text-classification/peft.md index 188480c977..0bef2d1b12 100644 --- a/docs/_source/_common/snippets/training/text-classification/peft.md +++ b/docs/_source/_common/snippets/training/text-classification/peft.md @@ -3,7 +3,7 @@ title: Peft description: The ArgillaPeftTrainer leverages the base features of transformers and uses the Low Rank Adaptation (LoRA) implementation of Parameter Efficient Fine-Tuning (PEFT). links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/practical_guides/fine_tune.html#text-classification + linkLink: https://docs.v1.argilla.io/en/practical_guides/fine_tune.html#text-classification - linkText: Transformers blog linkLink: https://huggingface.co/blog/peft - linkText: Transformers docss diff --git a/docs/_source/_common/snippets/training/text-classification/setfit.md b/docs/_source/_common/snippets/training/text-classification/setfit.md index 90443fa25f..acc486206b 100644 --- a/docs/_source/_common/snippets/training/text-classification/setfit.md +++ b/docs/_source/_common/snippets/training/text-classification/setfit.md @@ -3,7 +3,7 @@ title: SetFit description: The ArgillaSetFitTrainer leverages the features of SetFit to train programmatically with Argilla. 
links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification - linkText: SetFit docs linkLink: https://github.com/huggingface/setfit --- diff --git a/docs/_source/_common/snippets/training/text-classification/spacy.md b/docs/_source/_common/snippets/training/text-classification/spacy.md index 37d12996cc..10f45b47ff 100644 --- a/docs/_source/_common/snippets/training/text-classification/spacy.md +++ b/docs/_source/_common/snippets/training/text-classification/spacy.md @@ -3,7 +3,7 @@ title: spaCy description: The ArgillaSpacyTrainer leverages the features of spaCy to train programmatically with Argilla. links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification - linkText: spaCy docs linkLink: https://spacy.io/usage/training --- diff --git a/docs/_source/_common/snippets/training/text-classification/transformers.md b/docs/_source/_common/snippets/training/text-classification/transformers.md index d594c9aa5d..85f06d2706 100644 --- a/docs/_source/_common/snippets/training/text-classification/transformers.md +++ b/docs/_source/_common/snippets/training/text-classification/transformers.md @@ -3,7 +3,7 @@ title: Transformers description: The ArgillaTransformersTrainer leverages the features of transformers to train programmatically with Argilla. 
links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification - linkText: Transformers docs linkLink: https://huggingface.co/docs/transformers/training --- diff --git a/docs/_source/_common/snippets/training/text2text/openai.md b/docs/_source/_common/snippets/training/text2text/openai.md index 8075dc60db..57151afa01 100644 --- a/docs/_source/_common/snippets/training/text2text/openai.md +++ b/docs/_source/_common/snippets/training/text2text/openai.md @@ -3,7 +3,7 @@ title: OpenAI description: The ArgillaOpenAITrainer leverages the features of OpenAI to fine-tune programmatically with Argilla. links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#text-classification - linkText: OpenAI docs linkLink: https://platform.openai.com/docs/guides/fine-tuning --- diff --git a/docs/_source/_common/snippets/training/token-classification/peft.md b/docs/_source/_common/snippets/training/token-classification/peft.md index 780f86ef9a..46e23a7772 100644 --- a/docs/_source/_common/snippets/training/token-classification/peft.md +++ b/docs/_source/_common/snippets/training/token-classification/peft.md @@ -3,7 +3,7 @@ title: PEFT description: The ArgillaPeftTrainer leverages the base features of transformers and uses the Low Rank Adaptation (LoRA) implementation of Parameter Efficient Fine-Tuning (PEFT). 
links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/practical_guides/fine_tune.html#token-classification + linkLink: https://docs.v1.argilla.io/en/practical_guides/fine_tune.html#token-classification - linkText: Transformers blog linkLink: https://huggingface.co/blog/peft - linkText: Transformers docs diff --git a/docs/_source/_common/snippets/training/token-classification/spacy.md b/docs/_source/_common/snippets/training/token-classification/spacy.md index d7efb98b3a..7006e26e6d 100644 --- a/docs/_source/_common/snippets/training/token-classification/spacy.md +++ b/docs/_source/_common/snippets/training/token-classification/spacy.md @@ -3,7 +3,7 @@ title: spaCy description: The ArgillaSpacyTrainer leverages the features of spaCy to train programmatically with Argilla. links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/practical_guides/fine_tune.html#token-classification + linkLink: https://docs.v1.argilla.io/en/practical_guides/fine_tune.html#token-classification - linkText: spaCy docs linkLink: https://spacy.io/usage/training --- diff --git a/docs/_source/_common/snippets/training/token-classification/span_marker.md b/docs/_source/_common/snippets/training/token-classification/span_marker.md index 7e08fdf2c4..5120bbb083 100644 --- a/docs/_source/_common/snippets/training/token-classification/span_marker.md +++ b/docs/_source/_common/snippets/training/token-classification/span_marker.md @@ -3,7 +3,7 @@ title: SpanMarker description: The ArgillaSpanMarkerTrainer leverages SpanMarker to programmatically train with Argilla. 
links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/practical_guides/fine_tune.html#token-classification + linkLink: https://docs.v1.argilla.io/en/practical_guides/fine_tune.html#token-classification - linkText: SpanMarker docs linkLink: https://tomaarsen.github.io/SpanMarkerNER --- diff --git a/docs/_source/_common/snippets/training/token-classification/transformers.md b/docs/_source/_common/snippets/training/token-classification/transformers.md index 11de6ce6c6..2ad43d20bf 100644 --- a/docs/_source/_common/snippets/training/token-classification/transformers.md +++ b/docs/_source/_common/snippets/training/token-classification/transformers.md @@ -3,7 +3,7 @@ title: Transformers description: The ArgillaTransformersTrainer leverages the features of transformers to train programmatically with Argilla. links: - linkText: Argilla docs - linkLink: https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#token-classification + linkLink: https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#token-classification - linkText: Transformers docs linkLink: https://huggingface.co/docs/transformers/training --- diff --git a/docs/_source/conf.py b/docs/_source/conf.py index 48dbbe3cc6..0fa1211cc0 100644 --- a/docs/_source/conf.py +++ b/docs/_source/conf.py @@ -349,7 +349,7 @@ # Open graph meta -ogp_image = "https://docs.argilla.io/en/latest/_static/images/og-doc.png" +ogp_image = "https://docs.v1.argilla.io/en/latest/_static/images/og-doc.png" ogp_custom_meta_tags = [ '', diff --git a/docs/_source/getting_started/argilla.md b/docs/_source/getting_started/argilla.md index 921ac65d49..d0fe6cc960 100644 --- a/docs/_source/getting_started/argilla.md +++ b/docs/_source/getting_started/argilla.md @@ -4,7 +4,7 @@ ```{admonition} Argilla 2.x :class: info -We are announcing that Argilla 1.29 is the final minor release for Argilla 1.x. 
Although we will continue to release bug fixes for this version, we will neither be adding nor removing any functionalities. Visit the [2.x docs](https://argilla-io.github.io/argilla/)! +We are announcing that Argilla 1.29 is the final minor release for Argilla 1.x. Although we will continue to release bug fixes for this version, we will neither be adding nor removing any functionalities. Visit the [2.x docs](https://docs.argilla.io/)! ``` " ] @@ -94,10 +94,10 @@ " * If you're using Docker, it is `http://localhost:6900` by default.\n", " * If you're using HF Spaces, it is constructed as `https://[your-owner-name]-[your_space_name].hf.space`.\n", "* `ARGILLA_API_KEY`: It is the API key of the Argilla Server. It is `owner` by default.\n", - "* `HF_TOKEN`: It is the Hugging Face API token. It is only needed if you're using a [private HF Space](https://docs.argilla.io/en/latest/getting_started/installation/deployments/huggingface-spaces.html#deploy-argilla-on-spaces). You can configure it in your profile: [Setting > Access Tokens](https://huggingface.co/settings/tokens).\n", + "* `HF_TOKEN`: It is the Hugging Face API token. It is only needed if you're using a [private HF Space](https://docs.v1.argilla.io/en/latest/getting_started/installation/deployments/huggingface-spaces.html#deploy-argilla-on-spaces). You can configure it in your profile: [Setting > Access Tokens](https://huggingface.co/settings/tokens).\n", "* `workspace`: It is a “space” inside your Argilla instance where authorized users can collaborate. It's `argilla` by default.\n", "\n", - "For more info about custom configurations like headers, workspace separation or access credentials, check our [config page](https://docs.argilla.io/en/latest/getting_started/installation/configurations/configurations.html)." 
+ "For more info about custom configurations like headers, workspace separation or access credentials, check our [config page](https://docs.v1.argilla.io/en/latest/getting_started/installation/configurations/configurations.html)." ] }, { @@ -219,7 +219,7 @@ " - `prediction`: Add task-specific model predictions to the record (Optional);\n", " - `metadata`: Add some arbitrary metadata to the record (Optional);\n", "\n", - "A [Dataset](https://docs.argilla.io/en/latest/conceptual_guides/data_model.html#other-datasets) in Argilla is a collection of records of the same type." + "A [Dataset](https://docs.v1.argilla.io/en/latest/conceptual_guides/data_model.html#other-datasets) in Argilla is a collection of records of the same type." ] }, { @@ -802,7 +802,7 @@ "id": "054e40cc-51f4-4321-b42a-2301775c0e9f" }, "source": [ - "We can now read this `Dataset` with Argilla, which will automatically create the records and put them in a [Argilla Dataset](https://docs.argilla.io/en/latest/reference/python/python_client.html#argilla.client.datasets.read_datasets)." + "We can now read this `Dataset` with Argilla, which will automatically create the records and put them in a [Argilla Dataset](https://docs.v1.argilla.io/en/latest/reference/python/python_client.html#argilla.client.datasets.read_datasets)." ] }, { @@ -967,7 +967,7 @@ "\n", "To extract English sentences into a new _text_ column we will write a quick helper function and [map](https://huggingface.co/docs/datasets/process#map) the whole `Dataset` with it.\n", "\n", - "French sentences will be extracted into a new _prediction_ column, wrapped in \"[ ]\", as the prediction field of [Text2TextRecord](https://docs.argilla.io/en/latest/reference/python/python_client.html#argilla.client.models.Text2TextRecord) accepts a list of strings or tuples." 
+ "French sentences will be extracted into a new _prediction_ column, wrapped in \"[ ]\", as the prediction field of [Text2TextRecord](https://docs.v1.argilla.io/en/latest/reference/python/python_client.html#argilla.client.models.Text2TextRecord) accepts a list of strings or tuples." ] }, { @@ -1370,7 +1370,7 @@ "Argilla tries to make this relatively cumbersome approach as painless as possible.\n", "Via an intuitive and adaptive UI, its exhaustive search and filter functionalities, and bulk annotation capabilities, Argilla turns the manual annotation process into an efficient option. \n", "\n", - "Look at our dedicated [feature reference](https://docs.argilla.io/en/latest/reference/webapp/features.html) for a detailed and illustrative guide on manually annotating your dataset with Argilla." + "Look at our dedicated [feature reference](https://docs.v1.argilla.io/en/latest/reference/webapp/features.html) for a detailed and illustrative guide on manually annotating your dataset with Argilla." ] }, { @@ -1402,8 +1402,8 @@ "When guided by pre-trained models, it is common to see human annotators get influenced by them.\n", "Therefore, it is advisable to avoid pre-annotations when building a rigorous test set for the final model evaluation.\n", "\n", - "Check the [introduction tutorial](https://docs.argilla.io/en/latest/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.html) to learn to add predictions to the records.\n", - "And our [feature reference](https://docs.argilla.io/en/latest/reference/webapp/features.html) includes a detailed guide on validating predictions in the Argilla web app." 
+ "Check the [introduction tutorial](https://docs.v1.argilla.io/en/latest/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.html) to learn to add predictions to the records.\n", + "And our [feature reference](https://docs.v1.argilla.io/en/latest/reference/webapp/features.html) includes a detailed guide on validating predictions in the Argilla web app." ] }, { @@ -1433,8 +1433,8 @@ "The downside of this approach is that it might be challenging to come up with working heuristic rules for some datasets.\n", "Furthermore, rules are rarely 100% precise and often conflict with each other. These noisy labels can be cleaned up using weak supervision and label models, or something as simple as majority voting. It is usually a trade-off between the amount of annotated data and the quality of the labels.\n", "\n", - "Check [our guide](https://docs.argilla.io/en/latest/practical_guides/annotation_workflows/weak_supervision.html) for an extensive introduction to weak supervision with Argilla.\n", - "Also, check the [feature reference](https://docs.argilla.io/en/latest/reference/webapp/features.html#weak-labeling) for the Define rules mode of the web app and our [various tutorials](https://docs.argilla.io/en/latest/tutorials/techniques/weak_supervision.html) to see practical examples of weak supervision workflows." + "Check [our guide](https://docs.v1.argilla.io/en/latest/practical_guides/annotation_workflows/weak_supervision.html) for an extensive introduction to weak supervision with Argilla.\n", + "Also, check the [feature reference](https://docs.v1.argilla.io/en/latest/reference/webapp/features.html#weak-labeling) for the Define rules mode of the web app and our [various tutorials](https://docs.v1.argilla.io/en/latest/tutorials/techniques/weak_supervision.html) to see practical examples of weak supervision workflows." 
] }, { @@ -1446,7 +1446,7 @@ "source": [ "## Train a model\n", "\n", - "The `ArgillaTrainer` is a wrapper around many of our favorite NLP libraries. It provides a very intuitive abstract workflow to facilitate simple training workflows using decent default pre-set configurations without having to worry about any data transformations from Argilla. More info [here](https://docs.argilla.io/en/latest/practical_guides/fine_tune.html)." + "The `ArgillaTrainer` is a wrapper around many of our favorite NLP libraries. It provides a very intuitive abstract workflow to facilitate simple training workflows using decent default pre-set configurations without having to worry about any data transformations from Argilla. More info [here](https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html)." ] }, { diff --git a/docs/_source/getting_started/quickstart_workflow_feedback.ipynb b/docs/_source/getting_started/quickstart_workflow_feedback.ipynb index a8d03660af..1fdfcb624a 100644 --- a/docs/_source/getting_started/quickstart_workflow_feedback.ipynb +++ b/docs/_source/getting_started/quickstart_workflow_feedback.ipynb @@ -9,7 +9,7 @@ "\n", "Note\n", " \n", - "This tutorial demonstrates a sample usage for `FeedbackDataset`, which offers implementations different from the old `TextClassificationDataset`, `Text2TextDataset` and `TokenClassificationDataset`. To have info about old datasets, you can have a look at them [here]([../getting_started/quickstart_workflow.html](https://docs.argilla.io/en/latest/getting_started/quickstart_workflow.html)). Not sure which dataset to use? Check out our section on [choosing a dataset](https://docs.argilla.io/en/latest/practical_guides/choose_dataset.html).\n", + "This tutorial demonstrates a sample usage for `FeedbackDataset`, which offers implementations different from the old `TextClassificationDataset`, `Text2TextDataset` and `TokenClassificationDataset`. 
To have info about old datasets, you can have a look at them [here](https://docs.v1.argilla.io/en/latest/getting_started/quickstart_workflow.html). Not sure which dataset to use? Check out our section on [choosing a dataset](https://docs.v1.argilla.io/en/latest/practical_guides/choose_dataset.html).\n", " \n", "" ] @@ -90,10 +90,10 @@ " * If you're using Docker, it is `http://localhost:6900` by default.\n", " * If you're using HF Spaces, it is constructed as `https://[your-owner-name]-[your_space_name].hf.space`.\n", "* `ARGILLA_API_KEY`: It is the API key of the Argilla Server. It is `owner` by default.\n", - "* `HF_TOKEN`: It is the Hugging Face API token. It is only needed if you're using a [private HF Space](https://docs.argilla.io/en/latest/getting_started/installation/deployments/huggingface-spaces.html#deploy-argilla-on-spaces). You can configure it in your profile: [Setting > Access Tokens](https://huggingface.co/settings/tokens).\n", + "* `HF_TOKEN`: It is the Hugging Face API token. It is only needed if you're using a [private HF Space](https://docs.v1.argilla.io/en/latest/getting_started/installation/deployments/huggingface-spaces.html#deploy-argilla-on-spaces). You can configure it in your profile: [Setting > Access Tokens](https://huggingface.co/settings/tokens).\n", "* `workspace`: It is a “space” inside your Argilla instance where authorized users can collaborate. It's `argilla` by default.\n", "\n", "For more info about custom configurations like headers, workspace separation or access credentials, check our [config page](https://docs.argilla.io/en/latest/getting_started/installation/configurations/configurations.html)." + "For more info about custom configurations like headers, workspace separation or access credentials, check our [config page](https://docs.v1.argilla.io/en/latest/getting_started/installation/configurations/configurations.html)." 
] }, { @@ -177,7 +177,7 @@ "source": [ "## Create a Dataset\n", "\n", - "FeedbackDataset is the container for Argilla Feedback structure. Argilla Feedback offers different components for FeedbackDatasets that you can employ for various aspects of your workflow. For a more detailed explanation, refer to the [documentation](https://docs.argilla.io/en/latest/practical_guides/practical_guides.html) and the [end-to-end tutorials](https://docs.argilla.io/en/latest/tutorials_and_integrations/tutorials/tutorials.html) for beginners.\n", + "FeedbackDataset is the container for Argilla Feedback structure. Argilla Feedback offers different components for FeedbackDatasets that you can employ for various aspects of your workflow. For a more detailed explanation, refer to the [documentation](https://docs.v1.argilla.io/en/latest/practical_guides/practical_guides.html) and the [end-to-end tutorials](https://docs.v1.argilla.io/en/latest/tutorials_and_integrations/tutorials/tutorials.html) for beginners.\n", "\n", "To start, we need to configure the FeedbackDatasest. To do so, there are two options: use a pre-defined template or create a custom one." ] @@ -189,7 +189,7 @@ "source": [ "### Use a Task Template\n", "\n", - "Argilla offers a set of [pre-defined templates for different tasks](https://docs.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#task-templates). You can use them to configure your dataset straightforward. For instance, if you want to create a dataset for simple text classification, you can use the following code:" + "Argilla offers a set of [pre-defined templates for different tasks](https://docs.v1.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#task-templates). You can use them to configure your dataset straightforward. 
For instance, if you want to create a dataset for simple text classification, you can use the following code:" ] }, { @@ -262,7 +262,7 @@ "source": [ "### Configure a Custom Dataset\n", "\n", - "If your dataset does not fit into one of the pre-defined templates, you [can create a custom dataset](https://docs.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#define-questions) by defining the fields, the different question types, the metadata properties and the vectors settings." + "If your dataset does not fit into one of the pre-defined templates, you [can create a custom dataset](https://docs.v1.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#define-questions) by defining the fields, the different question types, the metadata properties and the vectors settings." ] }, { @@ -336,7 +336,7 @@ "\n", "As with other datasets, Feedback datasets also allow to create a training pipeline and make inferences with the resulting model. After you gather responses with Argilla Feedback, you can easily fine-tune an LLM. In this example, we will have to complete a text classification task.\n", "\n", - "For fine-tuning, we will use setfit library and the [Argilla Trainer](https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#the-argillatrainer), which is a powerful wrapper around many of our favorite NLP libraries. It provides a very intuitive abstract representation to facilitate simple training workflows using decent default pre-set configurations without having to worry about any data transformations from Argilla.\n", + "For fine-tuning, we will use setfit library and the [Argilla Trainer](https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#the-argillatrainer), which is a powerful wrapper around many of our favorite NLP libraries. 
It provides a very intuitive abstract representation to facilitate simple training workflows using decent default pre-set configurations without having to worry about any data transformations from Argilla.\n", "\n", "Let us first create our dataset to train. For this example, we will use the [emotion](https://huggingface.co/datasets/argilla/emotion) dataset from Argilla, which was created using Argilla. Each text item has its responses as 6 different sentiments, which are Sadness, Joy, Love, Anger, Fear and Surprise." ] @@ -383,7 +383,7 @@ "id": "861c3648", "metadata": {}, "source": [ - "We can then start to create a training pipeline by first defining `TrainingTask`, which is used to define how the data should be processed and formatted according to the associated task and framework. Each task has its own classmethod and the data formatting can always be customized via `formatting_func`. You can visit [this page](https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#tasks) for more info. Simpler tasks like text classification can be defined using default definitions, as we do in this example." + "We can then start to create a training pipeline by first defining `TrainingTask`, which is used to define how the data should be processed and formatted according to the associated task and framework. Each task has its own classmethod and the data formatting can always be customized via `formatting_func`. You can visit [this page](https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#tasks) for more info. Simpler tasks like text classification can be defined using default definitions, as we do in this example." ] }, { @@ -487,7 +487,7 @@ "id": "85ad4ad2", "metadata": {}, "source": [ - "We have trained a model with FeedbackDataset in this tutorial. For more info about concepts in Argilla Feedback and LLMs, look [here](https://docs.argilla.io/en/latest/conceptual_guides/llm/llm.html). 
For a more detailed explanation, refer to the [documentation](https://docs.argilla.io/en/latest/practical_guides/practical_guides.html) and the [end-to-end tutorials](https://docs.argilla.io/en/latest/tutorials_and_integrations/tutorials/tutorials.html) for beginners." + "We have trained a model with FeedbackDataset in this tutorial. For more info about concepts in Argilla Feedback and LLMs, look [here](https://docs.v1.argilla.io/en/latest/conceptual_guides/llm/llm.html). For a more detailed explanation, refer to the [documentation](https://docs.v1.argilla.io/en/latest/practical_guides/practical_guides.html) and the [end-to-end tutorials](https://docs.v1.argilla.io/en/latest/tutorials_and_integrations/tutorials/tutorials.html) for beginners." ] }, { diff --git a/docs/_source/practical_guides/annotation_workflows/job_scheduling.md b/docs/_source/practical_guides/annotation_workflows/job_scheduling.md index 25a5646359..9a59fbd2ed 100644 --- a/docs/_source/practical_guides/annotation_workflows/job_scheduling.md +++ b/docs/_source/practical_guides/annotation_workflows/job_scheduling.md @@ -35,7 +35,7 @@ from argilla.listeners import listener @listener( dataset="my_dataset", # dataset to get record from - query="lucene query", # https://docs.argilla.io/en/latest/guides/query_datasets.html + query="lucene query", # https://docs.v1.argilla.io/en/latest/guides/query_datasets.html execution_interval_in_seconds=3, # interval to check execution of `update_records` ) def update_records(records, ctx): diff --git a/docs/_source/tutorials/notebooks/deploying-textclassification-colab-activelearning.ipynb b/docs/_source/tutorials/notebooks/deploying-textclassification-colab-activelearning.ipynb index dbe798cb6c..cfa6236721 100644 --- a/docs/_source/tutorials/notebooks/deploying-textclassification-colab-activelearning.ipynb +++ b/docs/_source/tutorials/notebooks/deploying-textclassification-colab-activelearning.ipynb @@ -8,7 +8,7 @@ }, "source": [ "# 🚀 Run Argilla with a Transformer 
in an active learning loop and a free GPU in your browser\n", - "In this tutorial, you will learn how to set up a complete active learning loop with Google Colab with a GPU in the backend. This tutorial is based on the [small-text active learning tutorial](https://docs.argilla.io/en/latest/tutorials/notebooks/training-textclassification-smalltext-activelearning.html). The main difference is that this tutorial is designed to be run in a Google Colab notebook with a GPU as the backend for a more efficient active learning loop with Transformer models. It is recommended to follow this tutorial directly on Google Colab. You can [open the Colab notebook via this hyperlink](https://colab.research.google.com/drive/11oTWno3hzgJnip11EcgqEhdpbW1IX-lP?usp=sharing), create your own copy and modify it for your own use-cases. \n", + "In this tutorial, you will learn how to set up a complete active learning loop with Google Colab with a GPU in the backend. This tutorial is based on the [small-text active learning tutorial](https://docs.v1.argilla.io/en/latest/tutorials/notebooks/training-textclassification-smalltext-activelearning.html). The main difference is that this tutorial is designed to be run in a Google Colab notebook with a GPU as the backend for a more efficient active learning loop with Transformer models. It is recommended to follow this tutorial directly on Google Colab. You can [open the Colab notebook via this hyperlink](https://colab.research.google.com/drive/11oTWno3hzgJnip11EcgqEhdpbW1IX-lP?usp=sharing), create your own copy and modify it for your own use-cases. \n", "\n", "⚠️ Note that this notebook requires manual input to start Argilla in a terminal and to input an ngrok token. Please read the instructions for each cell. If you do not follow the instructions and execute everything in the correct order, the code will bug. If you face an error, restarting your runtime can solve several issues. 
⚠️\n", "\n", @@ -110,7 +110,7 @@ "id": "_u7gFMiTBYTs" }, "source": [ - "Elastic Search is a requirement for using Argilla. The [docker installation](https://docs.argilla.io/en/latest/getting_started/quickstart.html) of Elastic Search recommended by Argilla does not work in Google Colab as [Colab does not support docker](https://github.com/googlecolab/colabtools/issues/299). Elastic Search therefore needs to be installed 'manually' with the following code." + "Elastic Search is a requirement for using Argilla. The [docker installation](https://docs.v1.argilla.io/en/latest/getting_started/quickstart.html) of Elastic Search recommended by Argilla does not work in Google Colab as [Colab does not support docker](https://github.com/googlecolab/colabtools/issues/299). Elastic Search therefore needs to be installed 'manually' with the following code." ] }, { @@ -280,7 +280,7 @@ "id": "EObyEHqxhes8" }, "source": [ - "If you click on your public link above, you should be able to access Argilla, but there is no data logged to Argilla yet. The following code downloads an example dataset and logs it to Argilla. You can change the following code to download any other dataset you want to annotate. The following code follows the [active learning with small-text](https://docs.argilla.io/en/latest/tutorials/notebooks/training-textclassification-smalltext-activelearning.html) tutorial and therefore contains fewer explanations. " + "If you click on your public link above, you should be able to access Argilla, but there is no data logged to Argilla yet. The following code downloads an example dataset and logs it to Argilla. You can change the following code to download any other dataset you want to annotate. The following code follows the [active learning with small-text](https://docs.v1.argilla.io/en/latest/tutorials/notebooks/training-textclassification-smalltext-activelearning.html) tutorial and therefore contains fewer explanations. 
" ] }, { @@ -625,7 +625,7 @@ }, "outputs": [], "source": [ - "## https://docs.argilla.io/en/latest/getting_started/quickstart.html#Manual-extraction\n", + "## https://docs.v1.argilla.io/en/latest/getting_started/quickstart.html#Manual-extraction\n", "\n", "# load your annotations\n", "dataset_annotated = rg.load(DATASET_NAME)\n", diff --git a/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-semantic.ipynb b/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-semantic.ipynb index 9fecd1c300..144af598c6 100644 --- a/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-semantic.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-textclassification-sentencetransformers-semantic.ipynb @@ -772,7 +772,7 @@ "\n", "With Argilla, we can annotate samples using semantic search and the 'find similar' button. There's a complete tutorial on this [here](labelling-textclassification-sentence-transformers-semantic.ipynb). It requires the recently added Similarity search features.\n", "\n", - "![Argilla with images tutorial](https://docs.argilla.io/en/latest/_static/reference/webapp/features-similaritysearch.png)\n", + "![Argilla with images tutorial](https://docs.v1.argilla.io/en/latest/_static/reference/webapp/features-similaritysearch.png)\n", "\n" ] }, diff --git a/docs/_source/tutorials/notebooks/labelling-tokenclassification-using-spacy-llm.ipynb b/docs/_source/tutorials/notebooks/labelling-tokenclassification-using-spacy-llm.ipynb index 46e8e434bc..4a99000831 100644 --- a/docs/_source/tutorials/notebooks/labelling-tokenclassification-using-spacy-llm.ipynb +++ b/docs/_source/tutorials/notebooks/labelling-tokenclassification-using-spacy-llm.ipynb @@ -443,7 +443,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let us create a list of records from the dataset items. 
See the [documentation](https://docs.argilla.io/en/latest/reference/python/python_client.html#argilla.client.models.TokenClassificationRecord) for more information on what other fields you can add to your record." + "Let us create a list of records from the dataset items. See the [documentation](https://docs.v1.argilla.io/en/latest/reference/python/python_client.html#argilla.client.models.TokenClassificationRecord) for more information on what other fields you can add to your record." ] }, { @@ -504,7 +504,7 @@ "\n", "\n", "\n", - "In this tutorial, we have implemented a `spacy-llm` pipeline for the NER task by using model predictions from GPT3.5. In addition, we employed a few-shot learning approach to improve the performance of our model, which is facilitated by `spacy-llm`. You can see more tutorials on the use of `spaCy` with Argilla [here](https://docs.argilla.io/en/latest/tutorials/libraries/spacy.html)" + "In this tutorial, we have implemented a `spacy-llm` pipeline for the NER task by using model predictions from GPT3.5. In addition, we employed a few-shot learning approach to improve the performance of our model, which is facilitated by `spacy-llm`. 
You can see more tutorials on the use of `spaCy` with Argilla [here](https://docs.v1.argilla.io/en/latest/tutorials/libraries/spacy.html)" ] } ], diff --git a/docs/_source/tutorials/notebooks/ner_fine_tune_bert_beginners.ipynb b/docs/_source/tutorials/notebooks/ner_fine_tune_bert_beginners.ipynb index 956a64547b..4d55a25b92 100644 --- a/docs/_source/tutorials/notebooks/ner_fine_tune_bert_beginners.ipynb +++ b/docs/_source/tutorials/notebooks/ner_fine_tune_bert_beginners.ipynb @@ -42,7 +42,7 @@ "\n", "For this purpose, we will first connect to Argilla and log our [dataset](https://huggingface.co/datasets/argilla/spacy_sm_wnut17), so that we can analyse it in a more visual way.\n", "\n", - ">💡 **Tip:** If you want to try with a different dataset than the one in this tutorial, but it's not yet annotated, Argilla has several tutorials on how to do it [manually](/practical_guides/annotate_dataset.html) or [automatically](https://docs.argilla.io/en/latest/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.html#Appendix:-Log-datasets-to-the-Hugging-Face-Hub).\n", + ">💡 **Tip:** If you want to try with a different dataset than the one in this tutorial, but it's not yet annotated, Argilla has several tutorials on how to do it [manually](/practical_guides/annotate_dataset.html) or [automatically](https://docs.v1.argilla.io/en/latest/tutorials/notebooks/labelling-tokenclassification-spacy-pretrained.html#Appendix:-Log-datasets-to-the-Hugging-Face-Hub).\n", "\n", "\n", "Next, we will preprocess our dataset and fine-tune the model. Here we will be using [DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert), to make it easier to understand it and start *playing* with the parameters easily. 
However, there are still plenty of similar ones to [discover](https://huggingface.co/docs/transformers/index#bigtable).\n", @@ -785,7 +785,7 @@ " height=\"400\"\n", " style=\"display: block; margin: 0 auto\" />\n", "\n", - "In addition, **Argilla** also has more options, e.g. to extract [metrics](https://docs.argilla.io/en/latest/reference/python/python_metrics.html) such as the one shown below.\n", + "In addition, **Argilla** also has more options, e.g. to extract [metrics](https://docs.v1.argilla.io/en/latest/reference/python/python_metrics.html) such as the one shown below.\n", "\n", "\n" ] diff --git a/docs/_source/tutorials/notebooks/training-textclassification-smalltext-activelearning.ipynb b/docs/_source/tutorials/notebooks/training-textclassification-smalltext-activelearning.ipynb index 3cdb6b7b41..3d3eab42ba 100644 --- a/docs/_source/tutorials/notebooks/training-textclassification-smalltext-activelearning.ipynb +++ b/docs/_source/tutorials/notebooks/training-textclassification-smalltext-activelearning.ipynb @@ -572,11 +572,11 @@ "metadata": {}, "source": [ "In the Argilla UI, we will set the number of records per page to 20 since it is also our chosen batch size. \n", - "Furthermore, we will use the [Status filter](https://docs.argilla.io/en/latest/practical_guides/annotate_dataset.html#status-filter) to filter out already annotated records. \n", + "Furthermore, we will use the [Status filter](https://docs.v1.argilla.io/en/latest/practical_guides/annotate_dataset.html#status-filter) to filter out already annotated records. \n", "Now, all we have to do is to annotate the displayed records. \n", "Once annotating everything, the classifier's training will be automatically triggered.\n", "\n", - "After a few seconds, you should see the newly queried batch when pressing the [Refresh button](https://docs.argilla.io/en/latest/reference/webapp/pages.html#refresh). 
\n", + "After a few seconds, you should see the newly queried batch when pressing the [Refresh button](https://docs.v1.argilla.io/en/latest/reference/webapp/pages.html#refresh). \n", "The training can take longer depending on your machine and whether you have a CUDA device. \n", "You can always check the status of the active learning loop from your notebook." ] diff --git a/docs/_source/tutorials/notebooks/training-textgeneration-unstructured.ipynb b/docs/_source/tutorials/notebooks/training-textgeneration-unstructured.ipynb index 4e79ccb653..947307ef31 100644 --- a/docs/_source/tutorials/notebooks/training-textgeneration-unstructured.ipynb +++ b/docs/_source/tutorials/notebooks/training-textgeneration-unstructured.ipynb @@ -634,7 +634,7 @@ "id": "311cf33b", "metadata": {}, "source": [ - "After uploading the dataset, head over to the Argilla UI and validate and/or adjust the summaries we pulled from the ISW site. You can also check out the [Argilla docs](https://docs.argilla.io/) for more information on all of the exciting tools Argilla provides to help you label, assess, and refine your training data!\n", + "After uploading the dataset, head over to the Argilla UI and validate and/or adjust the summaries we pulled from the ISW site. You can also check out the [Argilla docs](https://docs.v1.argilla.io/) for more information on all of the exciting tools Argilla provides to help you label, assess, and refine your training data!\n", "\n", " " ] diff --git a/docs/_source/tutorials_and_integrations/integrations/llama_index.ipynb b/docs/_source/tutorials_and_integrations/integrations/llama_index.ipynb index 52a240d922..c05673983d 100644 --- a/docs/_source/tutorials_and_integrations/integrations/llama_index.ipynb +++ b/docs/_source/tutorials_and_integrations/integrations/llama_index.ipynb @@ -114,7 +114,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now, we will set up an Argilla global handler for Llama Index. 
By doing so, we ensure that the predictions that we obtain using Llama Index is automatically uploaded to the Argilla client we initialized before Within the handler, we need to provide the dataset name that we will use. If the dataset does not exist, it will be created with the given name. You can also set the `API KEY`, `API URL`, and the `workspace` name. You can learn more about the variables that controls Argilla initialization [here](https://docs.argilla.io/en/latest/getting_started/installation/configurations/workspace_management.html)." + "Now, we will set up an Argilla global handler for Llama Index. By doing so, we ensure that the predictions that we obtain using Llama Index are automatically uploaded to the Argilla client we initialized before. Within the handler, we need to provide the dataset name that we will use. If the dataset does not exist, it will be created with the given name. You can also set the `API KEY`, `API URL`, and the `workspace` name. You can learn more about the variables that control Argilla initialization [here](https://docs.v1.argilla.io/en/latest/getting_started/installation/configurations/workspace_management.html)." ] }, { diff --git a/docs/_source/tutorials_and_integrations/integrations/process_documents_with_unstructured.ipynb b/docs/_source/tutorials_and_integrations/integrations/process_documents_with_unstructured.ipynb index e7a2be2f8a..01af30ac5f 100644 --- a/docs/_source/tutorials_and_integrations/integrations/process_documents_with_unstructured.ipynb +++ b/docs/_source/tutorials_and_integrations/integrations/process_documents_with_unstructured.ipynb @@ -605,7 +605,7 @@ "id": "311cf33b", "metadata": {}, "source": [ - "After uploading the dataset, head over to the Argilla UI and validate and/or adjust the summaries we pulled from the ISW site. 
You can also check out the [Argilla docs](https://docs.argilla.io/) for more information on all of the exciting tools Argilla provides to help you label, assess, and refine your training data!\n", + "After uploading the dataset, head over to the Argilla UI and validate and/or adjust the summaries we pulled from the ISW site. You can also check out the [Argilla docs](https://docs.v1.argilla.io/) for more information on all of the exciting tools Argilla provides to help you label, assess, and refine your training data!\n", "\n", " " ] diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/create-dataset-001.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/create-dataset-001.ipynb index 9e13c34b33..11e0ca9ca8 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/create-dataset-001.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/end2end_examples/create-dataset-001.ipynb @@ -362,7 +362,7 @@ "source": [ "This dataset contains a collection of news articles (we can see the content in the `text` column), which have been asigned one of the following classification `labels`: *World (0), Sports (1), Business (2), Sci/Tech (3)*.\n", "\n", - "Let's use the [task templates](https://docs.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#task-templates) to create a feedback dataset ready for `text-classification`." + "Let's use the [task templates](https://docs.v1.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#task-templates) to create a feedback dataset ready for `text-classification`." 
] }, { @@ -422,7 +422,7 @@ "tags": [] }, "source": [ - "We could compare this dataset with the custom configuration we would use previously (we can take a look at the [custom configuration](https://docs.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#custom-configuration) for more information on the creation of a `FeedbackDataset` when we want a finer control):" + "We could compare this dataset with the custom configuration we would use previously (we can take a look at the [custom configuration](https://docs.v1.argilla.io/en/latest/practical_guides/create_update_dataset/create_dataset.html#custom-configuration) for more information on the creation of a `FeedbackDataset` when we want a finer control):" ] }, { @@ -512,7 +512,7 @@ "tags": [] }, "source": [ - "The next step once we have our `FeedbackDataset` created is adding the [FeedbackRecords](https://docs.argilla.io/en/latest/getting_started/cheatsheet.html#create-records) to it." + "The next step once we have our `FeedbackDataset` created is adding the [FeedbackRecords](https://docs.v1.argilla.io/en/latest/getting_started/cheatsheet.html#create-records) to it." ] }, { @@ -1105,7 +1105,7 @@ "We created a `FeedbackDataset` for text classification with a `LabelQuestion`, from data stored as a `datasets.Dataset` and a `pandas.DataFrame`.\n", "This dataset was pushed both to `Argilla` where we can curate and label the records, and finally pushed it to the 🤗`hub`.\n", "\n", - "To learn more about how to work with the `FeedbackDataset` check the [cheatsheet](https://docs.argilla.io/en/latest/getting_started/cheatsheet.html#cheatsheet). To continue with assigning records to annotators, you can refer to the [next tutorial](./assign-records-002.ipynb)." + "To learn more about how to work with the `FeedbackDataset` check the [cheatsheet](https://docs.v1.argilla.io/en/latest/getting_started/cheatsheet.html#cheatsheet). 
To continue with assigning records to annotators, you can refer to the [next tutorial](./assign-records-002.ipynb)." ] } ], diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.ipynb index 5598b69f2e..e29ccb29df 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.ipynb @@ -48,7 +48,7 @@ "\n", "## Setup\n", "\n", - "To run this tutorial, you need to [install and launch Argilla](https://docs.argilla.io/en/latest/getting_started/quickstart_installation.html), as well as some other packages." + "To run this tutorial, you need to [install and launch Argilla](https://docs.v1.argilla.io/en/latest/getting_started/quickstart_installation.html), as well as some other packages." ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-data-model-drift.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-data-model-drift.ipynb index b4d8037f80..0973bffe53 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-data-model-drift.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/monitoring-data-model-drift.ipynb @@ -2473,7 +2473,7 @@ "source": [ "Data statistics is another way to analyze data drift. We can use `TextDescriptives` to compare the statistics of the reference and current datasets.\n", "\n", - ">For more information, on how to use TextDescriptives, check the [documentation](https://hlasse.github.io/TextDescriptives/). Note that Argilla also has an integration to [add text descriptives as metadata](https://docs.argilla.io/en/latest/tutorials_and_integrations/integrations/add_text_descriptives_as_metadata.html)." 
+ ">For more information, on how to use TextDescriptives, check the [documentation](https://hlasse.github.io/TextDescriptives/). Note that Argilla also has an integration to [add text descriptives as metadata](https://docs.v1.argilla.io/en/latest/tutorials_and_integrations/integrations/add_text_descriptives_as_metadata.html)." ] }, { diff --git a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-llm-mistral-sft.ipynb b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-llm-mistral-sft.ipynb index 28f714b67e..aef509bb49 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/feedback/training-llm-mistral-sft.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/feedback/training-llm-mistral-sft.ipynb @@ -8,9 +8,9 @@ "In this tutorial, you will learn how to finetune a Large Language Model (LLM), Mistral 7B in particular, on a chat-style instruction dataset. We start with [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1), an LLM that only does text completion, and we end up with our own [argilla/Mistral-7B-v0.1-chat-OIG](https://huggingface.co/argilla/Mistral-7B-v0.1-chat-OIG) model that faithfully follows instructions and acts as a helpful chat assistant.\n", "\n", "This tutorial consists of the following steps:\n", - "1. Preparing a [FeedbackDataset](https://docs.argilla.io/en/latest/conceptual_guides/data_model.html#feedback-dataset) in Argilla. \n", + "1. Preparing a [FeedbackDataset](https://docs.v1.argilla.io/en/latest/conceptual_guides/data_model.html#feedback-dataset) in Argilla. \n", "2. (Optional) Annotate instruction samples.\n", - "3. Set up the [ArgillaTrainer](https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#supervised-finetuning) for Supervised Finetuning.\n", + "3. Set up the [ArgillaTrainer](https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#supervised-finetuning) for Supervised Finetuning.\n", "4. 
Perform inference using the finetuned LLM.\n", "5. Publish the resulting model and dataset on the Hugging Face Hub.\n", "\n", @@ -169,7 +169,7 @@ "metadata": {}, "source": [ "## Preparing a FeedbackDataset in Argilla\n", - "In Argilla, the [FeedbackDataset](https://docs.argilla.io/en/latest/conceptual_guides/data_model.html#feedback-dataset) is a powerful and widely-configurable class that is in charge of defining the annotation process. In particular, we define [fields](https://docs.argilla.io/en/latest/conceptual_guides/data_model.html#field) and [questions](https://docs.argilla.io/en/latest/conceptual_guides/data_model.html#question).\n", + "In Argilla, the [FeedbackDataset](https://docs.v1.argilla.io/en/latest/conceptual_guides/data_model.html#feedback-dataset) is a powerful and widely-configurable class that is in charge of defining the annotation process. In particular, we define [fields](https://docs.v1.argilla.io/en/latest/conceptual_guides/data_model.html#field) and [questions](https://docs.v1.argilla.io/en/latest/conceptual_guides/data_model.html#question).\n", "\n", "The former is in charge of defining the structure for the data that will be annotated, while the latter determines in what way the annotators can annotate the data. In practice, `FeedbackDataset` instances for finetuning LLMs often have \"prompt\" and \"response\" text fields, sometimes alongside a \"context\" text field or some additional metadata (e.g. sample IDs).\n", "\n", @@ -205,7 +205,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Once created, we have to create individual [records](https://docs.argilla.io/en/latest/conceptual_guides/data_model.html#record) and push those to the dataset. For this tutorial, we will use some existing Apache-2.0 data from the [Open Instruction Generalist](https://huggingface.co/datasets/laion/OIG) chat-style instruction dataset. 
Due to the large size of this dataset, we will load the `dataset` with `streaming=True`, and semi-randomly sample 30k samples from this dataset." + "Once created, we have to create individual [records](https://docs.v1.argilla.io/en/latest/conceptual_guides/data_model.html#record) and push those to the dataset. For this tutorial, we will use some existing Apache-2.0 data from the [Open Instruction Generalist](https://huggingface.co/datasets/laion/OIG) chat-style instruction dataset. Due to the large size of this dataset, we will load the `dataset` with `streaming=True`, and semi-randomly sample 30k samples from this dataset." ] }, { @@ -333,7 +333,7 @@ "metadata": {}, "source": [ "## (Optional) Annotate instruction samples\n", - "If you are using your own proprietary data, data from an unreliable source, or require on your annotators to provide responses, then you must perform data annotation. However, for the purposes of this tutorial, we will assume that all data is high quality and skip this step. See the [data collection for LLMs](https://docs.argilla.io/en/latest/conceptual_guides/llm/llm.html) documentation for additional information on this phase." + "If you are using your own proprietary data, data from an unreliable source, or require your annotators to provide responses, then you must perform data annotation. However, for the purposes of this tutorial, we will assume that all data is high quality and skip this step. See the [data collection for LLMs](https://docs.v1.argilla.io/en/latest/conceptual_guides/llm/llm.html) documentation for additional information on this phase." ] }, { @@ -342,7 +342,7 @@ "source": [ "## Set up the ArgillaTrainer for Supervised Finetuning\n", "### Model & Tokenizer\n", - "Next, we have to set up the [ArgillaTrainer](https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#supervised-finetuning). First off, we will load the Mistral 7B `model` and `tokenizer`. 
We'll load the model using `float16` to improve the memory usage and efficiency, and `device_map=\"auto\"` automatically picks the best device to load the model on. For example, this will prioritize your GPU before your CPU.\n", + "Next, we have to set up the [ArgillaTrainer](https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#supervised-finetuning). First off, we will load the Mistral 7B `model` and `tokenizer`. We'll load the model using `float16` to improve the memory usage and efficiency, and `device_map=\"auto\"` automatically picks the best device to load the model on. For example, this will prioritize your GPU before your CPU.\n", "\n", "Furthermore, setting the `pad_token_id` to `eos_token_id` is required for open-end generation. If you don't define `pad_token_id`, it is often set to `eos_token_id` already, but you will be given warnings that you should do it yourself." ] @@ -367,7 +367,7 @@ "metadata": {}, "source": [ "### Training Task\n", - "Next, we can set up the [TrainingTask](https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#the-trainingtask) for supervised finetuning. This task requires a `formatting_func` that formats the data from Argilla in preparation for training. This formatting function first checks if the data quality is up to par. This is done by inspecting the annotations and checking if a sample was not annotated, annotated as \"Bad\" or discarded. For the purposes of the tutorial, I'll ignore this part and consider all data to be high quality.\n", + "Next, we can set up the [TrainingTask](https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#the-trainingtask) for supervised finetuning. This task requires a `formatting_func` that formats the data from Argilla in preparation for training. This formatting function first checks if the data quality is up to par. This is done by inspecting the annotations and checking if a sample was not annotated, annotated as \"Bad\" or discarded. 
For the purposes of the tutorial, I'll ignore this part and consider all data to be high quality.\n", "\n", "Then, we convert the data to our desired chat format, where each step consists of:\n", "```\n", @@ -675,7 +675,7 @@ "metadata": {}, "source": [ "### ArgillaTrainer & hyperparameters\n", - "Next, we can initialize the [ArgillaTrainer](https://docs.argilla.io/en/latest/practical_guides/fine_tune.html#the-argillatrainer)! We have already prepared all of the components that it requires." + "Next, we can initialize the [ArgillaTrainer](https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html#the-argillatrainer)! We have already prepared all of the components that it requires." ] }, { @@ -966,9 +966,9 @@ "\n", "If you're interested in finetuning LLMs, be sure to also check out these pages:\n", "\n", - "- [🦾 Fine-tune LLMs and other language models](https://docs.argilla.io/en/latest/practical_guides/fine_tune.html)\n", - "- [🪄 Fine-tuning and evaluating GPT-3.5 with human feedback for RAG](https://docs.argilla.io/en/latest/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.html)\n", - "- [🏆 Train a reward model for RLHF](https://docs.argilla.io/en/latest/tutorials_and_integrations/tutorials/feedback/train-reward-model-rlhf.html)" + "- [🦾 Fine-tune LLMs and other language models](https://docs.v1.argilla.io/en/latest/practical_guides/fine_tune.html)\n", + "- [🪄 Fine-tuning and evaluating GPT-3.5 with human feedback for RAG](https://docs.v1.argilla.io/en/latest/tutorials_and_integrations/tutorials/feedback/fine-tuning-openai-rag-feedback.html)\n", + "- [🏆 Train a reward model for RLHF](https://docs.v1.argilla.io/en/latest/tutorials_and_integrations/tutorials/feedback/train-reward-model-rlhf.html)" ] } ], diff --git a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/labelling-tokenclassification-using-spacy-llm.ipynb 
b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/labelling-tokenclassification-using-spacy-llm.ipynb index ea08594b44..b7257d0f4d 100644 --- a/docs/_source/tutorials_and_integrations/tutorials/other_datasets/labelling-tokenclassification-using-spacy-llm.ipynb +++ b/docs/_source/tutorials_and_integrations/tutorials/other_datasets/labelling-tokenclassification-using-spacy-llm.ipynb @@ -443,7 +443,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let us create a list of records from the dataset items. See the [documentation](https://docs.argilla.io/en/latest/reference/python/python_client.html#argilla.client.models.TokenClassificationRecord) for more information on what other fields you can add to your record." + "Let us create a list of records from the dataset items. See the [documentation](https://docs.v1.argilla.io/en/latest/reference/python/python_client.html#argilla.client.models.TokenClassificationRecord) for more information on what other fields you can add to your record." ] }, { @@ -504,7 +504,7 @@ "\n", "\n", "\n", - "In this tutorial, we have implemented a `spacy-llm` pipeline for the NER task by using model predictions from GPT3.5. In addition, we employed a few-shot learning approach to improve the performance of our model, which is facilitated by `spacy-llm`. You can see more tutorials on the use of `spaCy` with Argilla [here](https://docs.argilla.io/en/latest/tutorials/libraries/spacy.html)" + "In this tutorial, we have implemented a `spacy-llm` pipeline for the NER task by using model predictions from GPT3.5. In addition, we employed a few-shot learning approach to improve the performance of our model, which is facilitated by `spacy-llm`. 
You can see more tutorials on the use of `spaCy` with Argilla [here](https://docs.v1.argilla.io/en/latest/tutorials/libraries/spacy.html)" ] } ], diff --git a/examples/deployments/docker/docker-compose.yaml b/examples/deployments/docker/docker-compose.yaml index 923cae49db..cc0f2c652b 100644 --- a/examples/deployments/docker/docker-compose.yaml +++ b/examples/deployments/docker/docker-compose.yaml @@ -12,9 +12,9 @@ services: ARGILLA_ELASTICSEARCH: http://elasticsearch:9200 ARGILLA_AUTH_SECRET_KEY: ${ARGILLA_AUTH_SECRET_KEY:? Please generate a 32 character random string with `openssl rand -hex 32`} - # ARGILLA_ENABLE_TELEMETRY: 0 # Opt-out for telemetry https://docs.argilla.io/en/latest/reference/telemetry.html + # ARGILLA_ENABLE_TELEMETRY: 0 # Opt-out for telemetry https://docs.v1.argilla.io/en/latest/reference/telemetry.html - # Set user configuration https://docs.argilla.io/en/latest/getting_started/installation/configurations/user_management.html + # Set user configuration https://docs.v1.argilla.io/en/latest/getting_started/installation/configurations/user_management.html # ARGILLA_LOCAL_AUTH_USERS_DB_FILE: /config/.users.yaml # volumes: #- ${PWD}/.users.yaml:/config/.users.yaml