From e3213efd971af131c1e5938c74d2885bcf928825 Mon Sep 17 00:00:00 2001
From: davidberenstein1957
Date: Mon, 7 Oct 2024 13:51:18 +0000
Subject: [PATCH] Deployed 446a18d to docs_update-advanced-custom-field-example
 with MkDocs 1.6.0 and mike 2.1.3

---
 .../how_to_guides/custom_fields/index.html | 59 +++++++++++--------
 .../search/search_index.json               |  2 +-
 2 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/docs_update-advanced-custom-field-example/how_to_guides/custom_fields/index.html b/docs_update-advanced-custom-field-example/how_to_guides/custom_fields/index.html
index 08250106c9..2ffd49d045 100644
--- a/docs_update-advanced-custom-field-example/how_to_guides/custom_fields/index.html
+++ b/docs_update-advanced-custom-field-example/how_to_guides/custom_fields/index.html
@@ -2586,34 +2586,43 @@

Advanced Mode

Usage example

Let's reproduce the example from the Without advanced mode section, but this time we will wire the Handlebars templating engine into the template ourselves.

template = """
-<div id="custom-field-container"></div>
+<div id="content"></div>
 <script id="template" type="text/x-handlebars-template">
-    <div id="container">
-        <div class="column">
-            <h3>Original</h3>
-            <img src="{{record.fields.image.original}}" />
-        </div>
-        <div class="column">
-            <h3>Revision</h3>
-            <img src="{{record.fields.image.revision}}" />
-        </div>
-    </div>
-</script>
-""" # (1)
-
-script = """
-<script src="https://cdn.jsdelivr.net/npm/handlebars@latest/dist/handlebars.js"></script>
-<script>
-    const template = document.getElementById("template").innerHTML;
-    const compiledTemplate = Handlebars.compile(template);
-    const html = compiledTemplate({ record });
-    document.getElementById("custom-field-container").innerHTML = html;
-</script>
-""" # (2)
+    <style>
+    #container {
+        display: flex;
+        gap: 10px;
+    }
+    .column {
+        flex: 1;
+    }
+    </style>
+    <div id="container">
+        <div class="column">
+            <h3>Original</h3>
+            <img src="{{record.fields.image.original}}" />
+        </div>
+        <div class="column">
+            <h3>Revision</h3>
+            <img src="{{record.fields.image.revision}}" />
+        </div>
+    </div>
+</script>
+""" # (1)
+
+script = """
+<script src="https://cdn.jsdelivr.net/npm/handlebars@latest/dist/handlebars.js"></script>
+<script>
+    const template = document.getElementById("template").innerHTML;
+    const compiledTemplate = Handlebars.compile(template);
+    const html = compiledTemplate({ record });
+    document.getElementById("content").innerHTML = html;
+</script>
+""" # (2)
 
-  1. This is the Handlebars template script. We set its id to template so we can use it later in our JavaScript code, and its type to text/x-handlebars-template to indicate that this is a Handlebars template. Note that we also added a div with id custom-field-container to render the template into.
-  2. This is the JavaScript rendering script. We load the Handlebars library, then use it to compile the template and render the record. Finally, we render the result into the div with id custom-field-container.
+  1. This is the Handlebars template script. We set its id to template so we can use it later in our JavaScript code, and its type to text/x-handlebars-template to indicate that this is a Handlebars template. Note that we also added a div with id content to render the template into.
+  2. This is the JavaScript rendering script. We load the Handlebars library, then use it to compile the template and render the record. Finally, we render the result into the div with id content.

We can now pass these templates to the CustomField class, ensuring that advanced_mode is set to True.

import argilla as rg
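The hunk ends at the import, so the rest of the snippet is cut off in this diff. One plausible continuation, as a rough sketch (the credentials, dataset name, and question are illustrative, and it assumes CustomField accepts a single template string, so the two snippets above are concatenated):

import argilla as rg

client = rg.Argilla(api_url="<api_url>", api_key="<api_key>")

settings = rg.Settings(
    fields=[
        rg.CustomField(
            name="image",
            # The Handlebars template plus the script that compiles and renders it.
            template=template + script,
            advanced_mode=True,  # we supply our own rendering logic
        ),
    ],
    questions=[rg.TextQuestion(name="comment")],  # illustrative question
)

dataset = rg.Dataset(name="custom_field_advanced", settings=settings, client=client)
dataset.create()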
diff --git a/docs_update-advanced-custom-field-example/search/search_index.json b/docs_update-advanced-custom-field-example/search/search_index.json
index e30224a0a2..c16e75092e 100644
--- a/docs_update-advanced-custom-field-example/search/search_index.json
+++ b/docs_update-advanced-custom-field-example/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Welcome to Argilla","text":"

Argilla is a collaboration tool for AI engineers and domain experts to build high-quality datasets.

To get started:

  • Get started in 5 minutes!

    Deploy Argilla for free on the Hugging Face Hub or with Docker. Install the Python SDK with pip and create your first project.

    Quickstart

  • How-to guides

    Get familiar with the basic workflows of Argilla. Learn how to manage Users, Workspaces, Datasets, and Records to set up your data annotation projects.

    Learn more

Or play with the Argilla UI by signing in with your Hugging Face account.

Looking for Argilla 1.x?

Looking for documentation for Argilla 1.x? Visit the latest release.

Migrate to Argilla 2.x

Want to learn how to migrate from Argilla 1.x to 2.x? Take a look at our dedicated Migration Guide.

"},{"location":"#why-use-argilla","title":"Why use Argilla?","text":"

Argilla can be used for collecting human feedback for a wide variety of AI projects like traditional NLP (text classification, NER, etc.), LLMs (RAG, preference tuning, etc.), or multimodal models (text to image, etc.).

Argilla's programmatic approach lets you build workflows for continuous evaluation and model improvement. The goal of Argilla is to ensure your data work pays off by quickly iterating on the right data and models.

Improve your AI output quality through data quality

Compute is expensive and output quality is important. We help you focus on data, which tackles the root cause of both of these problems at once. Argilla helps you to achieve and keep high-quality standards for your data. This means you can improve the quality of your AI outputs.

Take control of your data and models

Most AI tools are black boxes. Argilla is different. We believe that you should be the owner of both your data and your models. That's why we provide you with all the tools your team needs to manage your data and models in a way that suits you best.

Improve efficiency by quickly iterating on the right data and models

Gathering data is a time-consuming process. Argilla helps by providing a tool that allows you to interact with your data in a more engaging way. This means you can quickly and easily label your data with filters, AI feedback suggestions and semantic search. So you can focus on training your models and monitoring their performance.

"},{"location":"#what-do-people-build-with-argilla","title":"What do people build with Argilla?","text":"

Datasets and models

Argilla is a tool that can be used to achieve and keep high-quality data standards with a focus on NLP and LLMs. The community uses Argilla to create amazing open-source datasets and models, and we love contributions to open-source too.

  • cleaned UltraFeedback dataset and the Notus and Notux models, where we improved benchmark and empirical human judgment for the Mistral and Mixtral models with cleaner data using human feedback.
  • distilabeled Intel Orca DPO dataset and the improved OpenHermes model, which show how we improved model performance by filtering out 50% of the original dataset through human and AI feedback.

Projects and pipelines

AI teams from companies like the Red Cross, Loris.ai and Prolific use Argilla to improve the quality and efficiency of AI projects. They shared their experiences in the AI community meetup.

  • AI for good: the Red Cross presentation showcases how their experts and AI team collaborate by classifying and redirecting requests from refugees of the Ukrainian crisis to streamline the support processes of the Red Cross.
  • Customer support: during the Loris meetup they showed how their AI team uses unsupervised and few-shot contrastive learning to help them quickly validate and gain labelled samples for a large number of multi-label classifiers.
  • Research studies: the showcase from Prolific announced their integration with Argilla. They use it to actively distribute data collection projects among their annotating workforce. This allows them to quickly and efficiently collect high-quality data for their research studies.
"},{"location":"community/","title":"Community","text":"

We are an open-source community-driven project not only focused on building a great product but also on building a great community, where you can get support, share your experiences, and contribute to the project! We would love to hear from you and help you get started with Argilla.

  • Discord

    In our Discord channels (#argilla-distilabel-general and #argilla-distilabel-help), you can get direct support from the community.

    Discord ↗

  • Community Meetup

    We host bi-weekly community meetups where you can listen in or present your work.

    Community Meetup ↗

  • Changelog

    The changelog is where you can find the latest updates and changes to the Argilla project.

    Changelog ↗

  • Roadmap

    We love to discuss our plans with the community. Feel encouraged to participate in our roadmap discussions.

    Roadmap ↗

"},{"location":"community/changelog/","title":"Changelog","text":"

All notable changes to this project will be documented in this file.

The format is based on Keep a Changelog, and this project adheres to Semantic Versioning.

"},{"location":"community/changelog/#unreleased","title":"Unreleased","text":""},{"location":"community/changelog/#230","title":"2.3.0","text":""},{"location":"community/changelog/#added","title":"Added","text":"
  • Added support for CustomField. (#5422)
  • Added inserted_at and updated_at to Resource model as properties. (#5540)
  • Added limit argument when fetching records (see the sketch after this list). (#5525)
  • Added similarity search support. (#5546)
  • Added filter support for id, _server_id, inserted_at and updated_at record attributes. (#5545)
  • Added support to read Argilla credentials from Colab secrets. (#5541)
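Taken together, these 2.3.0 additions change how records are fetched with the Python SDK. A minimal sketch, assuming the 2.3 SDK surface (rg.Argilla, rg.Query, rg.Filter); the dataset name and date value are illustrative:

import argilla as rg

client = rg.Argilla(api_url="<api_url>", api_key="<api_key>")
dataset = client.datasets(name="my_dataset")  # illustrative dataset name

# Fetch at most 100 records (#5525), filtered on the updated_at attribute (#5545).
records = dataset.records(
    query=rg.Query(filter=rg.Filter([("updated_at", ">=", "2024-10-01")])),
    limit=100,
)
for record in records:
    print(record.id)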
"},{"location":"community/changelog/#changed","title":"Changed","text":"
  • Changed the repr method for SettingsProperties to display the details of all the properties in the Settings object. (#5380)
  • Changed error messages when creating datasets with insufficient permissions. (#5540)
"},{"location":"community/changelog/#fixed","title":"Fixed","text":"
  • Fixed serialization of ChatField when collecting records from the hub and exporting to datasets. (#5554)
"},{"location":"community/changelog/#222","title":"2.2.2","text":""},{"location":"community/changelog/#fixed_1","title":"Fixed","text":"
  • Fixed from_hub with unsupported column names. (#5524)
  • Fixed from_hub with missing dataset subset configuration value. (#5524)
"},{"location":"community/changelog/#changed_1","title":"Changed","text":"
  • Changed from_hub to generate only fields, not questions, for string columns in the dataset. (#5524)
"},{"location":"community/changelog/#221","title":"2.2.1","text":""},{"location":"community/changelog/#fixed_2","title":"Fixed","text":"
  • Fixed from_hub errors when column names contain uppercase letters. (#5523)
  • Fixed from_hub errors when class feature values contain unlabelled values. (#5523)
  • Fixed from_hub errors when loading cached datasets. (#5523)
"},{"location":"community/changelog/#220","title":"2.2.0","text":"
  • Added new ChatField supporting chat messages. (#5376)
  • Added template settings to rg.Settings for classification, rating, and ranking questions. (#5426)
  • Added rg.Settings definition based on datasets.Features within rg.Dataset.from_hub. (#5426)
  • Added persistent record mapping to rg.Settings to be used in rg.Dataset.records.log. (#5466)
  • Added multiple error handling methods to the rg.Dataset.records.log method to warn, ignore, or raise errors. (#5466)
  • Changed dataset import and export of rg.LabelQuestion to use datasets.ClassLabel not datasets.Value. (#5474)
"},{"location":"community/changelog/#210","title":"2.1.0","text":""},{"location":"community/changelog/#added_1","title":"Added","text":"
  • Added new ImageField supporting URLs and Data URLs (see the sketch after this list). (#5279)
  • Added dark mode. (#5412)
  • Added settings parameter to rg.Dataset.from_hub to define the dataset settings before ingesting a dataset from the hub. (#5418)
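As a rough illustration of the 2.1.0 additions (not part of this patch; the repo id, labels, and question are placeholders), the new ImageField can be combined with the settings parameter of from_hub:

import argilla as rg

# Settings using the new ImageField.
settings = rg.Settings(
    fields=[rg.ImageField(name="image")],
    questions=[rg.LabelQuestion(name="label", labels=["ok", "not_ok"])],
)

# Define the settings before ingesting a dataset from the hub (#5418).
dataset = rg.Dataset.from_hub("<repo_id>", settings=settings)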
"},{"location":"community/changelog/#201","title":"2.0.1","text":""},{"location":"community/changelog/#fixed_3","title":"Fixed","text":"
  • Fixed error when creating optional fields. (#5362)
  • Fixed error creating integer and float metadata with visible_for_annotators. (#5364)
  • Fixed error when logging records with suggestions or responses for non-existent questions. (#5396 by @maxserras)
  • Fixed error from conflicts in testing suite when running tests in parallel. (#5349)
  • Fixed error in response model when creating a response with a None value. (#5343)
"},{"location":"community/changelog/#changed_2","title":"Changed","text":"
  • Changed from_hub method to raise an error when a dataset with the same name exists. (#5258)
  • Changed log method when ingesting records with no known keys to raise a descriptive error. (#5356)
  • Changed code snippets to add new datasets. (#5395)
"},{"location":"community/changelog/#added_2","title":"Added","text":"
  • Added Google Analytics to the documentation site. (#5366)
  • Added frontend skeletons to progress metrics to optimise load time and improve user experience. (#5391)
  • Added documentation in methods in API references for the Python SDK. (#5400)
"},{"location":"community/changelog/#fixed_4","title":"Fixed","text":"
  • Fix bug when submit the latest record, sometimes you navigate to non existing page #5419
"},{"location":"community/changelog/#200","title":"2.0.0","text":""},{"location":"community/changelog/#added_3","title":"Added","text":"
  • Added core class refactors. For an overview, see this blog post
  • Added TaskDistribution to define distribution of records to users .
  • Added new documentation site and structure and migrated legacy documentation.
"},{"location":"community/changelog/#changed_3","title":"Changed","text":"
  • Changed FeedbackDataset to Dataset.
  • Changed rg.init into rg.Argilla class to interact with Argilla server.
"},{"location":"community/changelog/#deprecated","title":"Deprecated","text":"
  • Deprecated task specific dataset classes like TextClassification and TokenClassification. To migrate legacy datasets to rg.Dataset class, see the how-to-guide.
  • Deprecated use case extensions like listeners and ArgillaTrainer.
"},{"location":"community/changelog/#200rc1","title":"2.0.0rc1","text":"

[!NOTE] This release for 2.0.0rc1 does not contain any changelog entries because it is the first release candidate for the 2.0.0 version. The following versions will contain the changelog entries again. For a general overview of the changes in the 2.0.0 version, please refer to our blog or our new documentation.

"},{"location":"community/changelog/#1290","title":"1.29.0","text":""},{"location":"community/changelog/#added_4","title":"Added","text":"
  • Added support for rating questions to include 0 as a valid value. (#4860)
  • Added support for Python 3.12. (#4837)
  • Added search by field in the FeedbackDataset UI search. (#4746)
  • Added record metadata info in the FeedbackDataset UI. (#4851)
  • Added highlight on search results in the FeedbackDataset UI. (#4747)
"},{"location":"community/changelog/#fixed_5","title":"Fixed","text":"
  • Fix wildcard import for the whole argilla module. (#4874)
  • Fix issue when record does not have vectors related. (#4856)
  • Fix issue on character level. (#4836)
"},{"location":"community/changelog/#1280","title":"1.28.0","text":""},{"location":"community/changelog/#added_5","title":"Added","text":"
  • Added suggestion multi score attribute. (#4730)
  • Added order by suggestion first. (#4731)
  • Added multi selection entity dropdown for span annotation overlap. (#4735)
  • Added pre selection highlight for span annotation. (#4726)
  • Added banner when persistent storage is not enabled. (#4744)
  • Added support on Python SDK for new multi-label questions labels_order attribute. (#4757)
"},{"location":"community/changelog/#changed_4","title":"Changed","text":"
  • Changed the way how Hugging Face space and user is showed in sign in. (#4748)
"},{"location":"community/changelog/#fixed_6","title":"Fixed","text":"
  • Fixed Korean character reversed. (#4753)
"},{"location":"community/changelog/#fixed_7","title":"Fixed","text":"
  • Fixed requirements for version of wrapt library conflicting with Python 3.11 (#4693)
"},{"location":"community/changelog/#1270","title":"1.27.0","text":""},{"location":"community/changelog/#added_6","title":"Added","text":"
  • Added Allow overlap spans in the FeedbackDataset. (#4668)
  • Added allow_overlapping parameter for span questions. (#4697)
  • Added overall progress bar on Datasets table. (#4696)
  • Added German language translation. (#4688)
"},{"location":"community/changelog/#changed_5","title":"Changed","text":"
  • New UI design for suggestions. (#4682)
"},{"location":"community/changelog/#fixed_8","title":"Fixed","text":"
  • Improve performance for more than 250 labels. (#4702)
"},{"location":"community/changelog/#1261","title":"1.26.1","text":""},{"location":"community/changelog/#added_7","title":"Added","text":"
  • Added support for automatic detection of RTL languages. (#4686)
"},{"location":"community/changelog/#1260","title":"1.26.0","text":""},{"location":"community/changelog/#added_8","title":"Added","text":"
  • If you expand the labels of a single or multi label Question, the state is maintained during the entire annotation process. (#4630)
  • Added support for span questions in the Python SDK. (#4617)
  • Added support for span values in suggestions and responses. (#4623)
  • Added span questions for FeedbackDataset. (#4622)
  • Added ARGILLA_CACHE_DIR environment variable to configure the client cache directory. (#4509)
"},{"location":"community/changelog/#fixed_9","title":"Fixed","text":"
  • Fixed contextualized workspaces. (#4665)
  • Fixed prepare for training when passing RankingValueSchema instances to suggestions. (#4628)
  • Fixed parsing ranking values in suggestions from HF datasets. (#4629)
  • Fixed reading description from API response payload. (#4632)
  • Fixed pulling (n*chunk_size)+1 records when using ds.pull or iterating over the dataset. (#4662)
  • Fixed client's resolution of enum values when calling the Search and Metrics api, to support Python >=3.11 enum handling. (#4672)
"},{"location":"community/changelog/#1250","title":"1.25.0","text":"

[!NOTE] For changes in the argilla-server module, visit the argilla-server release notes

"},{"location":"community/changelog/#added_9","title":"Added","text":"
  • Reorder labels in dataset settings page for single/multi label questions (#4598)
  • Added pandas v2 support using the python SDK. (#4600)
"},{"location":"community/changelog/#removed","title":"Removed","text":"
  • Removed missing response for status filter. Use pending instead. (#4533)
"},{"location":"community/changelog/#fixed_10","title":"Fixed","text":"
  • Fixed FloatMetadataProperty: value is not a valid float (#4570)
  • Fixed redirect to user-settings instead of 404 user_settings (#4609)
"},{"location":"community/changelog/#1240","title":"1.24.0","text":"

[!NOTE] This release does not contain any new features, but it includes a major change in the argilla-server dependency. The package is using the argilla-server dependency defined here. (#4537)

"},{"location":"community/changelog/#changed_6","title":"Changed","text":"
  • The package is using the argilla-server dependency defined here. (#4537)
"},{"location":"community/changelog/#1231","title":"1.23.1","text":""},{"location":"community/changelog/#fixed_11","title":"Fixed","text":"
  • Fixed Responsive view for Feedback Datasets. (#4579)
"},{"location":"community/changelog/#1230","title":"1.23.0","text":""},{"location":"community/changelog/#added_10","title":"Added","text":"
  • Added bulk annotation by filter criteria. (#4516)
  • Automatically fetch new datasets on focus tab. (#4514)
  • API v1 responses returning Record schema now always include dataset_id as attribute. (#4482)
  • API v1 responses returning Response schema now always include record_id as attribute. (#4482)
  • API v1 responses returning Question schema now always include dataset_id attribute. (#4487)
  • API v1 responses returning Field schema now always include dataset_id attribute. (#4488)
  • API v1 responses returning MetadataProperty schema now always include dataset_id attribute. (#4489)
  • API v1 responses returning VectorSettings schema now always include dataset_id attribute. (#4490)
  • Added pdf_to_html function to the .html_utils module that converts PDFs to data URLs so they can be rendered in the Argilla UI. (#4481)
  • Added ARGILLA_AUTH_SECRET_KEY environment variable. (#4539)
  • Added ARGILLA_AUTH_ALGORITHM environment variable. (#4539)
  • Added ARGILLA_AUTH_TOKEN_EXPIRATION environment variable. (#4539)
  • Added ARGILLA_AUTH_OAUTH_CFG environment variable. (#4546)
  • Added OAuth2 support for HuggingFace Hub. (#4546)
"},{"location":"community/changelog/#deprecated_1","title":"Deprecated","text":"
  • Deprecated ARGILLA_LOCAL_AUTH_* environment variables. Will be removed in the release v1.25.0. (#4539)
"},{"location":"community/changelog/#changed_7","title":"Changed","text":"
  • Changed regex pattern for username attribute in UserCreate. Now uppercase letters are allowed. (#4544)
"},{"location":"community/changelog/#removed_1","title":"Removed","text":"
  • Remove sending Authorization header from python SDK requests. (#4535)
"},{"location":"community/changelog/#fixed_12","title":"Fixed","text":"
  • Fixed keyboard shortcut for label questions. (#4530)
"},{"location":"community/changelog/#1220","title":"1.22.0","text":""},{"location":"community/changelog/#added_11","title":"Added","text":"
  • Added Bulk annotation support. (#4333)
  • Restore filters from feedback dataset settings. ([#4461])(https://github.com/argilla-io/argilla/pull/4461)
  • Warning on feedback dataset settings when leaving page with unsaved changes. (#4461)
  • Added pydantic v2 support using the python SDK. (#4459)
  • Added vector_settings to the __repr__ method of the FeedbackDataset and RemoteFeedbackDataset. (#4454)
  • Added integration for sentence-transformers using SentenceTransformersExtractor to configure vector_settings in FeedbackDataset and FeedbackRecord. (#4454)
"},{"location":"community/changelog/#changed_8","title":"Changed","text":"
  • Module argilla.cli.server definitions have been moved to argilla.server.cli module. (#4472)
  • [breaking] Changed vector_settings_by_name for generic property_by_name usage, which will return None instead of raising an error. (#4454)
  • The constant definition ES_INDEX_REGEX_PATTERN in module argilla._constants is now private. (#4472)
  • nan values in metadata properties will raise a 422 error when creating/updating records. (#4300)
  • None values are now allowed in metadata properties. (#4300)
  • Refactor and add width, height, autoplay and loop attributes as optional args in to_html functions. (#4481)
"},{"location":"community/changelog/#fixed_13","title":"Fixed","text":"
  • Paginating to a new record, automatically scrolls down to selected form area. (#4333)
"},{"location":"community/changelog/#deprecated_2","title":"Deprecated","text":"
  • The missing response status for filtering records is deprecated and will be removed in the release v1.24.0. Use pending instead. (#4433)
"},{"location":"community/changelog/#removed_2","title":"Removed","text":"
  • The deprecated python -m argilla database command has been removed. (#4472)
"},{"location":"community/changelog/#1210","title":"1.21.0","text":""},{"location":"community/changelog/#added_12","title":"Added","text":"
  • Added new draft queue for annotation view (#4334)
  • Added annotation metrics module for the FeedbackDataset (argilla.client.feedback.metrics). (#4175).
  • Added strategy to handle and translate errors from the server for 401 HTTP status code` (#4362)
  • Added integration for textdescriptives using TextDescriptivesExtractor to configure metadata_properties in FeedbackDataset and FeedbackRecord. (#4400). Contributed by @m-newhauser
  • Added POST /api/v1/me/responses/bulk endpoint to create responses in bulk for current user. (#4380)
  • Added list support for term metadata properties. (Closes #4359)
  • Added new CLI task to reindex datasets and records into the search engine. (#4404)
  • Added httpx_extra_kwargs argument to rg.init and Argilla to allow passing extra arguments to httpx.Client used by Argilla. (#4440)
  • Added ResponseStatusFilter enum in __init__ imports of Argilla (#4118). Contributed by @Piyush-Kumar-Ghosh.
"},{"location":"community/changelog/#changed_9","title":"Changed","text":"
  • More productive and simpler shortcut system (#4215)
  • Moved ArgillaSingleton, init and active_client to a new module singleton. (#4347)
  • Updated argilla.load functions to also work with FeedbackDatasets. (#4347)
  • [breaking] Updated argilla.delete functions to also work with FeedbackDatasets. It now raises an error if the dataset does not exist. (#4347)
  • Updated argilla.list_datasets functions to also work with FeedbackDatasets. (#4347)
"},{"location":"community/changelog/#fixed_14","title":"Fixed","text":"
  • Fixed error in TextClassificationSettings.from_dict method in which the label_schema created was a list of dict instead of a list of str. (#4347)
  • Fixed total records on pagination component (#4424)
"},{"location":"community/changelog/#removed_3","title":"Removed","text":"
  • Removed draft auto save for annotation view (#4334)
"},{"location":"community/changelog/#1200","title":"1.20.0","text":""},{"location":"community/changelog/#added_13","title":"Added","text":"
  • Added GET /api/v1/datasets/:dataset_id/records/search/suggestions/options endpoint to return suggestion available options for searching. (#4260)
  • Added metadata_properties to the __repr__ method of the FeedbackDataset and RemoteFeedbackDataset.(#4192).
  • Added get_model_kwargs, get_trainer_kwargs, get_trainer_model, get_trainer_tokenizer and get_trainer -methods to the ArgillaTrainer to improve interoperability across frameworks. (#4214).
  • Added additional formatting checks to the ArgillaTrainer to allow for better interoperability of defaults and formatting_func usage. (#4214).
  • Added a warning to the update_config-method of ArgillaTrainer to emphasize if the kwargs were updated correctly. (#4214).
  • Added argilla.client.feedback.utils module with html_utils (this mainly includes video/audio/image_to_html, which convert media to data URLs so they can be rendered in the Argilla UI, and create_token_highlights to highlight tokens in a custom way; both work on TextQuestion and TextField with use_markdown=True) and assignments (this mainly includes assign_records to assign records according to a number of annotators and records, an overlap and the shuffle option; and assign_workspace to assign and create if needed a workspace according to the record assignment). (#4121)
"},{"location":"community/changelog/#fixed_15","title":"Fixed","text":"
  • Fixed error in ArgillaTrainer, with numerical labels, using RatingQuestion instead of RankingQuestion (#4171)
  • Fixed error in ArgillaTrainer, now we can train for extractive_question_answering using a validation sample (#4204)
  • Fixed error in ArgillaTrainer, when training for sentence-similarity it didn't work with a list of values per record (#4211)
  • Fixed error in the unification strategy for RankingQuestion (#4295)
  • Fixed TextClassificationSettings.labels_schema order was not being preserved. Closes #3828 (#4332)
  • Fixed error when requesting non-existing API endpoints. Closes #4073 (#4325)
  • Fixed error when passing draft responses to create records endpoint. (#4354)
"},{"location":"community/changelog/#changed_10","title":"Changed","text":"
  • [breaking] Suggestions agent field only accepts now some specific characters and a limited length. (#4265)
  • [breaking] Suggestions score field only accepts now float values in the range 0 to 1. (#4266)
  • Updated POST /api/v1/dataset/:dataset_id/records/search endpoint to support optional query attribute. (#4327)
  • Updated POST /api/v1/dataset/:dataset_id/records/search endpoint to support filter and sort attributes. (#4327)
  • Updated POST /api/v1/me/datasets/:dataset_id/records/search endpoint to support optional query attribute. (#4270)
  • Updated POST /api/v1/me/datasets/:dataset_id/records/search endpoint to support filter and sort attributes. (#4270)
  • Changed the logging style while pulling and pushing FeedbackDataset to Argilla from tqdm style to rich. (#4267). Contributed by @zucchini-nlp.
  • Updated push_to_argilla to print repr of the pushed RemoteFeedbackDataset after push and changed show_progress to True by default. (#4223)
  • Changed models and tokenizer for the ArgillaTrainer to explicitly allow for changing them when needed. (#4214).
"},{"location":"community/changelog/#1190","title":"1.19.0","text":""},{"location":"community/changelog/#added_14","title":"Added","text":"
  • Added POST /api/v1/datasets/:dataset_id/records/search endpoint to search for records without user context, including responses by all users. (#4143)
  • Added POST /api/v1/datasets/:dataset_id/vectors-settings endpoint for creating vector settings for a dataset. (#3776)
  • Added GET /api/v1/datasets/:dataset_id/vectors-settings endpoint for listing the vectors settings for a dataset. (#3776)
  • Added DELETE /api/v1/vectors-settings/:vector_settings_id endpoint for deleting a vector settings. (#3776)
  • Added PATCH /api/v1/vectors-settings/:vector_settings_id endpoint for updating a vector settings. (#4092)
  • Added GET /api/v1/records/:record_id endpoint to get a specific record. (#4039)
  • Added support to include vectors for GET /api/v1/datasets/:dataset_id/records endpoint response using include query param. (#4063)
  • Added support to include vectors for GET /api/v1/me/datasets/:dataset_id/records endpoint response using include query param. (#4063)
  • Added support to include vectors for POST /api/v1/me/datasets/:dataset_id/records/search endpoint response using include query param. (#4063)
  • Added show_progress argument to from_huggingface() method to make the progress bar for the record-parsing process optional. (#4132)
  • Added a progress bar for the record-parsing process to from_huggingface() method using trange in tqdm. (#4132)
  • Added support to sort by inserted_at or updated_at for datasets with no metadata. (#4147)
  • Added max_records argument to pull() method for RemoteFeedbackDataset. (#4074)
  • Added functionality to push your models to the Hugging Face hub with ArgillaTrainer.push_to_huggingface (#3976). Contributed by @Racso-3141.
  • Added filter_by argument to ArgillaTrainer to filter by response_status (#4120).
  • Added sort_by argument to ArgillaTrainer to sort by metadata (#4120).
  • Added max_records argument to ArgillaTrainer to limit the records used for training. (#4120)
  • Added add_vector_settings method to local and remote FeedbackDataset. (#4055)
  • Added update_vectors_settings method to local and remote FeedbackDataset. (#4122)
  • Added delete_vectors_settings method to local and remote FeedbackDataset. (#4130)
  • Added vector_settings_by_name method to local and remote FeedbackDataset. (#4055)
  • Added find_similar_records method to local and remote FeedbackDataset (see the sketch after this list). (#4023)
  • Added ARGILLA_SEARCH_ENGINE environment variable to configure the search engine to use. (#4019)
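A hedged sketch of the similarity-search additions above, using the 1.19 SDK; the dataset and vector names are illustrative, and the find_similar_records signature is assumed from this list rather than confirmed by the patch:

import argilla as rg

rg.init(api_url="<api_url>", api_key="<api_key>")

dataset = rg.FeedbackDataset.from_argilla(name="my_dataset")  # illustrative name

# Vector-based similarity search over records (#4023); assumes a vector
# settings entry named "sentence_embedding" exists and records carry vectors.
results = dataset.find_similar_records(
    vector_name="sentence_embedding",
    record=dataset.records[0],
    max_results=5,
)
for record, score in results:
    print(score, record.fields)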
"},{"location":"community/changelog/#changed_11","title":"Changed","text":"
  • [breaking] Remove support for Elasticsearch < 8.5 and OpenSearch < 2.4. (#4173)
  • [breaking] Users working with OpenSearch engines must use version >=2.4 and set ARGILLA_SEARCH_ENGINE=opensearch. (#4019 and #4111)
  • [breaking] Changed FeedbackDataset.*_by_name() methods to return None when no match is found (#4101).
  • [breaking] limit query parameter for GET /api/v1/datasets/:dataset_id/records endpoint now only accepts values greater than or equal to 1 and less than or equal to 1000. (#4143)
  • [breaking] limit query parameter for GET /api/v1/me/datasets/:dataset_id/records endpoint now only accepts values greater than or equal to 1 and less than or equal to 1000. (#4143)
  • Updated GET /api/v1/datasets/:dataset_id/records endpoint to fetch records using the search engine. (#4142)
  • Updated GET /api/v1/me/datasets/:dataset_id/records endpoint to fetch records using the search engine. (#4142)
  • Updated POST /api/v1/datasets/:dataset_id/records endpoint to allow creating records with vectors. (#4022)
  • Updated PATCH /api/v1/datasets/:dataset_id endpoint to allow updating the allow_extra_metadata attribute. (#4112)
  • Updated PATCH /api/v1/datasets/:dataset_id/records endpoint to allow updating records with vectors. (#4062)
  • Updated PATCH /api/v1/records/:record_id endpoint to allow updating records with vectors. (#4062)
  • Updated POST /api/v1/me/datasets/:dataset_id/records/search endpoint to allow searching records with vectors. (#4019)
  • Updated BaseElasticAndOpenSearchEngine.index_records method to also index record vectors. (#4062)
  • Updated FeedbackDataset.__init__ to allow passing a list of vector settings. (#4055)
  • Updated FeedbackDataset.push_to_argilla to also push vector settings. (#4055)
  • Updated FeedbackDatasetRecord to support the creation of records with vectors. (#4043)
  • Using cosine similarity to compute similarity between vectors. (#4124)
"},{"location":"community/changelog/#fixed_16","title":"Fixed","text":"
  • Fixed SVG images rendering off-screen when images are too large. (#4047)
  • Fixed creating records with responses from multiple users. Closes #3746 and #3808 (#4142)
  • Fixed owners deleting or updating annotators' responses. (Commit 403a66d)
  • Fixed passing user_id when getting records by id. (Commit 98c7927)
  • Fixed non-basic tags serialized when pushing a dataset to the Hugging Face Hub. Closes #4089 (#4200)
"},{"location":"community/changelog/#1180","title":"1.18.0","text":""},{"location":"community/changelog/#added_15","title":"Added","text":"
  • New GET /api/v1/datasets/:dataset_id/metadata-properties endpoint for listing dataset metadata properties. (#3813)
  • New POST /api/v1/datasets/:dataset_id/metadata-properties endpoint for creating dataset metadata properties. (#3813)
  • New PATCH /api/v1/metadata-properties/:metadata_property_id endpoint allowing the update of a specific metadata property. (#3952)
  • New DELETE /api/v1/metadata-properties/:metadata_property_id endpoint for deletion of a specific metadata property. (#3911)
  • New GET /api/v1/metadata-properties/:metadata_property_id/metrics endpoint to compute metrics for a specific metadata property. (#3856)
  • New PATCH /api/v1/records/:record_id endpoint to update a record. (#3920)
  • New PATCH /api/v1/dataset/:dataset_id/records endpoint to bulk update the records of a dataset. (#3934)
  • Added missing validations to PATCH /api/v1/questions/:question_id. Now title and description use the same validations used to create questions. (#3967)
  • Added TermsMetadataProperty, IntegerMetadataProperty and FloatMetadataProperty classes allowing to define metadata properties for a FeedbackDataset. (#3818)
  • Added metadata_filters to filter_by method in RemoteFeedbackDataset to filter based on metadata, i.e. TermsMetadataFilter, IntegerMetadataFilter, and FloatMetadataFilter (see the sketch after this list). (#3834)
  • Added a validation layer for both metadata_properties and metadata_filters in their schemas and as part of the add_records and filter_by methods, respectively. (#3860)
  • Added sort_by query parameter to listing records endpoints that allows to sort the records by inserted_at, updated_at or metadata property. (#3843)
  • Added add_metadata_property method to both FeedbackDataset and RemoteFeedbackDataset (i.e. FeedbackDataset in Argilla). (#3900)
  • Added fields inserted_at and updated_at in RemoteResponseSchema. (#3822)
  • Added support for sort_by for RemoteFeedbackDataset i.e. a FeedbackDataset uploaded to Argilla. (#3925)
  • Added metadata_properties support for both push_to_huggingface and from_huggingface. (#3947)
  • Added support for updating records (metadata) from the Python SDK. (#3946)
  • Added delete_metadata_properties method to delete metadata properties. (#3932)
  • Added update_metadata_properties method to update metadata_properties. (#3961)
  • Added automatic model card generation through ArgillaTrainer.save (#3857)
  • Added FeedbackDataset TaskTemplateMixin for pre-defined task templates. (#3969)
  • Added a maximum limit of 50 on the number of options a ranking question can accept. (#3975)
  • New last_activity_at field to FeedbackDataset exposing when the last activity for the associated dataset occurs. (#3992)
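A minimal sketch of the metadata additions above, assuming the 1.18 SDK classes named in this list; the field, question, and property names are illustrative:

import argilla as rg

# Define metadata properties when creating a FeedbackDataset (#3818).
dataset = rg.FeedbackDataset(
    fields=[rg.TextField(name="text")],
    questions=[rg.TextQuestion(name="answer")],
    metadata_properties=[
        rg.TermsMetadataProperty(name="source", values=["news", "wiki"]),
        rg.IntegerMetadataProperty(name="tokens", min=0, max=2048),
    ],
)

# On a dataset pushed to Argilla, records can then be filtered by metadata (#3834):
# remote = dataset.push_to_argilla(name="my_dataset", workspace="my_workspace")
# filtered = remote.filter_by(
#     metadata_filters=[rg.TermsMetadataFilter(name="source", values=["news"])]
# )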
"},{"location":"community/changelog/#changed_12","title":"Changed","text":"
  • GET /api/v1/datasets/{dataset_id}/records, GET /api/v1/me/datasets/{dataset_id}/records and POST /api/v1/me/datasets/{dataset_id}/records/search endpoints to return the total number of records. (#3848, #3903)
  • Implemented __len__ method for filtered datasets to return the number of records matching the provided filters. (#3916)
  • Increased the default max result window for Elasticsearch indexes created for Feedback datasets. (#3929)
  • Forced Elasticsearch index refresh after records creation. (#3929)
  • Validate metadata fields for filtering and sorting in the Python SDK. (#3993)
  • Using metadata property name instead of id for indexing data in search engine index. (#3994)
"},{"location":"community/changelog/#fixed_17","title":"Fixed","text":"
  • Fixed response schemas to allow values to be None i.e. when a record is discarded the response.values are set to None. (#3926)
"},{"location":"community/changelog/#1170","title":"1.17.0","text":""},{"location":"community/changelog/#added_16","title":"Added","text":"
  • Added fields inserted_at and updated_at in RemoteResponseSchema (#3822).
  • Added automatic model card generation through ArgillaTrainer.save (#3857).
  • Added task templates to the FeedbackDataset (#3973).
"},{"location":"community/changelog/#changed_13","title":"Changed","text":"
  • Updated Dockerfile to use multi stage build (#3221 and #3793).
  • Updated active learning for text classification notebooks to use the most recent small-text version (#3831).
  • Changed argilla dataset name in the active learning for text classification notebooks to be consistent with the default names in the huggingface spaces (#3831).
  • FeedbackDataset API methods have been aligned to be accessible through its several implementations (#3937).
  • Added unify_responses support for remote datasets (#3937).
"},{"location":"community/changelog/#fixed_18","title":"Fixed","text":"
  • Fix field not shown in the order defined in the dataset settings. Closes #3959 (#3984)
  • Updated active learning for text classification notebooks to pass ids of type int to TextClassificationRecord (#3831).
  • Fixed record fields validation that was preventing from logging records with optional fields (i.e. required=True) when the field value was None (#3846).
  • Always set pretrained_model_name_or_path attribute as string in ArgillaTrainer (#3914).
  • The inserted_at and updated_at attributes are created using the utcnow factory to avoid unexpected race conditions on timestamp creation (#3945)
  • Fixed configure_dataset_settings when providing the workspace via the arg workspace (#3887).
  • Fixed saving of models trained with ArgillaTrainer with a peft_config parameter (#3795).
  • Fixed backwards compatibility on from_huggingface when loading a FeedbackDataset from the Hugging Face Hub that was previously dumped using another version of Argilla, starting at 1.8.0, when it was first introduced (#3829).
  • Fixed wrong __repr__ for TrainingTask. (#3969)
  • Fixed wrong key return error in prepare_for_training_with_* for TrainingTask. (#3969)
"},{"location":"community/changelog/#deprecated_3","title":"Deprecated","text":"
  • Function rg.configure_dataset is deprecated in favour of rg.configure_dataset_settings. The former will be removed in version 1.19.0
"},{"location":"community/changelog/#1160","title":"1.16.0","text":""},{"location":"community/changelog/#added_17","title":"Added","text":"
  • Added ArgillaTrainer integration with sentence-transformers, allowing fine tuning for sentence similarity (#3739)
  • Added ArgillaTrainer integration with TrainingTask.for_question_answering (#3740)
  • Added Auto save record to save automatically the current record that you are working on (#3541)
  • Added ArgillaTrainer integration with OpenAI, allowing fine tuning for chat completion (#3615)
  • Added workspaces list command to list Argilla workspaces (#3594).
  • Added datasets list command to list Argilla datasets (#3658).
  • Added users create command to create users (#3667).
  • Added whoami command to get current user (#3673).
  • Added users delete command to delete users (#3671).
  • Added users list command to list users (#3688).
  • Added workspaces delete-user command to remove a user from a workspace (#3699).
  • Added workspaces create command to create an Argilla workspace (#3676).
  • Added datasets push-to-hub command to push a FeedbackDataset from Argilla into the HuggingFace Hub (#3685).
  • Added info command to get info about the used Argilla client and server (#3707).
  • Added datasets delete command to delete a FeedbackDataset from Argilla (#3703).
  • Added created_at and updated_at properties to RemoteFeedbackDataset and FilteredRemoteFeedbackDataset (#3709).
  • Added handling of PermissionError when executing a command with a logged-in user with insufficient permissions (#3717).
  • Added workspaces add-user command to add a user to workspace (#3712).
  • Added workspace_id param to GET /api/v1/me/datasets endpoint (#3727).
  • Added workspace_id arg to list_datasets in the Python SDK (#3727).
  • Added argilla script that allows executing the Argilla CLI using the argilla command (#3730).
  • Added support for passing already initialized model and tokenizer instances to the ArgillaTrainer (#3751)
  • Added server_info function to check the Argilla server information (also accessible via rg.server_info) (#3772).
"},{"location":"community/changelog/#changed_14","title":"Changed","text":"
  • Move database commands under server group of commands (#3710)
  • server commands only included in the CLI app when server extra requirements are installed (#3710).
  • Updated PUT /api/v1/responses/{response_id} to replace values stored with received values in request (#3711).
  • Display a UserWarning when the user_id in Workspace.add_user and Workspace.delete_user is the ID of a user with the owner role, as they don't require explicit permissions (#3716).
  • Renamed tasks sub-package to cli (#3723).
  • Changed argilla database command in the CLI to now be accessed via argilla server database, to be deprecated in the upcoming release (#3754).
  • Changed visible_options (of label and multi-label selection questions) validation in the backend to check that the provided value is greater than or equal to 3 and less than or equal to the number of provided options (#3773).
"},{"location":"community/changelog/#fixed_19","title":"Fixed","text":"
  • Fixed remove user modification in text component on clear answers (#3775)
  • Fixed Highlight raw text field in dataset feedback task (#3731)
  • Fixed Field title too long (#3734)
  • Fixed error messages when deleting a DatasetForTextClassification (#3652)
  • Fixed Pending queue pagination problems during data annotation (#3677)
  • Fixed visible_labels default value to be 20 just when visible_labels not provided and len(labels) > 20, otherwise it will either be the provided visible_labels value or None, for LabelQuestion and MultiLabelQuestion (#3702).
  • Fixed DatasetCard generation when RemoteFeedbackDataset contains suggestions (#3718).
  • Added missing draft status in ResponseSchema, as now there can be responses with draft status when annotating via the UI (#3749).
  • Fixed searches when queried words are distributed across the record fields (#3759).
  • Fixed Python 3.11 compatibility issue with /api/datasets endpoints due to the TaskType enum replacement in the endpoint URL (#3769).
  • Fixed RankingValueSchema and FeedbackRankingValueModel schemas to allow rank=None when status=draft (#3781).
"},{"location":"community/changelog/#1151","title":"1.15.1","text":""},{"location":"community/changelog/#fixed_20","title":"Fixed","text":"
  • Fixed Text component text content sanitization behavior just for markdown to prevent disappear the text(#3738)
  • Fixed Text component now you need to press Escape to exit the text area (#3733)
  • Fixed SearchEngine was creating the same number of primary shards and replica shards for each FeedbackDataset (#3736).
"},{"location":"community/changelog/#1150","title":"1.15.0","text":""},{"location":"community/changelog/#added_18","title":"Added","text":"
  • Added Enable to update guidelines and dataset settings for Feedback Datasets directly in the UI (#3489)
  • Added ArgillaTrainer integration with TRL, allowing for easy supervised finetuning, reward modeling, direct preference optimization and proximal policy optimization (#3467)
  • Added formatting_func to ArgillaTrainer for FeedbackDataset datasets add a custom formatting for the data (#3599).
  • Added login function in argilla.client.login to login into an Argilla server and store the credentials locally (#3582).
  • Added login command to login into an Argilla server (#3600).
  • Added logout command to logout from an Argilla server (#3605).
  • Added DELETE /api/v1/suggestions/{suggestion_id} endpoint to delete a suggestion given its ID (#3617).
  • Added DELETE /api/v1/records/{record_id}/suggestions endpoint to delete several suggestions linked to the same record given their IDs (#3617).
  • Added response_status param to GET /api/v1/datasets/{dataset_id}/records to be able to filter by response_status as previously included for GET /api/v1/me/datasets/{dataset_id}/records (#3613).
  • Added list classmethod to ArgillaMixin to be used as FeedbackDataset.list(), also including the workspace to list from as arg (#3619).
  • Added filter_by method in RemoteFeedbackDataset to filter based on response_status (see the sketch after this list) (#3610).
  • Added list_workspaces function (to be used as rg.list_workspaces, but Workspace.list is preferred) to list all the workspaces from a user in Argilla (#3641).
  • Added list_datasets function (to be used as rg.list_datasets) to list the TextClassification, TokenClassification, and Text2Text datasets in Argilla (#3638).
  • Added RemoteSuggestionSchema to manage suggestions in Argilla, including the delete method to delete suggestions from Argilla via DELETE /api/v1/suggestions/{suggestion_id} (#3651).
  • Added delete_suggestions to RemoteFeedbackRecord to remove suggestions from Argilla via DELETE /api/v1/records/{record_id}/suggestions (#3651).
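A short sketch of the filter_by method referenced above, assuming the 1.15 SDK; the dataset name is illustrative:

import argilla as rg

rg.init(api_url="<api_url>", api_key="<api_key>")

remote = rg.FeedbackDataset.from_argilla(name="my_dataset")  # illustrative name

# Keep only records whose responses have been submitted (#3610).
submitted = remote.filter_by(response_status="submitted")
for record in submitted.records:
    print(record.responses)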
"},{"location":"community/changelog/#changed_15","title":"Changed","text":"
  • Changed Optional label for * mark for required question (#3608)
  • Updated RemoteFeedbackDataset.delete_records to use batch delete records endpoint (#3580).
  • Included allowed_for_roles for some RemoteFeedbackDataset, RemoteFeedbackRecords, and RemoteFeedbackRecord methods that are only allowed for users with roles owner and admin (#3601).
  • Renamed ArgillaToFromMixin to ArgillaMixin (#3619).
  • Moved users CLI app under the database CLI app (#3593).
  • Moved server Enum classes to the argilla.server.enums module (#3620).
"},{"location":"community/changelog/#fixed_21","title":"Fixed","text":"
  • Fixed Filter by workspace in breadcrumbs (#3577)
  • Fixed Filter by workspace in datasets table (#3604)
  • Fixed Query search highlight for Text2Text and TextClassification (#3621)
  • Fixed RatingQuestion.values validation to raise a ValidationError when values are outside the [1, 10] range (#3626).
"},{"location":"community/changelog/#removed_4","title":"Removed","text":"
  • Removed multi_task_text_token_classification from TaskType as not used (#3640).
  • Removed argilla_id in favor of id from RemoteFeedbackDataset (#3663).
  • Removed fetch_records from RemoteFeedbackDataset as now the records are lazily fetched from Argilla (#3663).
  • Removed push_to_argilla from RemoteFeedbackDataset, as it just works when calling it through a FeedbackDataset locally, as now the updates of the remote datasets are automatically pushed to Argilla (#3663).
  • Removed set_suggestions in favor of update(suggestions=...) for both FeedbackRecord and RemoteFeedbackRecord, as all the updates of any \"updateable\" attribute of a record will go through update instead (#3663).
  • Removed unused owner attribute for the client Dataset data model (#3665)
"},{"location":"community/changelog/#1141","title":"1.14.1","text":""},{"location":"community/changelog/#fixed_22","title":"Fixed","text":"
  • Fixed PostgreSQL database not being updated after begin_nested because of missing commit (#3567).
"},{"location":"community/changelog/#fixed_23","title":"Fixed","text":"
  • Fixed settings could not be provided when updating a rating or ranking question (#3552).
"},{"location":"community/changelog/#1140","title":"1.14.0","text":""},{"location":"community/changelog/#added_19","title":"Added","text":"
  • Added PATCH /api/v1/fields/{field_id} endpoint to update the field title and markdown settings (#3421).
  • Added PATCH /api/v1/datasets/{dataset_id} endpoint to update dataset name and guidelines (#3402).
  • Added PATCH /api/v1/questions/{question_id} endpoint to update question title, description and some settings (depending on the type of question) (#3477).
  • Added DELETE /api/v1/records/{record_id} endpoint to remove a record given its ID (#3337).
  • Added pull method in RemoteFeedbackDataset (a FeedbackDataset pushed to Argilla) to pull all the records from it and return it as a local copy as a FeedbackDataset (#3465).
  • Added delete method in RemoteFeedbackDataset (a FeedbackDataset pushed to Argilla) (#3512).
  • Added delete_records method in RemoteFeedbackDataset, and delete method in RemoteFeedbackRecord to delete records from Argilla (#3526).
"},{"location":"community/changelog/#changed_16","title":"Changed","text":"
  • Improved efficiency of weak labeling when dataset contains vectors (#3444).
  • Added ArgillaDatasetMixin to detach the Argilla-related functionality from the FeedbackDataset (#3427)
  • Moved FeedbackDataset-related pydantic.BaseModel schemas to argilla.client.feedback.schemas instead, to be better structured and more scalable and maintainable (#3427)
  • Updated CLI to use database async connection (#3450).
  • Limited rating question values to the positive range [1, 10] (#3451).
  • Updated POST /api/users endpoint to be able to provide a list of workspace names to which the user should be linked (#3462).
  • Updated Python client User.create method to be able to provide a list of workspace names to which the user should be linked (#3462).
  • Updated GET /api/v1/me/datasets/{dataset_id}/records endpoint to allow getting records matching one of the response statuses provided via query param (#3359).
  • Updated POST /api/v1/me/datasets/{dataset_id}/records endpoint to allow searching records matching one of the response statuses provided via query param (#3359).
  • Updated SearchEngine.search method to allow searching records matching one of the response statuses provided (#3359).
  • After calling FeedbackDataset.push_to_argilla, the methods FeedbackDataset.add_records and FeedbackRecord.set_suggestions will automatically call Argilla with no need of calling push_to_argilla explicitly (#3465).
  • Now calling FeedbackDataset.push_to_huggingface dumps the responses as a List[Dict[str, Any]] instead of Sequence to make it more readable via 🤗 datasets (#3539).
"},{"location":"community/changelog/#fixed_24","title":"Fixed","text":"
  • Fixed issue with bool values and default from Jinja2 while generating the HuggingFace DatasetCard from argilla_template.md (#3499).
  • Fixed DatasetConfig.from_yaml which was failing when calling FeedbackDataset.from_huggingface as the UUIDs cannot be deserialized automatically by PyYAML, so UUIDs are neither dumped nor loaded anymore (#3502).
  • Fixed an issue that didn't allow the Argilla server to work behind a proxy (#3543).
  • TextClassificationSettings and TokenClassificationSettings labels are properly parsed to strings both in the Python client and in the backend endpoint (#3495).
  • Fixed PUT /api/v1/datasets/{dataset_id}/publish to check whether at least one field and question has required=True (#3511).
  • Fixed FeedbackDataset.from_huggingface as suggestions were being lost when there were no responses (#3539).
  • Fixed QuestionSchema and FieldSchema not validating name attribute (#3550).
"},{"location":"community/changelog/#deprecated_4","title":"Deprecated","text":"
  • After calling FeedbackDataset.push_to_argilla, calling push_to_argilla again won't do anything since the dataset is already pushed to Argilla (#3465).
  • After calling FeedbackDataset.push_to_argilla, calling fetch_records won't do anything since the records are lazily fetched from Argilla (#3465).
  • After calling FeedbackDataset.push_to_argilla, the Argilla ID is no longer stored in the attribute/property argilla_id but in id instead (#3465).
"},{"location":"community/changelog/#1133","title":"1.13.3","text":""},{"location":"community/changelog/#fixed_25","title":"Fixed","text":"
  • Fixed ModuleNotFoundError caused because the argilla.utils.telemetry module used in the ArgillaTrainer was importing an optional dependency not installed by default (#3471).
  • Fixed ImportError caused because the argilla.client.feedback.config module was importing pyyaml optional dependency not installed by default (#3471).
"},{"location":"community/changelog/#1132","title":"1.13.2","text":""},{"location":"community/changelog/#fixed_26","title":"Fixed","text":"
  • The suggestion_type_enum ENUM data type created in PostgreSQL didn't have any value (#3445).
"},{"location":"community/changelog/#1131","title":"1.13.1","text":""},{"location":"community/changelog/#fixed_27","title":"Fixed","text":"
  • Fix database migration for PostgreSQL (See #3438)
"},{"location":"community/changelog/#1130","title":"1.13.0","text":""},{"location":"community/changelog/#added_20","title":"Added","text":"
  • Added GET /api/v1/users/{user_id}/workspaces endpoint to list the workspaces to which a user belongs (#3308 and #3343).
  • Added HuggingFaceDatasetMixin for internal usage, to detach the FeedbackDataset integrations from the class itself, and use Mixins instead (#3326).
  • Added GET /api/v1/records/{record_id}/suggestions API endpoint to get the list of suggestions for the responses associated to a record (#3304).
  • Added POST /api/v1/records/{record_id}/suggestions API endpoint to create a suggestion for a response associated to a record (#3304).
  • Added support for RankingQuestionStrategy, RankingQuestionUnification and the .for_text_classification method for the TrainingTaskMapping (#3364)
  • Added PUT /api/v1/records/{record_id}/suggestions API endpoint to create or update a suggestion for a response associated to a record (#3304 & #3391).
  • Added suggestions attribute to FeedbackRecord, and allow adding and retrieving suggestions from the Python client (#3370)
  • Added allowed_for_roles Python decorator to check whether the current user has the required role to access the decorated function/method for User and Workspace (#3383)
  • Added API and Python Client support for workspace deletion (Closes #3260)
  • Added GET /api/v1/me/workspaces endpoint to list the workspaces of the current active user (#3390)
"},{"location":"community/changelog/#changed_17","title":"Changed","text":"
  • Updated output payload for GET /api/v1/datasets/{dataset_id}/records, GET /api/v1/me/datasets/{dataset_id}/records, POST /api/v1/me/datasets/{dataset_id}/records/search endpoints to include the suggestions of the records based on the value of the include query parameter (#3304).
  • Updated POST /api/v1/datasets/{dataset_id}/records input payload to add suggestions (#3304).
  • The POST /api/datasets/:dataset-id/:task/bulk endpoints don't create the dataset if it does not exist (Closes #3244)
  • Added Telemetry support for ArgillaTrainer (closes #3325)
  • User.workspaces is no longer an attribute but a property that calls list_user_workspaces to list all the workspace names for a given user ID (#3334)
  • Renamed FeedbackDatasetConfig to DatasetConfig and export/import from YAML as default instead of JSON (just used internally on push_to_huggingface and from_huggingface methods of FeedbackDataset) (#3326).
  • The protected metadata fields now support more than textual info; existing datasets must be reindexed. See docs for more detail (Closes #3332).
  • Updated Dockerfile parent image from python:3.9.16-slim to python:3.10.12-slim (#3425).
  • Updated quickstart.Dockerfile parent image from elasticsearch:8.5.3 to argilla/argilla-server:${ARGILLA_VERSION} (#3425).
"},{"location":"community/changelog/#removed_5","title":"Removed","text":"
  • Removed support to non-prefixed environment variables. All valid env vars start with ARGILLA_ (See #3392).
"},{"location":"community/changelog/#fixed_28","title":"Fixed","text":"
  • Fixed GET /api/v1/me/datasets/{dataset_id}/records endpoint always returning the responses for the records even if responses was not provided via the include query parameter (#3304).
  • Values for protected metadata fields are not truncated (Closes #3331).
  • Big number ids are properly rendered in UI (Closes #3265)
  • Fixed ArgillaDatasetCard to include the values/labels for all the existing questions (#3366)
"},{"location":"community/changelog/#deprecated_5","title":"Deprecated","text":"
  • Integer support for record id in text classification, token classification and text2text datasets.
"},{"location":"community/changelog/#1121","title":"1.12.1","text":""},{"location":"community/changelog/#fixed_29","title":"Fixed","text":"
  • Using rg.init with default argilla user skips setting the default workspace if not available. (Closes #3340)
  • Resolved wrong import structure for ArgillaTrainer and TrainingTaskMapping (Closes #3345)
  • Pin pydantic dependency to version < 2 (Closes #3348)
"},{"location":"community/changelog/#1120","title":"1.12.0","text":""},{"location":"community/changelog/#added_21","title":"Added","text":"
  • Added RankingQuestionSettings class allowing to create ranking questions in the API using POST /api/v1/datasets/{dataset_id}/questions endpoint (#3232)
  • Added RankingQuestion in the Python client to create ranking questions (#3275).
  • Added Ranking component in feedback task question form (#3177 & #3246).
  • Added FeedbackDataset.prepare_for_training method for generating a framework-specific dataset with the responses provided for RatingQuestion, LabelQuestion and MultiLabelQuestion (#3151).
  • Added ArgillaSpaCyTransformersTrainer class for supporting the training with spacy-transformers (#3256).
"},{"location":"community/changelog/#docs","title":"Docs","text":"
  • Added instructions for how to run the Argilla frontend in the developer docs (#3314).
"},{"location":"community/changelog/#changed_18","title":"Changed","text":"
  • All docker related files have been moved into the docker folder (#3053).
  • release.Dockerfile have been renamed to Dockerfile (#3133).
  • Updated rg.load function to raise a ValueError with an explanatory message for the cases in which the user tries to use the function to load a FeedbackDataset (#3289).
  • Updated ArgillaSpaCyTrainer to allow re-using tok2vec (#3256).
"},{"location":"community/changelog/#fixed_30","title":"Fixed","text":"
  • Check available workspaces on Argilla on rg.set_workspace (Closes #3262)
"},{"location":"community/changelog/#1110","title":"1.11.0","text":""},{"location":"community/changelog/#fixed_31","title":"Fixed","text":"
  • Replaced np.float alias by float to avoid AttributeError when using find_label_errors function with numpy>=1.24.0 (#3214).
  • Fixed format_as(\"datasets\") when no responses or optional responses in FeedbackRecord, to set their value to what \ud83e\udd17 Datasets expects instead of just None (#3224).
  • Fixed push_to_huggingface() when generate_card=True (default behaviour), as we were passing a sample record to the ArgillaDatasetCard class, and UUIDs introduced in 1.10.0 (#3192), are not JSON-serializable (#3231).
  • Fixed from_argilla and push_to_argilla to ensure consistency on both field and question re-construction, and to ensure UUIDs are properly serialized as str, respectively (#3234).
  • Refactored usage of import argilla as rg to clarify package navigation (#3279).
"},{"location":"community/changelog/#docs_1","title":"Docs","text":"
  • Fixed URLs in Weak Supervision with Sentence Transformers tutorial #3243.
  • Fixed library buttons' formatting on Tutorials page (#3255).
  • Modified styling of error code outputs in notebooks (#3270).
  • Added ElasticSearch and OpenSearch versions (#3280).
  • Removed template notebook from table of contents (#3271).
  • Fixed tutorials with pip install argilla to not use older versions of the package (#3282).
"},{"location":"community/changelog/#added_22","title":"Added","text":"
  • Added metadata attribute to the Record of the FeedbackDataset (#3194)
  • New users update command to update the role for an existing user (#3188)
  • New Workspace class to allow users to manage their Argilla workspaces and the users assigned to those workspaces via the Python client (#3180)
  • Added User class to let users manage their Argilla users via the Python client (#3169).
  • Added an option to display tqdm progress bar to FeedbackDataset.push_to_argilla when looping over the records to upload (#3233).
"},{"location":"community/changelog/#changed_19","title":"Changed","text":"
  • The role system now supports three different roles: owner, admin and annotator (#3104)
  • admin role is scoped to workspace-level operations (#3115)
  • The owner user is created among the default pool of users in the quickstart, and the default user in the server has now owner role (#3248), reverting (#3188).
"},{"location":"community/changelog/#deprecated_6","title":"Deprecated","text":"
  • As of Python 3.7 end-of-life (EOL) on 2023-06-27, Argilla will no longer support Python 3.7 (#3188). More information at https://peps.python.org/pep-0537/
"},{"location":"community/changelog/#1100","title":"1.10.0","text":""},{"location":"community/changelog/#added_23","title":"Added","text":"
  • Added search component for feedback datasets (#3138)
  • Added markdown support for feedback dataset guidelines (#3153)
  • Added Train button for feedback datasets (#3170)
"},{"location":"community/changelog/#changed_20","title":"Changed","text":"
  • Updated SearchEngine and POST /api/v1/me/datasets/{dataset_id}/records/search to return the total number of records matching the search query (#3166)
"},{"location":"community/changelog/#fixed_32","title":"Fixed","text":"
  • Replaced Enum for string value in URLs for client API calls (Closes #3149)
  • Resolved breaking issue with ArgillaSpanMarkerTrainer for Named Entity Recognition with span_marker v1.1.x onwards.
  • Move ArgillaDatasetCard import under @requires_version decorator, so that the ImportError on huggingface_hub is handled properly (#3174)
  • Allow flow FeedbackDataset.from_argilla -> FeedbackDataset.push_to_argilla under different dataset names and/or workspaces (#3192)
"},{"location":"community/changelog/#docs_2","title":"Docs","text":"
  • Resolved typos in the docs (#3240).
  • Fixed mention of master branch (#3254).
"},{"location":"community/changelog/#190","title":"1.9.0","text":""},{"location":"community/changelog/#added_24","title":"Added","text":"
  • Added boolean use_markdown property to TextFieldSettings model.
  • Added boolean use_markdown property to TextQuestionSettings model.
  • Added new status draft for the Response model.
  • Added LabelSelectionQuestionSettings class allowing to create label selection (single-choice) questions in the API (#3005)
  • Added MultiLabelSelectionQuestionSettings class allowing to create multi-label selection (multi-choice) questions in the API (#3010).
  • Added POST /api/v1/me/datasets/{dataset_id}/records/search endpoint (#3068).
  • Added new components in feedback task Question form: MultiLabel (#3064) and SingleLabel (#3016).
  • Added docstrings to the pydantic.BaseModels defined at argilla/client/feedback/schemas.py (#3137)
  • Added the information about executing tests in the developer documentation ([#3143]).
"},{"location":"community/changelog/#changed_21","title":"Changed","text":"
  • Updated GET /api/v1/me/datasets/:dataset_id/metrics output payload to include the count of responses with draft status.
  • Added LabelSelectionQuestionSettings class allowing to create label selection (single-choice) questions in the API.
  • Added MultiLabelSelectionQuestionSettings class allowing to create multi-label selection (multi-choice) questions in the API.
  • Database setup for unit tests. Now the unit tests use a different database than the one used by the local Argilla server (Closes #2987).
  • Updated alembic setup to be able to autogenerate revision/migration scripts using SQLAlchemy metadata from Argilla server models (#3044)
  • Improved DatasetCard generation on FeedbackDataset.push_to_huggingface when generate_card=True, following the official HuggingFace Hub template, but suited to FeedbackDatasets from Argilla (#3110)
"},{"location":"community/changelog/#fixed_33","title":"Fixed","text":"
  • Disallow fields and questions in FeedbackDataset with the same name (#3126).
  • Fixed broken links in the documentation and updated the development branch name from development to develop ([#3145]).
"},{"location":"community/changelog/#180","title":"1.8.0","text":""},{"location":"community/changelog/#added_25","title":"Added","text":"
  • /api/v1/datasets new endpoint to list and create datasets (#2615).
  • /api/v1/datasets/{dataset_id} new endpoint to get and delete datasets (#2615).
  • /api/v1/datasets/{dataset_id}/publish new endpoint to publish a dataset (#2615).
  • /api/v1/datasets/{dataset_id}/questions new endpoint to list and create dataset questions (#2615)
  • /api/v1/datasets/{dataset_id}/fields new endpoint to list and create dataset fields (#2615)
  • /api/v1/datasets/{dataset_id}/questions/{question_id} new endpoint to delete a dataset question (#2615)
  • /api/v1/datasets/{dataset_id}/fields/{field_id} new endpoint to delete a dataset field (#2615)
  • /api/v1/workspaces/{workspace_id} new endpoint to get workspaces by id (#2615)
  • /api/v1/responses/{response_id} new endpoint to update and delete a response (#2615)
  • /api/v1/datasets/{dataset_id}/records new endpoint to create and list dataset records (#2615)
  • /api/v1/me/datasets new endpoint to list user visible datasets (#2615)
  • /api/v1/me/dataset/{dataset_id}/records new endpoint to list dataset records with user responses (#2615)
  • /api/v1/me/datasets/{dataset_id}/metrics new endpoint to get the dataset user metrics (#2615)
  • /api/v1/me/records/{record_id}/responses new endpoint to create record user responses (#2615)
  • showing new feedback task datasets in datasets list ([#2719])
  • new page for feedback task ([#2680])
  • show feedback task metrics ([#2822])
  • user can delete dataset in dataset settings page ([#2792])
  • Support for FeedbackDataset in Python client (parent PR #2615, and nested PRs: [#2949], [#2827], [#2943], [#2945], [#2962], and [#3003])
  • Integration with the HuggingFace Hub ([#2949])
  • Added ArgillaPeftTrainer for text and token classification #2854
  • Added predict_proba() method to ArgillaSetFitTrainer
  • Added ArgillaAutoTrainTrainer for Text Classification #2664
  • New database revisions command showing database revisions info
"},{"location":"community/changelog/#fixes","title":"Fixes","text":"
  • Avoid rendering html for invalid html strings in Text2text ([#2911](https://github.com/argilla-io/argilla/issues/2911))
"},{"location":"community/changelog/#changed_22","title":"Changed","text":"
  • The database migrate command accepts a --revision param to provide specific revision id
  • tokens_length metrics function returns empty data (#3045)
  • token_length metrics function returns empty data (#3045)
  • mention_length metrics function returns empty data (#3045)
  • entity_density metrics function returns empty data (#3045)
"},{"location":"community/changelog/#deprecated_7","title":"Deprecated","text":"
  • Using Argilla with Python 3.7 runtime is deprecated and support will be removed from version 1.11.0 (#2902)
  • tokens_length metrics function has been deprecated and will be removed in 1.10.0 (#3045)
  • token_length metrics function has been deprecated and will be removed in 1.10.0 (#3045)
  • mention_length metrics function has been deprecated and will be removed in 1.10.0 (#3045)
  • entity_density metrics function has been deprecated and will be removed in 1.10.0 (#3045)
"},{"location":"community/changelog/#removed_6","title":"Removed","text":"
  • Removed mention density, tokens_length and chars_length metrics from token classification metrics storage (#3045)
  • Removed token char_start, char_end, tag, and score metrics from token classification metrics storage (#3045)
  • Removed tags-related metrics from token classification metrics storage (#3045)
"},{"location":"community/changelog/#170","title":"1.7.0","text":""},{"location":"community/changelog/#added_26","title":"Added","text":"
  • add max_retries and num_threads parameters to rg.log to run data logging requests concurrently with backoff retry policy. See #2458 and #2533
  • rg.load accepts include_vectors and include_metrics when loading data. Closes #2398
  • Added settings param to prepare_for_training (#2689)
  • Added prepare_for_training for openai (#2658)
  • Added ArgillaOpenAITrainer (#2659)
  • Added ArgillaSpanMarkerTrainer for Named Entity Recognition (#2693)
  • Added ArgillaTrainer CLI support. Closes (#2809)
"},{"location":"community/changelog/#fixes_1","title":"Fixes","text":"
  • fix image alignment on token classification
"},{"location":"community/changelog/#changed_23","title":"Changed","text":"
  • Argilla quickstart image dependencies are externalized into quickstart.requirements.txt. See #2666
  • bulk endpoints will upsert data when record id is present. Closes #2535
  • moved from click to typer CLI support. Closes (#2815)
  • Argilla server docker image is built with PostgreSQL support. Closes #2686
  • The rg.log computes all batches and raises an error for all failed batches.
  • The default batch size for rg.log is now 100.
"},{"location":"community/changelog/#fixed_34","title":"Fixed","text":"
  • argilla.training bugfixes and unification (#2665)
  • Resolved several small bugs in the ArgillaTrainer.
"},{"location":"community/changelog/#deprecated_8","title":"Deprecated","text":"
  • The rg.log_async function is deprecated and will be removed in the next minor release.
"},{"location":"community/changelog/#160","title":"1.6.0","text":""},{"location":"community/changelog/#added_27","title":"Added","text":"
  • ARGILLA_HOME_PATH new environment variable (#2564).
  • ARGILLA_DATABASE_URL new environment variable (#2564).
  • Basic support for user roles with admin and annotator (#2564).
  • id, first_name, last_name, role, inserted_at and updated_at new user fields (#2564).
  • /api/users new endpoint to list and create users (#2564).
  • /api/users/{user_id} new endpoint to delete users (#2564).
  • /api/workspaces new endpoint to list and create workspaces (#2564).
  • /api/workspaces/{workspace_id}/users new endpoint to list workspace users (#2564).
  • /api/workspaces/{workspace_id}/users/{user_id} new endpoint to create and delete workspace users (#2564).
  • argilla.tasks.users.migrate new task to migrate users from old YAML file to database (#2564).
  • argilla.tasks.users.create new task to create a user (#2564).
  • argilla.tasks.users.create_default new task to create a user with default credentials (#2564).
  • argilla.tasks.database.migrate new task to execute database migrations (#2564).
  • release.Dockerfile and quickstart.Dockerfile now create a default argilladata volume to persist data (#2564).
  • Add user settings page. Closes #2496
  • Added Argilla.training module with support for spacy, setfit, and transformers. Closes #2504
"},{"location":"community/changelog/#fixes_2","title":"Fixes","text":"
  • Now the prepare_for_training method is working when multi_label=True. Closes #2606
"},{"location":"community/changelog/#changed_24","title":"Changed","text":"
  • ARGILLA_USERS_DB_FILE environment variable is now only used to migrate users from the YAML file to the database (#2564).
  • full_name user field is now deprecated and first_name and last_name should be used instead (#2564).
  • password user field now requires a minimum of 8 and a maximum of 100 characters in size (#2564).
  • quickstart.Dockerfile image default users changed from team and argilla to admin and annotator, including new passwords and API keys (#2564).
  • Datasets to be managed only by users with admin role (#2564).
  • The list of rules is now accessible while metrics are computed. Closes #2117
  • Style updates for weak labeling and adding feedback toast when deleting rules. See #2626 and #2648
"},{"location":"community/changelog/#removed_7","title":"Removed","text":"
  • email user field (#2564).
  • disabled user field (#2564).
  • Support for private workspaces (#2564).
  • ARGILLA_LOCAL_AUTH_DEFAULT_APIKEY and ARGILLA_LOCAL_AUTH_DEFAULT_PASSWORD environment variables. Use python -m argilla.tasks.users.create_default instead (#2564).
  • The old headers for API Key and workspace from python client
  • The default value for old API Key constant. Closes #2251
"},{"location":"community/changelog/#151-2023-03-30","title":"1.5.1 - 2023-03-30","text":""},{"location":"community/changelog/#fixes_3","title":"Fixes","text":"
  • Copying datasets between workspaces with proper owner/workspace info. Closes #2562
  • Copy dataset with empty workspace to the default user workspace 905d4de
  • Using elasticsearch config to request backend version. Closes #2311
  • Remove sorting by score in labels. Closes #2622
"},{"location":"community/changelog/#changed_25","title":"Changed","text":"
  • Update field name in metadata for image url. See #2609
  • Improvements in tutorial doc cards. Closes #2216
"},{"location":"community/changelog/#150-2023-03-21","title":"1.5.0 - 2023-03-21","text":""},{"location":"community/changelog/#added_28","title":"Added","text":"
  • Add the fields to retrieve when loading the data from argilla. rg.load takes too long because of the vector field, even when users don't need it. Closes #2398
  • Add new page and components for dataset settings. Closes #2442
  • Add ability to show image in records (for TokenClassification and TextClassification) if a URL is passed in metadata with the key _image_url
  • Non-searchable fields support in metadata. #2570
  • Add record ID references to the prepare for training methods. Closes #2483
  • Add tutorial on Image Classification. #2420
  • Add Train button, visible for \"admin\" role, with code snippets from a selection of libraries. Closes [#2591](https://github.com/argilla-io/argilla/pull/2591)
"},{"location":"community/changelog/#changed_26","title":"Changed","text":"
  • Labels are now centralized in a specific vuex ORM called GlobalLabel Model, see https://github.com/argilla-io/argilla/issues/2210. This model is the same for TokenClassification and TextClassification (so both tasks have labels with color_id and shortcuts parameters in the vuex ORM)
  • The shortcuts improvement for labels #2339 has been moved to the vuex ORM in the dataset settings feature #2444
  • Update \"Define a labeling schema\" section in docs.
  • The record inputs are sorted alphabetically in UI by default. #2581
  • The record inputs are fully visible when pagination size is one, and the height of the collapsed area is bigger for laptop screens. #2587
"},{"location":"community/changelog/#fixes_4","title":"Fixes","text":"
  • Allow URL to be clickable in Jupyter notebook again. Closes #2527
"},{"location":"community/changelog/#removed_8","title":"Removed","text":"
  • Removed some deprecated data scan endpoints used by old clients. This change breaks compatibility with clients <v1.3.0
  • Stopped using old deprecated scan endpoints in the Python client. This breaks client compatibility with server versions <1.3.0
  • Removed the previous way to add labels through the dataset page. Now labels can be added only through the dataset settings page.
"},{"location":"community/contributor/","title":"How to contribute?","text":"

Thank you for investing your time in contributing to the project! Any contribution you make will be reflected in the most recent version of Argilla \ud83e\udd29.

New to contributing in general?

If you're a new contributor, read the README to get an overview of the project. In addition, here are some resources to help you get started with open-source contributions:

  • Discord: You are welcome to join the Argilla Discord community, where you can keep in touch with other users, contributors and the Argilla team. In the following section, you can find more information on how to get started in Discord.
  • Git: This is a very useful tool to keep track of the changes in your files. Using the command-line interface (CLI), you can make your contributions easily. For that, you need to have it installed and updated on your computer.
  • GitHub: It is a platform and cloud-based service that uses git and allows developers to collaborate on projects. To contribute to Argilla, you'll need to create an account. Check the Contributor Workflow with Git and GitHub for more info.
  • Developer Documentation: To collaborate, you'll need to set up an efficient environment. Check the developer documentation to know how to do it.
"},{"location":"community/contributor/#first-contact-in-discord","title":"First Contact in Discord","text":"

Discord is a handy tool for more casual conversations and to answer day-to-day questions. As part of Hugging Face, we have set up some Argilla channels on the server. Click here to join the Hugging Face Discord community effortlessly.

When part of the Hugging Face Discord, you can select \"Channels & roles\" and select \"Argilla\" along with any of the other groups that are interesting to you. \"Argilla\" will cover anything about Argilla and Distilabel. You can join the following channels:

  • #argilla-announcements: \ud83d\udce2 Important announcements and updates.
  • #argilla-distilabel-general: \ud83d\udcac General discussions about Argilla and Distilabel.
  • #argilla-distilabel-help: \ud83d\ude4b\u200d\u2640\ufe0f Need assistance? We're always here to help. Select the appropriate label (argilla or distilabel) for your issue and post it.

So now there is only one thing left to do: introduce yourself and talk to the community. You'll always be welcome! \ud83e\udd17\ud83d\udc4b

"},{"location":"community/contributor/#contributor-workflow-with-git-and-github","title":"Contributor Workflow with Git and GitHub","text":"

If you're working with Argilla and suddenly a new idea comes to your mind or you find an issue that can be improved, it's time to actively participate and contribute to the project!

"},{"location":"community/contributor/#report-an-issue","title":"Report an issue","text":"

If you spot a problem, search if an issue already exists. You can use the Label filter. If that is the case, participate in the conversation. If it does not exist, create an issue by clicking on New Issue.

This will show various templates, choose the one that best suits your issue.

Below, you can see an example of the Feature request template. Once you choose one, you will need to fill it in following the guidelines. Try to be as clear as possible. In addition, you can assign yourself to the issue and add or choose the right labels. Finally, click on Submit new issue.

"},{"location":"community/contributor/#work-with-a-fork","title":"Work with a fork","text":""},{"location":"community/contributor/#fork-the-argilla-repository","title":"Fork the Argilla repository","text":"

After having reported the issue, you can start working on it. For that, you will need to create a fork of the project. To do that, click on the Fork button.

Now, fill in the information. Remember to uncheck the Copy develop branch only option if you are going to work in or from another branch (for instance, to fix documentation, the main branch is used). Then, click on Create fork.

Now, you will be redirected to your fork. You can see that you are in your fork because the name of the repository will be your username/argilla, and it will indicate forked from argilla-io/argilla.

"},{"location":"community/contributor/#clone-your-forked-repository","title":"Clone your forked repository","text":"

In order to make the required adjustments, clone the forked repository to your local machine. Choose the destination folder and run the following command:

git clone https://github.com/[your-github-username]/argilla.git\ncd argilla\n

To keep your fork\u2019s main/develop branch up to date with our repo, add it as an upstream remote branch.

git remote add upstream https://github.com/argilla-io/argilla.git\n
"},{"location":"community/contributor/#create-a-new-branch","title":"Create a new branch","text":"

For each issue you're addressing, it's advisable to create a new branch. GitHub offers a straightforward method to streamline this process.

\u26a0\ufe0f Never work directly on the main or develop branch. Always create a new branch for your changes.

Navigate to your issue and on the right column, select Create a branch.

After the new window pops up, the branch will be named after the issue; include a prefix such as feature/, bug/, or docs/ to facilitate quick recognition of the issue type. In the Repository destination, pick your fork ([your-github-username]/argilla), and then select Change branch source to specify the source branch for creating the new one. Complete the process by clicking Create branch.

\ud83e\udd14 Remember that the main branch is only used to work with the documentation. For any other changes, use the develop branch.

Now, locally change to the new branch you just created.

git fetch origin\ngit checkout [branch-name]\n
"},{"location":"community/contributor/#use-changelogmd","title":"Use CHANGELOG.md","text":"

If you are working on a new feature, it is a good practice to make note of it for others to keep up with the changes. For that, we utilize the CHANGELOG.md file in the root directory. This file is used to list changes made in each version of the project and there are headers that we use to denote each type of change.

  • Added: for new features.
  • Changed: for changes in existing functionality.
  • Deprecated: for soon-to-be removed features.
  • Removed: for now removed features.
  • Fixed: for any bug fixes.
  • Security: in case of vulnerabilities.

A sample addition would be:

- Fixed the key errors for the `init` method ([#NUMBER_OF_PR](LINK_TO_PR)). Contributed by @github_handle.\n

You can have a look at the CHANGELOG.md file to see more cases and examples.

"},{"location":"community/contributor/#make-changes-and-push-them","title":"Make changes and push them","text":"

Make the changes you want in your local repository, and test that everything works and you are following the guidelines.

Check the developer documentation to set up your environment and start working on the project.

Once you have finished, you can check the status of your repository and synchronize it with the upstream repo with the following commands:

# Check the status of your repository\ngit status\n\n# Synchronize with the upstream repo\ngit checkout [branch-name]\ngit rebase [default-branch]\n

If everything is right, we need to commit and push the changes to your fork. For that, run the following commands:

# Add the changes to the staging area\ngit add filename\n\n# Commit the changes by writing a proper message\ngit commit -m \"commit-message\"\n\n# Push the changes to your fork\ngit push origin [branch-name]\n

When pushing, you will be asked to enter your GitHub login credentials. Once the push is complete, all local commits will be on your GitHub repository.

"},{"location":"community/contributor/#create-a-pull-request","title":"Create a pull request","text":"

Come back to GitHub, navigate to the original repository where you created your fork, and click on Compare & pull request.

First, click on compare across forks and select the right repositories and branches.

In the base repository, keep in mind to select either main or develop based on the modifications made. In the head repository, indicate your forked repository and the branch corresponding to the issue.

Then, fill in the pull request template. You should add a prefix to the PR name as we did with the branch above. If you are working on a new feature, you can name your PR as feat: TITLE. If your PR consists of a solution for a bug, you can name your PR as bug: TITLE. And, if your work is for improving the documentation, you can name your PR as docs: TITLE.

In addition, on the right side, you can select a reviewer (for instance, if you discussed the issue with a member of the Argilla team) and assign the pull request to yourself. It is highly advisable to add labels to the PR as well. You can do this in the labels section on the right side of the screen. For instance, if you are addressing a bug, add the bug label, or if the PR is related to the documentation, add the documentation label. This way, PRs can be easily filtered.

Finally, fill in the template carefully and follow the guidelines. Remember to link the original issue and enable the checkbox to allow maintainer edits so the branch can be updated for a merge. Then, click on Create pull request.

"},{"location":"community/contributor/#review-your-pull-request","title":"Review your pull request","text":"

Once you submit your PR, a team member will review your proposal. We may ask questions, request additional information or ask for changes to be made before a PR can be merged, either using suggested changes or pull request comments.

You can apply the changes directly through the UI (check the files changed and click on the three dots in the right corner) or from your fork, and then commit them to your branch. The PR will be updated automatically and the suggestions will appear as outdated.

If you run into any merge issues, check out this git tutorial to help you resolve merge conflicts and other issues.

"},{"location":"community/contributor/#your-pr-is-merged","title":"Your PR is merged!","text":"

Congratulations \ud83c\udf89\ud83c\udf8a We thank you \ud83e\udd29

Once your PR is merged, your contributions will be publicly visible on the Argilla GitHub.

Additionally, we will include your changes in the next release based on our development branch.

"},{"location":"community/contributor/#additional-resources","title":"Additional resources","text":"

Here are some helpful resources for your reference.

  • Configuring Discord, a guide to learn how to get started with Discord.
  • Pro Git, a book to learn Git.
  • Git in VSCode, a guide to learn how to easily use Git in VSCode.
  • GitHub Skills, an interactive course to learn GitHub.
"},{"location":"community/developer/","title":"Developer documentation","text":"

As an Argilla developer, you are already part of the community, and your contributions drive our development. This guide will help you set up your development environment and start contributing.

Argilla core components

  • Documentation: Argilla's documentation serves as an invaluable resource, providing a comprehensive and in-depth guide for users seeking to explore, understand, and effectively harness the core components of the Argilla ecosystem.

  • Python SDK: A Python SDK installable with pip install argilla to interact with the Argilla Server and the Argilla UI. It provides an API to manage the data, configuration, and annotation workflows.

  • FastAPI Server: The core of Argilla is a Python FastAPI server that manages the data by pre-processing it and storing it in the vector database. It also stores application information in the relational database. It provides a REST API that interacts with the data from the Python SDK and the Argilla UI (see the example request after this list). It also provides a web interface to visualize the data.

  • Relational Database: A relational database to store the metadata of the records and the annotations. SQLite is used as the default built-in option and is deployed separately with the Argilla Server, but a separate PostgreSQL database can also be used.

  • Vector Database: A vector database to store the records data and perform scalable vector similarity searches and basic document searches. We currently support ElasticSearch and OpenSearch, which can be deployed as separate Docker images.

  • Vue.js UI: A web application to visualize and annotate your data, users, and teams. It is built with Vue.js and is directly deployed alongside the Argilla Server within our Argilla Docker image.
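
For illustration, here is a minimal sketch of how a client could call one of the server's REST endpoints with curl. The API key header name and key value below are assumptions; adjust them to your deployment:

# List the workspaces of the current user (header name and key are assumptions)\ncurl -H \"X-Argilla-Api-Key: argilla.apikey\" http://localhost:6900/api/v1/me/workspaces\n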

"},{"location":"community/developer/#the-argilla-repository","title":"The Argilla repository","text":"

The Argilla repository has a monorepo structure, which means that all the components are located in the same repository: argilla-io/argilla. This repo is divided into the following folders:

  • argilla: The Python SDK project
  • argilla-server: The FastAPI server project
  • argilla-frontend: The Vue.js UI project
  • argilla/docs: The documentation project
  • examples: Example resources for deployments, scripts and notebooks

How to contribute?

Before starting to develop, we recommend reading our contribution guide to understand the contribution process and the guidelines to follow. Once you have cloned the Argilla repository and checked out to the correct branch, you can start setting up your development environment.

"},{"location":"community/developer/#set-up-the-python-environment","title":"Set up the Python environment","text":"

To work on the Argilla Python SDK, you must install the Argilla package on your system.

Create a virtual environment

We recommend creating a dedicated virtual environment for SDK development to prevent conflicts. For this, you can use the manager of your choice, such as venv, conda, pyenv, or uv.

From the root of the cloned Argilla repository, you should move to the argilla folder in your terminal.

cd argilla\n

Next, activate your virtual environment and make the required installations:

# Install the `pdm` package manager\npip install pdm\n\n# Install argilla in editable mode and the development dependencies\npdm install --dev\n
"},{"location":"community/developer/#linting-and-formatting","title":"Linting and formatting","text":"

To maintain a consistent code format, install the pre-commit hooks to run before each commit automatically.

pre-commit install\n

In addition, run the following scripts to check the code formatting and linting:

pdm run format\npdm run lint\n
"},{"location":"community/developer/#running-tests","title":"Running tests","text":"

Running tests at the end of every development cycle is indispensable to ensure no breaking changes.

# Run all tests\npdm run tests\n\n# Run specific tests\npytest tests/integration\npytest tests/unit\n
Running linting, formatting, and tests

You can run all the checks at once by using the following command:

    pdm run all\n
"},{"location":"community/developer/#set-up-the-databases","title":"Set up the databases","text":"

To run your development environment, you need to set up Argilla's databases.

"},{"location":"community/developer/#vector-database","title":"Vector database","text":"

Argilla supports ElasticSearch as its primary search engine for the vector database by default. For more information about setting up OpenSearch, check the Server configuration.

You can run ElasticSearch locally using Docker:

# Argilla supports ElasticSearch versions >=8.5\ndocker run -d --name elasticsearch-for-argilla -p 9200:9200 -p 9300:9300 -e \"ES_JAVA_OPTS=-Xms512m -Xmx512m\" -e \"discovery.type=single-node\" -e \"xpack.security.enabled=false\" docker.elastic.co/elasticsearch/elasticsearch:8.5.3\n

Install Docker

You can find the Docker installation guides for Windows, macOS and Linux on Docker website.

"},{"location":"community/developer/#relational-database","title":"Relational database","text":"

Argilla uses SQLite as the default built-in option for the relational database, storing information about users, workspaces, etc. No additional configuration is required to start using SQLite.

By default, the database file will be created at ~/.argilla/argilla.db; this can be configured by setting different values for ARGILLA_DATABASE_URL and ARGILLA_HOME_PATH environment variables.
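
For example, a minimal sketch of pointing Argilla at a custom location before starting the server (the paths below are illustrative, and the database URL follows SQLAlchemy conventions):

# Store Argilla data under /data/argilla instead of ~/.argilla (illustrative path)\nexport ARGILLA_HOME_PATH=\"/data/argilla\"\n\n# Point the relational database to a SQLite file in that folder (SQLAlchemy-style URL)\nexport ARGILLA_DATABASE_URL=\"sqlite:////data/argilla/argilla.db\"\n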

Manage the database

For more information about the database migration and user management, refer to the Argilla server README.

"},{"location":"community/developer/#set-up-the-server","title":"Set up the server","text":"

Once you have set up the databases, you can start the Argilla server. To run the server, you can check the Argilla server README file.

"},{"location":"community/developer/#set-up-the-frontend","title":"Set up the frontend","text":"

Optionally, if you need to run the Argilla frontend, you can follow the instructions in the Argilla frontend README.

"},{"location":"community/developer/#set-up-the-documentation","title":"Set up the documentation","text":"

Documentation is essential to provide users with a comprehensive guide about Argilla.

From main or develop?

If you are updating, improving, or fixing the current documentation without a code change, work on the main branch. For new features or bug fixes that require documentation, use the develop branch.

To contribute to the documentation and generate it locally, ensure you have installed the development dependencies as shown in the \"Set up the Python environment\" section, and run the following command to start the development server with mkdocs:

mkdocs serve\n
"},{"location":"community/developer/#documentation-guidelines","title":"Documentation guidelines","text":"

As mentioned, we use mkdocs to build the documentation. You can write the documentation in markdown format, and it will automatically be converted to HTML. In addition, you can include elements such as tables, tabs, images, and others, as shown in this guide. We recommend following these guidelines:

  • Use clear and concise language: Ensure the documentation is easy to understand for all users by using straightforward language and including meaningful examples. Images are not easy to maintain, so use them only when necessary and place them in the appropriate folder within the docs/assets/images directory.
  • Verify code snippets: Double-check that all code snippets are correct and runnable.
  • Review spelling and grammar: Check the spelling and grammar of the documentation.
  • Update the table of contents: If you add a new page, include it in the relevant index.md or the mkdocs.yml file, as sketched below.
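
For instance, a nav entry for a new community page in mkdocs.yml might look like this (a hypothetical sketch; match it to the actual structure of the file):

nav:\n  - Community:\n    - How to contribute?: community/contributor.md\n    - My new page: community/my_new_page.md  # hypothetical new entry\n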

Contribute with a tutorial

You can also contribute a tutorial (.ipynb) to the \"Community\" section. We recommend aligning the tutorial with the structure of the existing tutorials. For an example, check this tutorial.

"},{"location":"community/popular_issues/","title":"Issue dashboard","text":"Most engaging open issuesLatest issues open by the communityPlanned issues for upcoming releases Rank Issue Reactions Comments 1 4637 - [FEATURE] Label breakdown in Feedback dataset stats \ud83d\udc4d 6 \ud83d\udcac 4 2 1607 - Support for hierarchical multilabel text classification (taxonomy) \ud83d\udc4d 5 \ud83d\udcac 15 3 4658 - Active listeners for Feedback Dataset \ud83d\udc4d 5 \ud83d\udcac 5 4 1800 - Add comments/notes to annotation datasets to share with teammates. \ud83d\udc4d 2 \ud83d\udcac 6 5 1837 - Custom Record UI Templates \ud83d\udc4d 2 \ud83d\udcac 6 6 1922 - Show potential number of records during filter selection \ud83d\udc4d 2 \ud83d\udcac 4 7 1630 - Accepting several predictions/annotations for the same record \ud83d\udc4d 2 \ud83d\udcac 2 8 5348 - [FEATURE] Ability to create new labels on-the-fly \ud83d\udc4d 2 \ud83d\udcac 0 9 3625 - [IMPROVE] Fields with empty title shall have exactly the same value as the user entered in the name field, without altering it \ud83d\udc4d 2 \ud83d\udcac 0 10 4372 - [FEATURE] distribution indication for filters \ud83d\udc4d 1 \ud83d\udcac 6 Rank Issue Author 1 \ud83d\udfe2 5570 - [BUG-python/deployment] by lecheuklun 2 \ud83d\udfe2 5561 - [FEATURE] Force predetermined sorting for a dataset by lgienapp 3 \ud83d\udfe2 5557 - [DOCS] \"Bulk Labeling Multimodal Data\" Notebook outdated by trojblue 4 \ud83d\udfe2 5548 - [BUG-python/deployment] verify=False parameter is not passed to httpx.Client through Argilla class (v2.2.0) by xiajing10 5 \ud83d\udfe3 5543 - automatically load token from collab secrets if it exists by not-lain 6 \ud83d\udfe3 5530 - [FEATURE] updated_at / inserted_at properties on retrieved Records by maxserras 7 \ud83d\udfe3 5529 - [BUG-UI/UX] API Key copy button not working by cceyda 8 \ud83d\udfe2 5528 - [FEATURE] Filter by responses & suggestions by cceyda 9 \ud83d\udfe2 5516 - [FEATURE] Allow all annotators in workspace to see all the submitted records by cceyda 10 \ud83d\udfe2 5513 - [ENHANCEMENT] Improve ImageField error messaging to deal with paths, urls, none by cceyda Rank Issue Milestone 1 \ud83d\udfe2 5415 - [FEATURE] Do not stop logging records if UnprocessableEntityError is raised because one single record v2.2.0 2 \ud83d\udfe2 5534 - [FEATURE] preview custom field data in dataset settings page v2.3.0 3 \ud83d\udfe2 5520 - [BUG-UI/UX] Incorrect iframe height calculation in sandBox Component v2.4.0 4 \ud83d\udfe2 5513 - [ENHANCEMENT] Improve ImageField error messaging to deal with paths, urls, none v2.4.0 5 \ud83d\udfe2 5458 - [FEATURE] Controls for data schema for images when exporting datasets and records v2.4.0 6 \ud83d\udfe2 4931 - [REFACTOR] Improve handling of question models and dicts v2.4.0 7 \ud83d\udfe2 4935 - [CONFIG] Resolve python requirements for python version and dependencies with server. v2.4.0 8 \ud83d\udfe2 1836 - Webhooks v2.4.0

Last update: 2024-10-07

"},{"location":"community/integrations/llamaindex_rag_github/","title":"LlamaIndex","text":"
!pip install \"argilla-llama-index\"\n!pip install \"llama-index-readers-github==0.1.9\"\n

Let's make the required imports:

from llama_index.core import (\n    Settings,\n    VectorStoreIndex,\n    set_global_handler,\n)\nfrom llama_index.llms.openai import OpenAI\nfrom llama_index.readers.github import (\n    GithubClient,\n    GithubRepositoryReader,\n)\n

We need to set the OpenAI API key and the GitHub token. The OpenAI API key is required to run queries using GPT models, while the GitHub token ensures you have access to the repository you're using. Although the GitHub token might not be necessary for public repositories, it is still recommended.

import os\n\nos.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\nopenai_api_key = os.getenv(\"OPENAI_API_KEY\")\n\nos.environ[\"GITHUB_TOKEN\"] = \"ghp_...\"\ngithub_token = os.getenv(\"GITHUB_TOKEN\")\n
set_global_handler(\n    \"argilla\",\n    dataset_name=\"github_query_model\",\n    api_url=\"http://localhost:6900\",\n    api_key=\"argilla.apikey\",\n    number_of_retrievals=2,\n)\n
github_client = GithubClient(github_token=github_token, verbose=True)\n

Before creating our GithubRepositoryReader instance, we need to allow nested event loops. Since the Jupyter kernel already runs on an event loop, applying nest_asyncio lets the reader's asynchronous calls run inside it until the repository is fully read.

import nest_asyncio\n\nnest_asyncio.apply()\n

Now, let\u2019s create a GithubRepositoryReader instance with the necessary repository details. In this case, we'll target the main branch of the argilla repository. Since we're interested in the documentation, we will target the argilla/docs/ folder, excluding images, JSON files, and ipynb files.

documents = GithubRepositoryReader(\n    github_client=github_client,\n    owner=\"argilla-io\",\n    repo=\"argilla\",\n    use_parser=False,\n    verbose=False,\n    filter_directories=(\n        [\"argilla/docs/\"],\n        GithubRepositoryReader.FilterType.INCLUDE,\n    ),\n    filter_file_extensions=(\n        [\n            \".png\",\n            \".jpg\",\n            \".jpeg\",\n            \".gif\",\n            \".svg\",\n            \".ico\",\n            \".json\",\n            \".ipynb\",   # Erase this line if you want to include notebooks\n\n        ],\n        GithubRepositoryReader.FilterType.EXCLUDE,\n    ),\n).load_data(branch=\"main\")\n

Now, let's create a LlamaIndex index out of this document, and we can start querying the RAG system.

# LLM settings\nSettings.llm = OpenAI(\n    model=\"gpt-3.5-turbo\", temperature=0.8, openai_api_key=openai_api_key\n)\n\n# Load the data and create the index\nindex = VectorStoreIndex.from_documents(documents)\n\n# Create the query engine\nquery_engine = index.as_query_engine()\n
response = query_engine.query(\"How do I create a Dataset in Argilla?\")\nresponse\n

The generated response will be automatically logged in our Argilla instance. Check it out! From Argilla you can quickly have a look at your predictions and annotate them, so you can combine both synthetic data and human feedback.

Let's ask a couple more questions to see the overall behavior of the RAG chatbot. Remember that the answers are automatically logged into your Argilla instance.

questions = [\n    \"How can I list the available datasets?\",\n    \"Which are the user credentials?\",\n    \"Can I use markdown in Argilla?\",\n    \"Could you explain how to annotate datasets in Argilla?\",\n]\n\nanswers = []\n\nfor question in questions:\n    answers.append(query_engine.query(question))\n\nfor question, answer in zip(questions, answers):\n    print(f\"Question: {question}\")\n    print(f\"Answer: {answer}\")\n    print(\"----------------------------\")\n
\nQuestion: How can I list the available datasets?\nAnswer: You can list all the datasets available in a workspace by utilizing the `datasets` attribute of the `Workspace` class. Additionally, you can determine the number of datasets in a workspace by using `len(workspace.datasets)`. To list the datasets, you can iterate over them and print out each dataset. Remember that dataset settings are not preloaded when listing datasets, and if you need to work with settings, you must load them explicitly for each dataset.\n----------------------------\nQuestion: Which are the user credentials?\nAnswer: The user credentials in Argilla consist of a username, password, and API key.\n----------------------------\nQuestion: Can I use markdown in Argilla?\nAnswer: Yes, you can use Markdown in Argilla.\n----------------------------\nQuestion: Could you explain how to annotate datasets in Argilla?\nAnswer: To annotate datasets in Argilla, users can manage their data annotation projects by setting up `Users`, `Workspaces`, `Datasets`, and `Records`. By deploying Argilla on the Hugging Face Hub or with `Docker`, installing the Python SDK with `pip`, and creating the first project, users can get started in just 5 minutes. The tool allows for interacting with data in a more engaging way through features like quick labeling with filters, AI feedback suggestions, and semantic search, enabling users to focus on training models and monitoring their performance effectively.\n----------------------------\n\n
"},{"location":"community/integrations/llamaindex_rag_github/#create-a-rag-system-expert-in-a-github-repository-and-log-your-predictions-in-argilla","title":"\ud83d\udd75\ud83c\udffb\u200d\u2640\ufe0f Create a RAG system expert in a GitHub repository and log your predictions in Argilla","text":"

In this tutorial, we'll show you how to create a RAG system that can answer questions about a specific GitHub repository. As an example, we will target the Argilla repository. This RAG system will target the docs of the repository, as that's where most of the natural language information about the repository can be found.

This tutorial includes the following steps:

  • Setting up the Argilla callback handler for LlamaIndex.
  • Initializing a GitHub client.
  • Creating an index with a specific set of files from the GitHub repository of our choice.
  • Creating a RAG system out of the Argilla repository, asking questions, and automatically logging the answers to Argilla.

This tutorial is based on the GitHub Repository Reader made by LlamaIndex.

"},{"location":"community/integrations/llamaindex_rag_github/#getting-started","title":"Getting started","text":""},{"location":"community/integrations/llamaindex_rag_github/#deploy-the-argilla-server","title":"Deploy the Argilla server\u00b6","text":"

If you already have deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

"},{"location":"community/integrations/llamaindex_rag_github/#set-up-the-environment","title":"Set up the environment\u00b6","text":"

To complete this tutorial, you need to install this integration and a third-party library via pip.

Note

Check the integration GitHub repository here.

"},{"location":"community/integrations/llamaindex_rag_github/#set-the-argillas-llamaindex-handler","title":"Set the Argilla's LlamaIndex handler","text":"

To easily log your data into Argilla within your LlamaIndex workflow, you only need a simple step. Just call the Argilla global handler for LlamaIndex before running queries with your LLM. This ensures that the predictions obtained using LlamaIndex are automatically logged to the Argilla instance.

  • dataset_name: The name of the dataset. If the dataset does not exist, it will be created with the specified name. Otherwise, it will be updated.
  • api_url: The URL to connect to the Argilla instance.
  • api_key: The API key to authenticate with the Argilla instance.
  • number_of_retrievals: The number of retrieved documents to be logged. Defaults to 0.
  • workspace_name: The name of the workspace to log the data. By default, the first available workspace.

> For more information about the credentials, check the documentation for users and workspaces.

"},{"location":"community/integrations/llamaindex_rag_github/#retrieve-the-data-from-github","title":"Retrieve the data from GitHub","text":"

First, we need to initialize the GitHub client, which will include the GitHub token for repository access.

"},{"location":"community/integrations/llamaindex_rag_github/#create-the-index-and-make-some-queries","title":"Create the index and make some queries","text":""},{"location":"getting_started/faq/","title":"FAQs","text":"What is Argilla?

Argilla is a collaboration tool for AI engineers and domain experts that require high-quality outputs, full data ownership, and overall efficiency. It is designed to help you achieve and keep high-quality data standards, store your training data, store the results of your models, evaluate their performance, and improve the data through human and AI feedback.

Does Argilla cost money?

No. Argilla is an open-source project and is free to use. You can deploy Argilla on your own infrastructure or use our cloud offering.

What data types does Argilla support?

Text data, mostly. Argilla natively supports textual data; however, we do support rich text, which means you can represent different types of data in Argilla as long as you can convert them to text. For example, you can store images, audio, video, and any other type of data as long as you can convert them to their base64 representation or render them as HTML in, for example, an iframe (see the sketch below).
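
As a minimal sketch (the file name is hypothetical, and the target field must have HTML/markdown rendering enabled), converting a local image into an HTML snippet that can be stored as text looks like this:

import base64\n\n# Read a local image and embed it as a base64 data URI in an HTML tag (hypothetical file name)\nwith open(\"photo.png\", \"rb\") as f:\n    encoded = base64.b64encode(f.read()).decode(\"utf-8\")\n\nhtml_snippet = f'<img src=\"data:image/png;base64,{encoded}\" />'\n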

Does Argilla train models?

No. Argilla is a collaboration tool to achieve and keep high-quality data standards. You can use Argilla to store your training data, store the results of your models, evaluate their performance and improve the data. For training models, you can use any machine learning framework or library that you prefer even though we recommend starting with Hugging Face Transformers.

Does Argilla provide annotation workforces?

Yes, kind of. We don't provide an annotation workforce in-house, but we do have partnerships with workforce providers that ensure ethical practices and secure work environments. Feel free to schedule a meeting here or contact us via email.

How does Argilla differ from competitors like Lilac, Snorkel, Prodigy and Scale?

Argilla distinguishes itself through its focus on specific use cases and human-in-the-loop approaches. While it does offer programmatic features, Argilla\u2019s core value lies in actively involving human experts in the tool-building process, setting it apart from other competitors.

Furthermore, Argilla places particular emphasis on smooth integration with other tools in the community, particularly within the realms of MLOps and NLP. Its compatibility with popular frameworks like spaCy and Hugging Face makes it exceptionally user-friendly and accessible.

Finally, platforms like Snorkel, Prodigy or Scale, while more comprehensive, often require a significant commitment. Argilla, on the other hand, works more as a tool within the MLOps ecosystem, allowing users to begin with specific use cases and then scale up as needed. This flexibility is particularly beneficial for users and customers who prefer to start small and expand their applications over time, as opposed to committing to an all-encompassing tool from the outset.

What is the difference between Argilla 2.0 and the legacy datasets in 1.0?

Argilla 1.0 relied on 3 main task datasets: DatasetForTextClassification, DatasetForTokenClassification, and DatasetForText2Text. These tasks were designed to be simple, easy to use, and high in functionality, but they were limited in adaptability. With the introduction of Large Language Models (LLMs) and the increasing complexity of NLP tasks, we realized that we needed to expand the capabilities of Argilla to support more advanced feedback mechanisms, which led to the introduction of the FeedbackDataset. Compared to its predecessor, it was high in adaptability but still limited in functionality. After having ported all of the functionality of the legacy tasks to the new FeedbackDataset, we decided to deprecate the legacy tasks in favor of a brand new SDK with the FeedbackDataset at its core.

"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/","title":"Hugging Face Spaces Settings","text":"

This section details how to configure and deploy Argilla on Hugging Face Spaces. It covers:

  • Persistent storage
  • How to deploy Argilla under a Hugging Face Organization
  • How to configure and disable HF OAuth access
  • How to use Private Spaces

Looking to get started easily?

If you just discovered Argilla and want to get started quickly, go to the Quickstart guide.

"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#persistent-storage","title":"Persistent storage","text":"

In the Space creation UI, persistent storage is set to Small PAID, which is a paid service, charged per hour of usage.

Spaces get restarted due to maintenance, inactivity, and every time you change your Space settings. Persistent storage enables Argilla to save your datasets and configurations to disk across restarts.

Ephemeral FREE persistent storage

Not setting persistent storage to Small means that you will lose your data when the Space restarts.

If you plan to use the Argilla Space beyond testing, it's highly recommended to set persistent storage to Small.

If you just want to quickly test or use Argilla for a few hours with the risk of losing your datasets, choose Ephemeral FREE. Ephemeral FREE means your datasets and configuration will not be saved to disk; when the Space is restarted, your datasets, workspaces, and users will be lost.

If you want to disable the persistence storage warning, you can set the environment variable ARGILLA_SHOW_HUGGINGFACE_SPACE_PERSISTENT_STORAGE_WARNING=false

Read this if you have datasets and want to enable persistent storage

If you want to enable persistent storage Small PAID and you have created datasets, users, or workspaces, follow this process:

  • First, make a local or remote copy of your datasets, following the Import and Export guide. This is the most important step, because changing the settings of your Space leads to a restart and thus a data loss.
  • If you have created users (not signed in with Hugging Face login), consider storing a copy of users following the manage users guide.
  • Once you have stored all your data safely, go to your Space Settings tab and select Small.
  • Your Space will be restarted and existing data will be lost. From now on, all the new data you create in Argilla will be kept safely.
  • Recover your data by following the above-mentioned guides.
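
As a quick sketch, backing up and restoring a dataset with the SDK could look like this (to_disk/from_disk as described in the Import and Export guide; the path is illustrative):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\ndataset.to_disk(\"my_dataset_backup\")  # local copy of the dataset settings and records\n\n# After the Space restarts with persistent storage enabled:\nrestored = rg.Dataset.from_disk(\"my_dataset_backup\", client=client)\n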
"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#how-to-configure-and-disable-oauth-access","title":"How to configure and disable OAuth access","text":"

By default, Argilla Spaces are configured with Hugging Face OAuth, in the following way:

  • Any Hugging Face user who can see your Space can use the Sign in button, join as an annotator, and contribute to the datasets available under the argilla workspace. This workspace is created during the deployment process.
  • These users can only explore and annotate datasets in the argilla workspace, but they can't perform any critical operations like creating, deleting, updating, or configuring datasets. By default, any other workspace you create won't be visible to these users.

To restrict access or change the default behaviour, there are two options:

Set your Space to private. This is especially useful if your Space is under an organization. This will only allow members within your organization to see and join your Argilla space. It can also be used for personal, solo projects.

Modify the .oauth.yml configuration file. You can find and modify this file under the Files tab of your Space. The default file looks like this:

# Change to `false` to disable HF oauth integration\n#enabled: false\n\nproviders:\n  - name: huggingface\n\n# Allowed workspaces must exists\nallowed_workspaces:\n  - name: argilla\n
You can modify two things:

  • Uncomment enabled: false to completely disable the Sign in with Hugging Face button (see the sketch below). If you disable it, make sure to set the USERNAME and PASSWORD Space secrets to be able to log in as an owner.
  • Change the list of allowed workspaces.

For example, if you want to let users join a new workspace community-initiative:

allowed_workspaces:\n  - name: argilla\n  - name: community-initiative\n
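
And, as a sketch, this is what the file could look like with the Hugging Face OAuth integration disabled (remember to also set the USERNAME and PASSWORD secrets):

# HF oauth integration disabled\nenabled: false\n\nproviders:\n  - name: huggingface\n\nallowed_workspaces:\n  - name: argilla\n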
"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#how-to-deploy-argilla-under-a-hugging-face-organization","title":"How to deploy Argilla under a Hugging Face Organization","text":"

Creating an Argilla Space within an organization is useful for several scenarios:

  • You want to only enable members of your organization to join your Space. You can achieve this by setting your Space to private.
  • You want to manage the Space together with other users (e.g., Space settings, etc.). Note that if you just want to manage your Argilla datasets and workspaces, you can achieve this by granting the Argilla owner role to other users on your Argilla Server (see the sketch after this list).
  • More generally, you want to make your Space available under an organization/community umbrella.
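
As a sketch, granting the owner role to another user with the SDK could look like this (the username and password are illustrative; see the manage users guide for the details):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser = rg.User(\n    username=\"another-teammate\",  # illustrative username\n    password=\"<secure-password>\",\n    role=\"owner\",\n)\nuser.create()\n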

The steps are very similar to the Quickstart guide, with two important differences:

Setup USERNAME

You need to set up the USERNAME Space Secret with your Hugging Face username. This way, the first time you enter with the Hugging Face Sign in button, you'll be granted the owner role.

Enable Persistent Storage SMALL

Not setting persistent storage to Small means that you will lose your data when the Space restarts.

For Argilla Spaces with many users, it's strongly recommended to set persistent storage to Small.

"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#how-to-use-private-spaces","title":"How to use Private Spaces","text":"

Setting your Space visibility to private can be useful if:

  • You want to work on your personal, solo project.
  • You want your Argilla to be available only to members of the organization where you deploy the Argilla Space.

You can set the visibility of the Space during the Space creation process or afterwards under the Settings Tab.

To use the Python SDK with private Spaces, you need to specify your HF_TOKEN, which can be found here, when creating the client:

import argilla as rg\n\nHF_TOKEN = \"...\"\n\nclient = rg.Argilla(\n    api_url=\"<api_url>\",\n    api_key=\"<api_key>\",\n    headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n
"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#space-secrets-overview","title":"Space Secrets overview","text":"

There are two optional secrets to set up the USERNAME and PASSWORD of the owner of the Argilla Space. Remember that, by default, Argilla Spaces are configured with a Sign in with Hugging Face button, which is also used to grant the owner role to the creator of personal Spaces.

The USERNAME and PASSWORD are only useful in a couple of scenarios:

  • You have disabled Hugging Face OAuth.
  • You want to set up Argilla under an organization and want your Hugging Face username to be granted the owner role.

In summary, when setting up a Space:

Creating a Space under your personal account

If you are creating the Space under your personal account, don't insert any value for USERNAME and PASSWORD. Once you launch the Space, you will be able to sign in with your Hugging Face account and be granted the owner role.

Creating a Space under an organization

If you are creating the Space under an organization, make sure to insert your Hugging Face username in the secret USERNAME. This way, you'll be able to sign in with your Hugging Face account.

"},{"location":"getting_started/how-to-deploy-argilla-with-docker/","title":"Deploy with Docker","text":"

This guide describes how to deploy the Argilla Server with docker compose. This is useful if you want to deploy Argilla locally and/or have full control over the configuration of the server, database, and search engine (Elasticsearch).

First, you need to install docker on your machine and make sure you can run docker compose.

Then, create a folder (you can modify the folder name):

mkdir argilla && cd argilla\n

Download docker-compose.yaml:

wget -O docker-compose.yaml https://raw.githubusercontent.com/argilla-io/argilla/main/examples/deployments/docker/docker-compose.yaml\n

or using curl:

curl https://raw.githubusercontent.com/argilla-io/argilla/main/examples/deployments/docker/docker-compose.yaml -o docker-compose.yaml\n

Run the following command to deploy the server on http://localhost:6900:

docker compose up -d\n

Once it is completed, open http://localhost:6900 in your browser and you should see the Argilla login page.

If it's not available, check the logs:

docker compose logs -f\n

Most deployment issues are related to Elasticsearch. If you need support, join the Hugging Face Discord server and ask in the Argilla channel.

"},{"location":"getting_started/quickstart/","title":"Quickstart","text":"

Argilla is a free, open-source, self-hosted tool. This means you need to deploy its UI to start using it. There are two main ways to deploy Argilla:

Deploy on the Hugging Face Hub

The recommended choice to get started. You can get up and running in under 5 minutes and don't need to maintain a server or run any commands.

If you're just getting started with Argilla, click the deploy button below:

You can use the default values following these steps:

  • Leave the default Space owner (your personal account)
  • Leave USERNAME and PASSWORD secrets empty since you'll sign in with your HF user as the Argilla Space owner.
  • Click Create Space to launch Argilla \ud83d\ude80.
  • Once you see the Argilla UI, go to the Sign in into the Argilla UI section. If you see the Building message for longer than 2-3 minutes, refresh the page.

Persistent storage SMALL

Not setting persistent storage to SMALL means that you will lose your data when the Space restarts. Spaces get restarted due to maintenance, inactivity, and every time you change your Space settings. If you want to use the Space just for testing, you can use FREE temporarily.

If you want to deploy Argilla within a Hugging Face organization, setup a more stable Space, or understand the settings, check out the HF Spaces settings guide.

Deploy with Docker

If you want to run Argilla locally on your machine or a server, or tune the server configuration, choose this option and check this guide.

"},{"location":"getting_started/quickstart/#sign-in-into-the-argilla-ui","title":"Sign in into the Argilla UI","text":"

If everything went well, you should see the Argilla sign in page that looks like this:

Building errors

If you get a build error, sometimes restarting the Space from the Settings page works, otherwise check the HF Spaces settings guide.

In the sign in page:

  1. Click on Sign in with Hugging Face
  2. Authorize the application and you will be logged in to Argilla as an owner.

Unauthorized error

Sometimes, after authorizing you'll see an unauthorized error, and get redirected to the sign in page. Typically, clicking the Sign in button solves the issue.

Congrats! Your Argilla server is ready to start your first project using the Python SDK. You now have full rights to create datasets. Follow the instructions in the home page, or keep reading this guide if you want a more detailed explanation.

"},{"location":"getting_started/quickstart/#install-the-python-sdk","title":"Install the Python SDK","text":"

To manage workspaces and datasets in Argilla, you need to use the Argilla Python SDK. You can install it with pip as follows:

pip install argilla\n
"},{"location":"getting_started/quickstart/#create-your-first-dataset","title":"Create your first dataset","text":"

To get started with Argilla and its SDK, we recommend using Jupyter Notebook or Google Colab.

To start interacting with your Argilla server, you need to instantiate a client with an API key and API URL:

  • The <api_key> is in the My Settings page of your Argilla Space.

  • The <api_url> is the URL shown in your browser if it ends with *.hf.space.

import argilla as rg\n\nclient = rg.Argilla(\n    api_url=\"<api_url>\",\n    api_key=\"<api_key>\"\n)\n

You can't find your API URL

If you're using Spaces, sometimes the Argilla UI is embedded into the Hub UI, so the URL in the browser won't match the API URL. In these scenarios, there are two options: 1. Click on the three dots menu at the top of the Space, select \"Embed this Space\", and open the direct URL. 2. Use this pattern: https://[your-owner-name]-[your_space_name].hf.space.

To create a dataset with a simple text classification task, first, you need to define the dataset settings.

settings = rg.Settings(\n    guidelines=\"Classify the reviews as positive or negative.\",\n    fields=[\n        rg.TextField(\n            name=\"review\",\n            title=\"Text from the review\",\n            use_markdown=False,\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"my_label\",\n            title=\"In which category does this article fit?\",\n            labels=[\"positive\", \"negative\"],\n        )\n    ],\n)\n

Now you can create the dataset with these settings. Publish the dataset to make it available in the UI and add the records.

About workspaces

Workspaces in Argilla group datasets and user access rights. The workspace parameter is optional in this case. If you don't specify it, the dataset will be created in the default workspace argilla.

By default, this workspace will be visible to users joining with the Sign in with Hugging Face button. You can create other workspaces and decide to grant access to users either with the SDK or by changing the OAuth configuration.

dataset = rg.Dataset(\n    name=\"my_first_dataset\",\n    settings=settings,\n    client=client,\n    #workspace=\"argilla\"\n)\ndataset.create()\n

Now you can add records to your dataset. We will use the IMDB dataset from the Hugging Face Datasets library as an example. The mapping parameter indicates which keys/columns in the source dataset correspond to the Argilla dataset fields.

from datasets import load_dataset\n\ndata = load_dataset(\"imdb\", split=\"train[:100]\").to_list()\n\ndataset.records.log(records=data, mapping={\"text\": \"review\"})\n

\ud83c\udf89 You have successfully created your first dataset with Argilla. You can now access it in the Argilla UI and start annotating the records.

"},{"location":"getting_started/quickstart/#next-steps","title":"Next steps","text":"
  • To learn how to create your datasets, workspace, and manage users, check the how-to guides.

  • To learn Argilla with hands-on examples, check the Tutorials section.

  • To further configure your Argilla Space, check the Hugging Face Spaces settings guide.

"},{"location":"how_to_guides/","title":"How-to guides","text":"

These guides provide step-by-step instructions for common scenarios, including detailed explanations and code samples. They are divided into two categories: basic and advanced. The basic guides will help you get started with the core concepts of Argilla, while the advanced guides will help you explore more advanced features.

"},{"location":"how_to_guides/#basic","title":"Basic","text":"
  • Manage users and credentials

    Learn what they are and how to manage (create, read and delete) Users in Argilla.

    How-to guide

  • Manage workspaces

    Learn what they are and how to manage (create, read and delete) Workspaces in Argilla.

    How-to guide

  • Create, update, and delete datasets

    Learn what they are and how to manage (create, read and delete) Datasets and customize them using the Settings for Fields, Questions, Metadata and Vectors.

    How-to guide

  • Add, update, and delete records

    Learn what they are and how to add, update and delete the values for a Record, which are made up of Metadata, Vectors, Suggestions and Responses.

    How-to guide

  • Distribute the annotation

    Learn how to use Argilla's automatic TaskDistribution to annotate as a team efficiently.

    How-to guide

  • Annotate a dataset

    Learn how to use the Argilla UI to navigate Datasets and submit Responses.

    How-to guide

  • Query and filter a dataset

    Learn how to query and filter a Dataset.

    How-to guide

  • Import and export datasets and records

    Learn how to export your Dataset or its Records to Python, your local disk, or the Hugging Face Hub.

    How-to guide

"},{"location":"how_to_guides/#advanced","title":"Advanced","text":"
  • Custom fields with layout templates

    Learn how to create CustomFields with HTML, CSS and JavaScript templates.

    How-to guide

  • Use Markdown to format rich content

    Learn how to use Markdown and HTML in TextField to format chat conversations and allow for basic multi-modal support for images, audio, video and PDFs.

    How-to guide

  • Migrate to Argilla V2

    Learn how to migrate Users, Workspaces and Datasets from Argilla V1 to V2.

    How-to guide

"},{"location":"how_to_guides/annotate/","title":"Annotate your dataset","text":"

To experience the UI features firsthand, you can take a look at the Demo \u2197.

Argilla UI offers many functions to help you manage your annotation workflow, aiming to provide the most flexible approach to fit the wide variety of use cases handled by the community.

"},{"location":"how_to_guides/annotate/#annotation-interface-overview","title":"Annotation interface overview","text":""},{"location":"how_to_guides/annotate/#flexible-layout","title":"Flexible layout","text":"

The UI is responsive with two columns for larger devices and one column for smaller devices. This enables you to annotate data using your mobile phone for simple datasets (i.e., not very long text and 1-2 questions) or resize your screen to get a more compact UI.

The UI is organized into the following areas:

  • Header: at the right side of the navigation breadcrumb, you can customize the dataset settings and edit your profile.

  • Left pane: this area displays the control panel on the top. The control panel is used for performing keyword-based search, applying filters, and sorting the results. Below the control panel, the record card(s) are displayed one by one (Focus view) or in a vertical list (Bulk view).

  • Right pane: this is where you annotate your dataset. Simply fill it out as a form, then choose to Submit, Save as Draft, or Discard.

  • Left bottom panel: this expandable area displays the annotation guidelines. The annotation guidelines can be edited by owner and admin roles in the dataset settings.

  • Right bottom panel: this expandable area displays your annotation progress.

"},{"location":"how_to_guides/annotate/#shortcuts","title":"Shortcuts","text":"

The Argilla UI includes a range of shortcuts. For the main actions (submit, discard, save as draft, and selecting labels), the keys are shown on the corresponding button.

To learn how to move from one question to another or between records using the keyboard, take a look at the table below.

Shortcuts provide a smoother annotation experience, especially with datasets using a single question (Label, MultiLabel, Rating, or Ranking).

Available shortcuts:

  • Activate form: ⇥ Tab
  • Move between questions: ↓ Down arrow or ↑ Up arrow
  • Select and unselect label: 1, 2, 3
  • Move between labels or ranking options: ⇥ Tab or ⇧ Shift ⇥ Tab
  • Select rating and rank: 1, 2, 3
  • Fit span to character selection: Hold ⇧ Shift
  • Activate text area: ⇧ Shift ↵ Enter
  • Exit text area: Esc
  • Discard: ⌫ Backspace
  • Save draft (macOS): ⌘ Cmd S
  • Save draft (Other): Ctrl S
  • Submit: ↵ Enter
  • Move between pages: → Right arrow or ← Left arrow

"},{"location":"how_to_guides/annotate/#view-by-status","title":"View by status","text":"

The view selector is set by default on Pending.

If you are starting an annotation effort, all the records are initially kept in the Pending view. Once you start annotating, the records will move to the other queues: Draft, Submitted, Discarded.

  • Pending: The records without a response.
  • Draft: The records with partial responses. They can be submitted or discarded later. You can\u2019t move them back to the pending queue.
  • Discarded: The records may or may not have responses. They can be edited but you can\u2019t move them back to the pending queue.
  • Submitted: The records have been fully annotated and have already been submitted. You can remove them from this queue and send them to the draft or discarded queues, but never back to the pending queue.

Note

If you are working as part of a team, the number of records in your Pending queue may change as other members of the team submit responses and those records get completed.

Tip

If you are working as part of a team, the records in the draft queue that have been completed by other team members will show a check mark to indicate that there is no need to provide a response.

"},{"location":"how_to_guides/annotate/#suggestions","title":"Suggestions","text":"

If your dataset includes model predictions, you will see them represented by a sparkle icon \u2728 in the label or value button. We call them \u201cSuggestions\u201d and they appear in the form as pre-filled responses. If confidence scores have been included by the dataset admin, they will be shown alongside the label. Additionally, admins can choose to always show suggested labels at the beginning of the list. This can be configured from the dataset settings.

If you agree with the suggestions, you just need to click on the Submit button, and they will be considered as your response. If the suggestion is incorrect, you can modify it and submit your final response.

"},{"location":"how_to_guides/annotate/#focus-view","title":"Focus view","text":"

This is the default view to annotate your dataset linearly, displaying one record after another.

Tip

You should use this view if you have a large number of required questions or need a strong focus on the record content to be labelled. This is also the recommended view for annotating a dataset sample to avoid potential biases introduced by using filters, search, sorting and bulk labelling.

Once you submit your first response, the next record will appear automatically. To see your submitted response again, just click on Prev.

Navigating through the records

To navigate through the records, you can use the\u00a0Prev, shown as\u00a0<, and\u00a0Next,\u00a0> buttons on top of the record card.

Each time the page is fully refreshed, the records with modified statuses (Pending to Discarded, Pending to Save as Draft, Pending to Submitted) are sent to the corresponding queue. The control panel displays the status selector, which is set to Pending by default.

"},{"location":"how_to_guides/annotate/#bulk-view","title":"Bulk view","text":"

The bulk view is designed to speed up the annotation and get a quick overview of the whole dataset.

The bulk view displays the records in a vertical list. Once this view is active, some functions from the control panel will activate to optimize the view. You can define the number of records to display per page (10, 25, 50, or 100) and whether records are shown with a fixed height (Collapse records) or their natural height (Expand records).

Tip

You should use this view to quickly explore a dataset. This view is also recommended if you have a good understanding of the domain and want to apply your knowledge based on things like similarity and keyword search, filters, and suggestion score thresholds. For datasets with a large number of required questions or very long fields, the focus view would be more suitable.

With multiple questions, consider using the bulk view to annotate one question in bulk. Then, you can complete the annotation per record from the draft queue.

Note

Please note that suggestions are not shown in bulk view (except for Spans) and that you will need to save as a draft when you are not providing responses to all required questions.

"},{"location":"how_to_guides/annotate/#annotation-progress","title":"Annotation progress","text":"

You can track the progress of an annotation task in the progress bar shown in the dataset list and in the progress panel inside the dataset. This bar shows the number of records that have been completed (i.e., those that have the minimum number of submitted responses) and those left to be completed.

You can also track your own progress in real time expanding the right-bottom panel inside the dataset page. There you can see the number of records for which you have Pending,\u00a0Draft,\u00a0Submitted\u00a0and\u00a0Discarded responses.

Note

You can also explore the dataset progress from the SDK. Check the Track your team's progress to know more about it.
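
For instance, a minimal sketch of retrieving progress with the SDK (method names as in the Track your team's progress guide):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\n# overall progress, optionally broken down per user\nprogress = dataset.progress(with_users_distribution=True)\nprint(progress)\n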

"},{"location":"how_to_guides/annotate/#use-search-filters-and-sort","title":"Use search, filters, and sort","text":"

The UI offers various features designed for data exploration and understanding. Combining these features with bulk labelling can save you and your team hours of time.

Tip

You should use this when you are familiar with your data and have large volumes to annotate based on verified beliefs and experience.

"},{"location":"how_to_guides/annotate/#search","title":"Search","text":"

From the control panel at the top of the left pane, you can search by keyword across the entire dataset. If you have more than one field in your records, you may specify whether the search is to be performed on \u201cAll\u201d fields or on a specific one. Matched results are highlighted in color.

Note

If you introduce more than one keyword, the search will return results where all keywords have a match.

Tip

For more advanced searches, take a look at the advanced queries DSL.

"},{"location":"how_to_guides/annotate/#order-by-record-semantic-similarity","title":"Order by record semantic similarity","text":"

You can retrieve records based on their similarity to another record if vectors have been added to the dataset.

Note

Check these guides to know how to add vectors to your\u00a0dataset and\u00a0records.

To use the search by semantic similarity function, click on Find similar within the record you wish to use as a reference. If multiple vectors are available, select the desired vector. You can also choose whether to retrieve the most or least similar records.

The retrieved records are then ordered by similarity, with the similarity score displayed on each record card.

While the semantic search is active, you can update the selected vector or adjust the order of similarity, and specify the number of desired results.

To cancel the search, click on the cross icon next to the reference record.

"},{"location":"how_to_guides/annotate/#filter-and-sort-by-metadata-responses-and-suggestions","title":"Filter and sort by metadata, responses, and suggestions","text":""},{"location":"how_to_guides/annotate/#filter","title":"Filter","text":"

If the dataset contains metadata, responses and suggestions, click on\u00a0Filter in the control panel to display the available filters. You can select multiple filters and combine them.

Note

Record info including metadata is visible from the ellipsis menu in the record card.

From the Metadata dropdown, type and select the property. You can set a range for integer and float properties, and select specific values for term metadata.

Note

If a metadata property was set to visible_for_annotators=False, it will only appear in the metadata filter for users with the admin or owner role.

From the Responses dropdown, type and select the question. You can set a range for rating questions and select specific values for label, multi-label, and span questions.

Note

The text and ranking questions are not available for filtering.

From the Suggestions dropdown, filter the suggestions by Suggestion values, Score, or Agent.

"},{"location":"how_to_guides/annotate/#sort","title":"Sort","text":"

You can sort your records according to one or several attributes.

The insertion time and last update are available for all records.

The suggestion scores, the response and suggestion values for rating questions, and the metadata properties are available for sorting only when they were provided.

"},{"location":"how_to_guides/custom_fields/","title":"Custom fields with layout templates","text":"

This guide demonstrates how to create custom fields in Argilla using HTML, CSS, and JavaScript templates.

Main Class

rg.CustomField(\n    name=\"custom\",\n    title=\"Custom\",\n    template=\"<div>{{record.fields.custom.key}}</div>\",\n    advanced_mode=False,\n    required=True,\n    description=\"Field description\",\n)\n

Check the CustomField - Python Reference to see the attributes, arguments, and methods of the CustomField class in detail.

"},{"location":"how_to_guides/custom_fields/#understanding-the-record-object","title":"Understanding the Record Object","text":"

The record object is the main JavaScript object that contains all the information about the Argilla record in the UI, like fields, metadata, etc. Your template can use this object to display record information within the custom field. For example, you can access the fields of the record by navigating to record.fields.<field_name>, and this generally works the same for metadata, responses, etc.
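
For instance, a minimal template (a sketch assuming your dataset has a text field and a source metadata property) could reference the record object like this:

<div>\n    <p>{{record.fields.text}}</p>\n    <p>Source: {{record.metadata.source}}</p>\n</div>\n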

"},{"location":"how_to_guides/custom_fields/#using-handlebars-in-your-template","title":"Using Handlebars in your template","text":"

By default, custom fields use the handlebars syntax engine to render templates with record information. This engine converts the content inside double curly brackets {{}} to the corresponding values of the record object that you reference within your template. As described in the Understanding the Record Object section, you can access the fields of the record by navigating to {{record.fields.<field_name>}}. For more complex use cases, handlebars has various expressions, partials, and helpers that you can use to render your data. You can deactivate the handlebars engine with the advanced_mode=True parameter in CustomField; you will then need to define custom JavaScript to access the record attributes, as described in the Advanced Mode section.

"},{"location":"how_to_guides/custom_fields/#usage-example","title":"Usage example","text":"

Thanks to the handlebars syntax engine, we only need to pass the HTML and, optionally, some CSS between <style> tags.

css_template = \"\"\"\n<style>\n#container {\n    display: flex;\n    gap: 10px;\n}\n.column {\n    flex: 1;\n}\n</style>\n\"\"\" # (1)\n\nhtml_template = \"\"\"\n<div id=\"container\">\n    <div class=\"column\">\n        <h3>Original</h3>\n        <img src=\"{{record.fields.image.original}}\" />\n    </div>\n    <div class=\"column\">\n        <h3>Revision</h3>\n        <img src=\"{{record.fields.image.revision}}\" />\n    </div>\n</div>\n\"\"\" # (2)\n
  1. This is a CSS template, which ensures that the container and columns are styled.
  2. This is an HTML template, which creates a container with two columns and injects the value corresponding to the key of the image field into it.

We can now pass these templates to the CustomField class.

import argilla as rg\n\ncustom_field = rg.CustomField(\n    name=\"image\",\n    template=css_template + html_template,\n)\n\nsettings = rg.Settings(\n    fields=[custom_field],\n    questions=[rg.TextQuestion(name=\"response\")],\n)\n\ndataset = rg.Dataset(\n    name=\"custom_field_dataset\",\n    settings=settings,\n).create()\n\ndataset.records.log([\n    rg.Record(\n        fields={\n            \"image\": {\n                \"original\": \"https://argilla.io/brand-assets/argilla/argilla-logo-color-black.png\",\n                \"revision\": \"https://argilla.io/brand-assets/argilla/argilla-logo-black.png\",\n            }\n        }\n    )]\n)\n

The result will be the following:

"},{"location":"how_to_guides/custom_fields/#example-gallery","title":"Example Gallery","text":"Metadata in a table

You can make it easier to read metadata by displaying it in a table. This uses handlebars to iterate over the metadata object and display each key-value pair in a row.

template = \"\"\"\n<style>\n    .container {\n        border: 1px solid #ddd;\n        font-family: sans-serif;\n    }\n    .row {\n        display: flex;\n        border-bottom: 1px solid #ddd;\n    }\n    .row:last-child {\n        border-bottom: none;\n    }\n    .column {\n        flex: 1;\n        padding: 8px;\n    }\n    .column:first-child {\n        border-right: 1px solid #ddd;\n    }\n</style>\n<div class=\"container\">\n    <div class=\"header\">\n        <div class=\"column\">Metadata</div>\n        <div class=\"column\">Value</div>\n    </div>\n    {{#each record.metadata}}\n    <div class=\"row\">\n        <div class=\"column\">{{@key}}</div>\n        <div class=\"column\">{{this}}</div>\n    </div>\n    {{/each}}\n</div>\n\"\"\"\nrecord = rg.Record(\n    fields={\"text\": \"hello\"},\n    metadata={\n        \"name\": \"John Doe\",\n        \"age\": 25,\n    }\n)\n

JSON viewer

The value of a custom field is a dictionary in Python and a JavaScript object in the browser. You can render this object as a JSON string using the json helper, which is implemented in Argilla's frontend for convenience. If you want to learn more about handlebars helpers, you can check the handlebars documentation.

template = \"{{ json record.fields.user_profile }}\"\n\nrecord = rg.Record(\n    fields={\n        \"user_profile\": {\n            \"name\": \"John Doe\",\n            \"age\": 30,\n            \"address\": \"123 Main St\",\n            \"email\": \"john.doe@hooli.com\",\n        }\n    },\n)\n
"},{"location":"how_to_guides/custom_fields/#advanced-mode","title":"Advanced Mode","text":"

When advanced_mode=True, you can use the template argument to pass a full HTML page. This allows for more complex customizations, including the use of JavaScript. The record object will be available in the global scope, so you can access it in your JavaScript code as described in the Understanding the Record Object section.

"},{"location":"how_to_guides/custom_fields/#usage-example_1","title":"Usage example","text":"

Let's reproduce the example from the Without advanced mode section, but this time we will insert the handlebars syntax engine into the template ourselves.

template = \"\"\"\n<div id=\"custom-field-container\"></div>\n<script id=\"template\" type=\"text/x-handlebars-template\">\n    <div id=\"container\">\n        <div class=\"column\">\n            <h3>Original</h3>\n            <img src=\"{{record.fields.image.original}}\" />\n        </div>\n        <div class=\"column\">\n            <h3>Revision</h3>\n            <img src=\"{{record.fields.image.revision}}\" />\n        </div>\n    </div>\n</script>\n\"\"\" # (1)\n\nscript = \"\"\"\n<script src=\"https://cdn.jsdelivr.net/npm/handlebars@latest/dist/handlebars.js\"></script>\n<script>\n    const template = document.getElementById(\"template\").innerHTML;\n    const compiledTemplate = Handlebars.compile(template);\n    const html = compiledTemplate({ record });\n    document.getElementById(\"custom-field-container\").innerHTML = html;\n</script>\n\"\"\" # (2)\n
  1. This is the Handlebars template. We reuse the CSS from the previous example so the container and columns are styled, set the script's id to template so we can use it later in our JavaScript code, and set its type to text/x-handlebars-template to indicate that this is a Handlebars template. Note that we also added a div with id custom-field-container to render the template into.
  2. This is the JavaScript code. We load the Handlebars library and then use it to compile the template and render the record. Finally, we render the result into the div with id custom-field-container.

We can now pass these templates to the CustomField class, ensuring that the advanced_mode is set to True.

import argilla as rg\n\ncustom_field = rg.CustomField(\n    name=\"image\",\n    template=template + script,\n    advanced_mode=True\n)\n

Besides the new CustomField code above, reusing the same approach as in the Using Handlebars in your template section to create a dataset and log a record will yield the same result.

"},{"location":"how_to_guides/custom_fields/#example-gallery_1","title":"Example Gallery","text":"3D object viewer

We will now use native JavaScript and three.js to create a 3D object viewer. We will then use the record object directly to insert URLs from the record's fields.

template = \"\"\"\n<script src=\"https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js\"></script>\n<script src=\"https://cdn.jsdelivr.net/npm/three@0.128.0/examples/js/loaders/GLTFLoader.js\"></script>\n<script src=\"https://cdn.jsdelivr.net/npm/three@0.128.0/examples/js/controls/OrbitControls.js\"></script>\n\n\n<div style=\"display: flex;\">\n    <div>\n        <h3>Option A</h3>\n        <canvas id=\"canvas1\" width=\"400\" height=\"400\"></canvas>\n    </div>\n    <div>\n        <h3>Option B</h3>\n        <canvas id=\"canvas2\" width=\"400\" height=\"400\"></canvas>\n    </div>\n</div>\n\n<script>\n    function init(canvasId, modelUrl) {\n    let scene, camera, renderer, controls;\n\n    const canvas = document.getElementById(canvasId);\n    scene = new THREE.Scene();\n    camera = new THREE.PerspectiveCamera(75, 1, 0.1, 1000);\n    renderer = new THREE.WebGLRenderer({ canvas, alpha: true });\n\n    renderer.setSize(canvas.clientWidth, canvas.clientHeight);\n\n    const directionalLight = new THREE.DirectionalLight(0xffffff, 1);\n    directionalLight.position.set(2, 2, 5);\n    scene.add(directionalLight);\n\n    const ambientLight = new THREE.AmbientLight(0x404040, 7);\n    scene.add(ambientLight);\n\n    controls = new THREE.OrbitControls(camera, renderer.domElement);\n    controls.maxPolarAngle = Math.PI / 2;\n\n    const loader = new THREE.GLTFLoader();\n    loader.load(\n        modelUrl,\n        function (gltf) {\n        const model = gltf.scene;\n        scene.add(model);\n        model.position.set(0, 0, 0);\n\n        const box = new THREE.Box3().setFromObject(model);\n        const center = box.getCenter(new THREE.Vector3());\n        model.position.sub(center);\n        camera.position.set(center.x, center.y, center.z + 1.2);\n\n        animate();\n        },\n        undefined,\n        function (error) {\n        console.error(error);\n        }\n    );\n\n    function animate() {\n        requestAnimationFrame(animate);\n        controls.update();\n        renderer.render(scene, camera);\n    }\n    }\n\n    init(\"canvas1\", record.fields.object.option_a);\n    init(\"canvas2\", record.fields.object.option_b);\n</script>\n\n\"\"\"\n

Next, we will create a record with two URLs to 3D objects from the 3d-arena dataset.

record = rg.Record(\n    fields={\n        \"object\": {\n            \"option_a\": \"https://huggingface.co/datasets/dylanebert/3d-arena/resolve/main/outputs/Strawb3rry/a_bookshelf_with_ten_books_stacked_vertically.glb\",\n            \"option_b\": \"https://huggingface.co/datasets/dylanebert/3d-arena/resolve/main/outputs/MeshFormer/a_bookshelf_with_ten_books_stacked_vertically.glb\",\n        }\n    }\n)\n

"},{"location":"how_to_guides/custom_fields/#updating-templates","title":"Updating templates","text":"

As described in the dataset guide, you can update certain settings attributes for a published dataset. This includes the custom field templates, which is a useful feature when you want to iterate on the template of a custom field without the need to create a new dataset. The following example shows how to update the template of a custom field.

dataset.settings.fields[\"custom\"].template = \"<new-template>\"\ndataset.update()\n
"},{"location":"how_to_guides/dataset/","title":"Dataset management","text":"

This guide provides an overview of datasets, explaining the basics of how to set them up and manage them in Argilla.

A dataset is a collection of records that you can configure for labelers to provide feedback using the UI. Depending on the specific requirements of your task, you may need various types of feedback. You can customize the dataset to include different kinds of questions, so the first step will be to define the aim of your project and the kind of data and feedback you will need. With this information, you can start configuring a dataset by defining fields, questions, metadata, vectors, and guidelines through settings.

Question: Who can manage datasets?

Only users with the owner role can manage (create, retrieve, update and delete) all the datasets.

Users with the admin role can manage (create, retrieve, update and delete) the datasets in the workspaces they have access to.

Main Classes

The two main classes are rg.Dataset and rg.Settings:
rg.Dataset(\n    name=\"name\",\n    workspace=\"workspace\",\n    settings=settings,\n    client=client\n)\n

Check the Dataset - Python Reference to see the attributes, arguments, and methods of the Dataset class in detail.

rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[\n        rg.LabelQuestion(\n            name=\"label\",\n            labels=[\"label_1\", \"label_2\", \"label_3\"]\n        )\n    ],\n    metadata=[rg.TermsMetadataProperty(name=\"metadata\")],\n    vectors=[rg.VectorField(name=\"vector\", dimensions=10)],\n    guidelines=\"guidelines\",\n    allow_extra_metadata=True,\n    distribution=rg.TaskDistribution(min_submitted=2),\n)\n

Check the Settings - Python Reference to see the attributes, arguments, and methods of the Settings class in detail.

"},{"location":"how_to_guides/dataset/#create-a-dataset","title":"Create a dataset","text":"

To create a dataset, you can define it in the Dataset class and then call the create method that will send the dataset to the server so that it can be visualized in the UI. If the dataset does not appear in the UI, you may need to click the refresh button to update the view. For further configuration of the dataset, you can refer to the settings section.

Info

If you have deployed Argilla with Hugging Face Spaces and HF Sign in, you can use argilla as a workspace name. Otherwise, you might need to create a workspace following this guide.
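
A minimal sketch of creating a workspace with the SDK (see the workspaces guide for the full details):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = rg.Workspace(name=\"my_workspace\")\nworkspace.create()\n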

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nsettings = rg.Settings(\n    guidelines=\"These are some guidelines.\",\n    fields=[\n        rg.TextField(\n            name=\"text\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"label\",\n            labels=[\"label_1\", \"label_2\", \"label_3\"]\n        ),\n    ],\n)\n\ndataset = rg.Dataset(\n    name=\"my_dataset\",\n    workspace=\"my_workspace\",\n    settings=settings,\n)\n\ndataset.create()\n

The created dataset will be empty, to add records go to this how-to guide.

Accessing attributes

Access the attributes of a dataset by calling them directly on the dataset object, for example, dataset.id, dataset.name, or dataset.settings. You can similarly access the fields, questions, metadata, vectors, and guidelines, for instance, dataset.fields or dataset.questions.
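
For example (a sketch, assuming a dataset retrieved with the client as in the other snippets):

dataset = client.datasets(\"my_dataset\")\n\nprint(dataset.id)\nprint(dataset.name)\nprint(dataset.settings)\nprint(dataset.fields)\nprint(dataset.questions)\n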

"},{"location":"how_to_guides/dataset/#create-multiple-datasets-with-the-same-settings","title":"Create multiple datasets with the same settings","text":"

To create multiple datasets with the same settings, define the settings once and pass them to each dataset.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nsettings = rg.Settings(\n    guidelines=\"These are some guidelines.\",\n    fields=[rg.TextField(name=\"text\", use_markdown=True)],\n    questions=[\n        rg.LabelQuestion(name=\"label\", labels=[\"label_1\", \"label_2\", \"label_3\"])\n    ],\n    distribution=rg.TaskDistribution(min_submitted=3),\n)\n\ndataset1 = rg.Dataset(name=\"my_dataset_1\", settings=settings)\ndataset2 = rg.Dataset(name=\"my_dataset_2\", settings=settings)\n\n# Create the datasets on the server\ndataset1.create()\ndataset2.create()\n
"},{"location":"how_to_guides/dataset/#create-a-dataset-from-an-existing-dataset","title":"Create a dataset from an existing dataset","text":"

To create a new dataset from an existing dataset, get the settings from the existing dataset and pass them to the new dataset.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nexisting_dataset = client.datasets(\"my_dataset\")\n\nnew_dataset = rg.Dataset(name=\"my_dataset_copy\", settings=existing_dataset.settings)\n\nnew_dataset.create()\n

Info

You can also copy the records from the original dataset to the new one:

records = list(existing_dataset.records)\nnew_dataset.records.log(records)\n
"},{"location":"how_to_guides/dataset/#define-dataset-settings","title":"Define dataset settings","text":"

Tip

Instead of defining your own custom settings, you can use some of our pre-built templates for text classification, ranking and rating. Learn more here.

"},{"location":"how_to_guides/dataset/#fields","title":"Fields","text":"

The fields in a dataset consist of one or more data items requiring annotation. Currently, Argilla supports plain text and markdown through the TextField, images through the ImageField, chat formatted data through the ChatField and full custom templates through our CustomField.

Note

The order of the fields in the UI follows the order in which these are added to the fields attribute in the Python SDK.

Check the Field - Python Reference to see the field classes in detail.

The available field types are Text, Image, Chat, and Custom:

rg.TextField(\n    name=\"text\",\n    title=\"Text\",\n    use_markdown=False,\n    required=True,\n    description=\"Field description\",\n)\n

rg.ImageField(\n    name=\"image\",\n    title=\"Image\",\n    required=True,\n    description=\"Field description\",\n)\n

rg.ChatField(\n    name=\"chat\",\n    title=\"Chat\",\n    use_markdown=True,\n    required=True,\n    description=\"Field description\",\n)\n

A CustomField allows you to use a custom template for the field. This is useful if you want to use a custom UI for the field. You can use the template argument to pass a string that will be rendered as the field's UI.

By default, advanced_mode=False, which uses a brackets syntax engine for the templates. This engine converts {{record.fields.field.key}} to the values of the record's field object. You can also use advanced_mode=True, which deactivates the brackets syntax engine and allows you to add custom JavaScript to your template to render the field.

rg.CustomField(\n    name=\"custom\",\n    title=\"Custom\",\n    template=\"<div>{{record.fields.custom.key}}</div>\",\n    advanced_mode=False,\n    required=True,\n    description=\"Field description\",\n)\n

Tip

To learn more about how to create custom fields with HTML and CSS templates, check this how-to guide.

"},{"location":"how_to_guides/dataset/#questions","title":"Questions","text":"

To collect feedback for your dataset, you need to formulate questions that annotators will be asked to answer.

Check the Questions - Python Reference to see the question classes in detail.

The available question types are Label, Multi-label, Ranking, Rating, Span, and Text:

A LabelQuestion asks annotators to choose a unique label from a list of options. This type is useful for text classification tasks. In the UI, they will have a rounded shape.

rg.LabelQuestion(\n    name=\"label\",\n    labels={\"YES\": \"Yes\", \"NO\": \"No\"}, # or [\"YES\", \"NO\"]\n    title=\"Is the response relevant for the given prompt?\",\n    description=\"Select the one that applies.\",\n    required=True,\n    visible_labels=10\n)\n

A MultiLabelQuestion asks annotators to choose all applicable labels from a list of options. This type is useful for multi-label text classification tasks. In the UI, they will have a squared shape.

rg.MultiLabelQuestion(\n    name=\"multi_label\",\n    labels={\n        \"hate\": \"Hate Speech\",\n        \"sexual\": \"Sexual content\",\n        \"violent\": \"Violent content\",\n        \"pii\": \"Personal information\",\n        \"untruthful\": \"Untruthful info\",\n        \"not_english\": \"Not English\",\n        \"inappropriate\": \"Inappropriate content\"\n    }, # or [\"hate\", \"sexual\", \"violent\", \"pii\", \"untruthful\", \"not_english\", \"inappropriate\"]\n    title=\"Does the response include any of the following?\",\n    description=\"Select all that apply.\",\n    required=True,\n    visible_labels=10,\n    labels_order=\"natural\"\n)\n

A RankingQuestion asks annotators to order a list of options. It is useful to gather information on the preference or relevance of a set of options.

rg.RankingQuestion(\n    name=\"ranking\",\n    values={\n        \"reply-1\": \"Reply 1\",\n        \"reply-2\": \"Reply 2\",\n        \"reply-3\": \"Reply 3\"\n    }, # or [\"reply-1\", \"reply-2\", \"reply-3\"]\n    title=\"Order replies based on your preference\",\n    description=\"1 = best, 3 = worst. Ties are allowed.\",\n    required=True,\n)\n

A RatingQuestion asks annotators to select one option from a list of integer values. This type is useful for collecting numerical scores.

rg.RatingQuestion(\n    name=\"rating\",\n    values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n    title=\"How satisfied are you with the response?\",\n    description=\"1 = very unsatisfied, 10 = very satisfied\",\n    required=True,\n)\n

A SpanQuestion asks annotators to select a portion of the text of a specific field and apply a label to it. This type of question is useful for named entity recognition or information extraction tasks.

rg.SpanQuestion(\n    name=\"span\",\n    field=\"text\",\n    labels={\n        \"PERSON\": \"Person\",\n        \"ORG\": \"Organization\",\n        \"LOC\": \"Location\",\n        \"MISC\": \"Miscellaneous\"\n    }, # or [\"PERSON\", \"ORG\", \"LOC\", \"MISC\"]\n    title=\"Select the entities in the text\",\n    description=\"Select the entities in the text\",\n    required=True,\n    allow_overlapping=False,\n    visible_labels=10\n)\n

A TextQuestion offers annotators a free-text area where they can enter any text. This type is useful for collecting natural language data, such as corrections or explanations.

rg.TextQuestion(\n    name=\"text\",\n    title=\"Please provide feedback on the response\",\n    description=\"Please provide feedback on the response\",\n    required=True,\n    use_markdown=True\n)\n

"},{"location":"how_to_guides/dataset/#metadata","title":"Metadata","text":"

Metadata properties allow you to configure the use of metadata information for the filtering and sorting features available in the UI and Python SDK.

Check the Metadata - Python Reference to see the metadata classes in detail.

The available metadata property types are Terms, Integer, and Float:

A TermsMetadataProperty allows you to add a list of strings as metadata options.

rg.TermsMetadataProperty(\n    name=\"terms\",\n    options=[\"group-a\", \"group-b\", \"group-c\"],\n    title=\"Annotation groups\",\n    visible_for_annotators=True,\n)\n

An IntegerMetadataProperty allows you to add integer values as metadata.

rg.IntegerMetadataProperty(\n    name=\"integer\",\n    title=\"length-input\",\n    min=42,\n    max=1984,\n)\n

A FloatMetadataProperty allows you to add float values as metadata.

rg.FloatMetadataProperty(\n    name=\"float\",\n    title=\"Reading ease\",\n    min=-92.29914,\n    max=119.6975,\n)\n

Note

You can also set the allow_extra_metadata argument in the dataset to True to allow records to include metadata fields other than those specified under metadata. Note that these extra fields will not be accessible from the UI for any user, only retrievable using the Python SDK.
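
For example, a minimal sketch of settings that allow extra metadata:

settings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[rg.LabelQuestion(name=\"label\", labels=[\"label_1\", \"label_2\"])],\n    metadata=[rg.TermsMetadataProperty(name=\"terms\", options=[\"group-a\", \"group-b\"])],\n    allow_extra_metadata=True,  # records may carry extra metadata keys\n)\n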

"},{"location":"how_to_guides/dataset/#vectors","title":"Vectors","text":"

To use the similarity search in the UI and the Python SDK, you will need to configure vectors using the VectorField class.

Check the Vector - Python Reference to see the VectorField class in detail.

rg.VectorField(\n    name=\"my_vector\",\n    title=\"My Vector\",\n    dimensions=768\n)\n

"},{"location":"how_to_guides/dataset/#guidelines","title":"Guidelines","text":"

Once you have decided on the data to show and the questions to ask, it's important to provide clear guidelines to the annotators. These guidelines help them understand the task and answer the questions consistently. You can provide guidelines in two ways:

  • In the dataset guidelines: these are added as an argument when you create your dataset in the Python SDK and will appear in the annotation interface.

guidelines = \"In this dataset, you will find a collection of records that show a category, an instruction, a context and a response to that instruction. [...]\"\n

  • As question descriptions: these are added as an argument when you create questions in the Python SDK. This text will appear in a tooltip next to the question in the UI.

It is good practice to use at least the dataset guidelines if not both methods. Question descriptions should be short and provide context to a specific question. They can be a summary of the guidelines to that question, but often that is not sufficient to align the whole annotation team. In the guidelines, you can include a description of the project, details on how to answer each question with examples, instructions on when to discard a record, etc.

Tip

If you want further guidance on good practices for guidelines during the project development, check our blog post.

"},{"location":"how_to_guides/dataset/#distribution","title":"Distribution","text":"

When working as a team, you may want to distribute the annotation task to ensure efficiency and quality. You can use the\u00a0TaskDistribution settings to configure the number of minimum submitted responses expected for each record. Argilla will use this setting to automatically handle records in your team members' pending queues.

Check the Task Distribution - Python Reference to see the TaskDistribution class in detail.

rg.TaskDistribution(\n    min_submitted = 2\n)\n

To learn more about how to distribute the task among team members, check the Distribute the annotation guide.

"},{"location":"how_to_guides/dataset/#list-datasets","title":"List datasets","text":"

You can list all the datasets available in a workspace using the datasets attribute of the Workspace class. You can also use len(workspace.datasets) to get the number of datasets in a workspace.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\ndatasets = workspace.datasets\n\nfor dataset in datasets:\n    print(dataset)\n

When you list datasets, dataset settings are not preloaded, since this can introduce extra requests to the server. If you want to work with settings when listing datasets, you need to load them:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nfor dataset in client.datasets:\n    dataset.settings.get() # this will get the dataset settings from the server\n    print(dataset.settings)\n

Notebooks

When using a notebook, executing client.datasets will display a table with the name of the existing datasets, their id, the workspace_id to which they belong, and the last update as updated_at.

"},{"location":"how_to_guides/dataset/#retrieve-a-dataset","title":"Retrieve a dataset","text":"

You can retrieve a dataset by calling the datasets method on the Argilla class and passing the name or id of the dataset as an argument. If the dataset does not exist, a warning message will be raised and None will be returned.

You can retrieve a dataset by name or by id:

By default, this method attempts to retrieve the dataset from the first workspace. If the dataset is in a different workspace, you must specify either the workspace object or the workspace name as an argument.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\n# Retrieve the dataset from the first workspace\nretrieved_dataset = client.datasets(name=\"my_dataset\")\n\n# Retrieve the dataset from the specified workspace\nretrieved_dataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(id=\"<uuid-or-uuid-string>\")\n
"},{"location":"how_to_guides/dataset/#check-dataset-existence","title":"Check dataset existence","text":"

You can check if a dataset exists. The client.datasets method will return None if the dataset was not found.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\nif dataset is not None:\n    pass\n
"},{"location":"how_to_guides/dataset/#update-a-dataset","title":"Update a dataset","text":"

Once a dataset is published, there are limited things you can update. Here is a summary of the attributes you can change for each setting:

Fields

| Attributes | From SDK | From UI |
| --- | --- | --- |
| Name | ❌ | ❌ |
| Title | ✅ | ✅ |
| Required | ❌ | ❌ |
| Use markdown | ✅ | ✅ |
| Template | ✅ | ❌ |

Questions

| Attributes | From SDK | From UI |
| --- | --- | --- |
| Name | ❌ | ❌ |
| Title | ❌ | ✅ |
| Description | ❌ | ✅ |
| Required | ❌ | ❌ |
| Labels | ❌ | ❌ |
| Values | ❌ | ❌ |
| Label order | ❌ | ✅ |
| Suggestions first | ❌ | ✅ |
| Visible labels | ❌ | ✅ |
| Field | ❌ | ❌ |
| Allow overlapping | ❌ | ❌ |
| Use markdown | ❌ | ✅ |

Metadata

| Attributes | From SDK | From UI |
| --- | --- | --- |
| Name | ❌ | ❌ |
| Title | ✅ | ✅ |
| Options | ❌ | ❌ |
| Minimum value | ❌ | ❌ |
| Maximum value | ❌ | ❌ |
| Visible for annotators | ✅ | ✅ |
| Allow extra metadata | ✅ | ✅ |

Vectors

| Attributes | From SDK | From UI |
| --- | --- | --- |
| Name | ❌ | ❌ |
| Title | ✅ | ✅ |
| Dimensions | ❌ | ❌ |

Guidelines

| From SDK | From UI |
| --- | --- |
| ✅ | ✅ |

Distribution

| Attributes | From SDK | From UI |
| --- | --- | --- |
| Minimum submitted | ✅ | ✅ |

To modify these attributes, you can simply set the new value of the attributes you wish to change and call the update method on the Dataset object.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\ndataset.settings.fields[\"text\"].use_markdown = True\ndataset.settings.metadata[\"my_metadata\"].visible_for_annotators = False\n\ndataset.update()\n

You can also add and delete metadata properties and vector fields using the add and delete methods.

To add:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\ndataset.settings.vectors.add(rg.VectorField(name=\"my_new_vector\", dimensions=123))\ndataset.settings.metadata.add(\n    rg.TermsMetadataProperty(\n        name=\"my_new_metadata\",\n        options=[\"option_1\", \"option_2\", \"option_3\"],\n    ),\n)\ndataset.update()\n

To delete:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\ndataset.settings.vectors[\"my_old_vector\"].delete()\ndataset.settings.metadata[\"my_old_metadata\"].delete()\n\ndataset.update()\n
"},{"location":"how_to_guides/dataset/#delete-a-dataset","title":"Delete a dataset","text":"

You can delete an existing dataset by calling the delete method on the Dataset class.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset_to_delete = client.datasets(name=\"my_dataset\")\n\ndataset_deleted = dataset_to_delete.delete()\n
"},{"location":"how_to_guides/distribution/","title":"Distribute the annotation task among the team","text":"

This guide explains how you can use Argilla\u2019s automatic task distribution to efficiently divide the task of annotating a dataset among multiple team members.

Owners and admins can define the minimum number of submitted responses expected for each record. Argilla will use this setting to automatically handle which records are shown in the pending queues of all users with access to the dataset.

When a record has met the minimum number of submissions, the status of the record will change to completed, and the record will be removed from the Pending queue of all team members so they can focus on providing responses where they are most needed. The dataset\u2019s annotation task will be fully completed once all records have the completed status.

Note

The status of a record can be either completed, when it has the required number of responses with submitted status, or pending, when it doesn\u2019t meet this requirement.

Each record can have multiple responses, and each of those can have the status submitted, discarded, or draft.

Main Class

rg.TaskDistribution(\n    min_submitted = 2\n)\n

Check the Task Distribution - Python Reference to see the attributes, arguments, and methods of the TaskDistribution class in detail.

"},{"location":"how_to_guides/distribution/#configure-task-distribution-settings","title":"Configure task distribution settings","text":"

By default, Argilla will set the required minimum submitted responses to 1. This means that whenever a record has at least 1 response with the status submitted, the record's status will be completed and the record will be removed from the Pending queue of other team members.

Tip

Leave the default value of minimum submissions (1) if you are working on your own or when you don't require more than one submitted response per record.

If you wish to set a different number, you can do so through the distribution setting in your dataset settings:

settings = rg.Settings(\n    guidelines=\"These are some guidelines.\",\n    fields=[\n        rg.TextField(\n            name=\"text\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"label\",\n            labels=[\"label_1\", \"label_2\", \"label_3\"]\n        ),\n    ],\n    distribution=rg.TaskDistribution(min_submitted=3)\n)\n

Learn more about configuring dataset settings in the Dataset management guide.

Tip

Increase the number of minimum submissions if you\u2019d like to ensure you get more than one submitted response per record. Make sure that this number is never higher than the number of members in your team. Note that the lower this number is, the faster the task will be completed.

Note

Note that some records may have more responses than expected if multiple team members submit responses on the same record simultaneously.

"},{"location":"how_to_guides/distribution/#change-task-distribution-settings","title":"Change task distribution settings","text":"

If you wish to change the minimum submitted responses required in a dataset, you can do so as long as the annotation hasn\u2019t started, i.e., the dataset has no responses for any records.

Admins and owners can change this value from the dataset settings page in the UI or from the SDK:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\ndataset.settings.distribution.min_submitted = 4\n\ndataset.update()\n
"},{"location":"how_to_guides/distribution/#track-your-teams-progress","title":"Track your team's progress","text":"

You can check the progress of the annotation task by using the dataset.progress method. This method returns the number of records with the status completed and pending, as well as the total number of records in the dataset.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\nprogress = dataset.progress()\n
{\n    \"total\": 100,\n    \"completed\": 10,\n    \"pending\": 90\n}\n

You can also include the distribution by user in the progress by setting the with_users_distribution parameter to True. This will return the number of records with the status completed and pending, the total number of records in the dataset, and the number of completed submissions per user. You can visit the Annotation Progress section for more information.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\nprogress = dataset.progress(with_users_distribution=True)\n
{\n    \"total\": 100,\n    \"completed\": 50,\n    \"pending\": 50,\n    \"users\": {\n        \"user1\": {\n           \"completed\": { \"submitted\": 10, \"draft\": 5, \"discarded\": 5},\n           \"pending\": { \"submitted\": 5, \"draft\": 10, \"discarded\": 10},\n        },\n        \"user2\": {\n           \"completed\": { \"submitted\": 20, \"draft\": 10, \"discarded\": 5},\n           \"pending\": { \"submitted\": 2, \"draft\": 25, \"discarded\": 0},\n        },\n        ...\n}\n

Note

Since the completed records can contain submissions from multiple users, the number of completed submissions per user may not match the total number of completed records.
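As a usage sketch, you can derive a completion percentage from the returned dictionary, using the total and completed keys shown above:

progress = dataset.progress()\n\ncompletion_percentage = 100 * progress[\"completed\"] / progress[\"total\"]\nprint(f\"{completion_percentage:.1f}% of the records are completed\")\n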

"},{"location":"how_to_guides/import_export/","title":"Importing and exporting datasets and records","text":"

This guide provides an overview of how to import and export your dataset or its records to Python, your local disk, or the Hugging Face Hub.

In Argilla, you can import/export two main components of a dataset:

  • The dataset's complete configuration is defined in rg.Settings. This is useful if you want to share your feedback task or restore it later in Argilla.
  • The records stored in the dataset, including Metadata, Vectors, Suggestions, and Responses. This is useful if you want to use your dataset's records outside of Argilla.

Check the Dataset - Python Reference to see the attributes, arguments, and methods for importing and exporting datasets in detail.

Main Classes

rg.Dataset.to_hub | rg.Dataset.from_hub | rg.Dataset.to_disk | rg.Dataset.from_disk | rg.Dataset.records.to_datasets() | rg.Dataset.records.to_dict() | rg.Dataset.records.to_list()
rg.Dataset.to_hub(\n    repo_id=\"<my_org>/<my_dataset>\",\n    with_records=True,\n    generate_card=True\n)\n
rg.Dataset.from_hub(\n    repo_id=\"<my_org>/<my_dataset>\",\n    name=\"my_dataset\",\n    workspace=\"my_workspace\",\n    client=rg.Client(),\n    with_records=True\n)\n
rg.Dataset.to_disk(\n    path=\"<path-empty-directory>\",\n    with_records=True\n)\n
rg.Dataset.from_disk(\n    path=\"<path-dataset-directory>\",\n    name=\"my_dataset\",\n    workspace=\"my_workspace\",\n    client=rg.Client(),\n    with_records=True\n)\n
rg.Dataset.records.to_datasets()\n
rg.Dataset.records.to_dict()\n
rg.Dataset.records.to_list()\n


Check the Record - Python Reference to see the attributes, arguments, and methods of the Record class in detail.

"},{"location":"how_to_guides/import_export/#importing-and-exporting-datasets","title":"Importing and exporting datasets","text":"

First, we will go through exporting a complete dataset from Argilla. This includes the dataset's settings and records. All of these methods use the rg.Dataset.from_* and rg.Dataset.to_* methods.

"},{"location":"how_to_guides/import_export/#hugging-face-hub","title":"Hugging Face Hub","text":""},{"location":"how_to_guides/import_export/#export-to-hub","title":"Export to Hub","text":"

You can push a dataset from Argilla to the Hugging Face Hub. This is useful if you want to share your dataset with the community or version control it. You can push the dataset to the Hugging Face Hub using the rg.Dataset.to_hub method.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\ndataset.to_hub(repo_id=\"<my_org>/<my_dataset>\")\n

With or without records

The example above will push the dataset's Settings and records to the hub. If you only want to push the dataset's configuration, you can set the with_records parameter to False. This is useful if you're just interested in a specific dataset template or you want to make changes in the dataset settings and/or records.

dataset.to_hub(repo_id=\"<my_org>/<my_dataset>\", with_records=False)\n
"},{"location":"how_to_guides/import_export/#import-from-hub","title":"Import from Hub","text":"

You can pull a dataset from the Hugging Face Hub to Argilla. This is useful if you want to restore a dataset and its configuration. You can pull the dataset from the Hugging Face Hub using the rg.Dataset.from_hub method.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = rg.Dataset.from_hub(repo_id=\"<my_org>/<my_dataset>\")\n

The rg.Dataset.from_hub method loads the configuration and records from the dataset repo. If you only want to load records, you can pass a datasets.Dataset object to the rg.Dataset.records.log method. This enables you to configure your own dataset and reuse existing Hub datasets. See the guide on records for more information.

With or without records

The example above will pull the dataset's Settings and records from the hub. If you only want to pull the dataset's configuration, you can set the with_records parameter to False. This is useful if you're just interested in a specific dataset template or you want to make changes in the records.

dataset = rg.Dataset.from_hub(repo_id=\"<my_org>/<my_dataset>\", with_records=False)\n

You could then load the dataset's records using the load_dataset function from the datasets package and log them with the rg.Dataset.records.log method.

from datasets import load_dataset\n\nhf_dataset = load_dataset(\"<my_org>/<my_dataset>\")\ndataset.records.log(hf_dataset) # (1)\n
  1. You could also use the mapping parameter to map record field names to Argilla field and question names.
"},{"location":"how_to_guides/import_export/#import-settings-from-hub","title":"Import settings from Hub","text":"

When importing datasets from the hub, Argilla will load settings from the hub in three ways:

  1. If the dataset was pushed to hub by Argilla, then the settings will be loaded from the hub via the configuration file.
  2. If the dataset was loaded by another source, then Argilla will define the settings based on the dataset's features in datasets.Features. For example, creating a TextField for a text feature or a LabelQuestion for a label class.
  3. You can pass a custom rg.Settings object to the rg.Dataset.from_hub method via the settings parameter. This will override the settings loaded from the hub.
settings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[rg.TextQuestion(name=\"answer\")]\n) # (1)\n\ndataset = rg.Dataset.from_hub(repo_id=\"<my_org>/<my_dataset>\", settings=settings)\n
  1. The settings that you pass to the rg.Dataset.from_hub method will override the settings loaded from the hub, and need to align with the dataset being loaded.
"},{"location":"how_to_guides/import_export/#local-disk","title":"Local Disk","text":""},{"location":"how_to_guides/import_export/#export-to-disk","title":"Export to Disk","text":"

You can save a dataset from Argilla to your local disk. This is useful if you want to back up your dataset. You can use the rg.Dataset.to_disk method. We recommend using an empty directory.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\ndataset.to_disk(path=\"<path-empty-directory>\")\n

This will save the dataset's configuration and records to the specified path. If you only want to save the dataset's configuration, you can set the with_records parameter to False.

dataset.to_disk(path=\"<path-empty-directory>\", with_records=False)\n
"},{"location":"how_to_guides/import_export/#import-from-disk","title":"Import from Disk","text":"

You can load a dataset from your local disk to Argilla. This is useful if you want to restore a dataset's configuration. You can use the rg.Dataset.from_disk method.

import argilla as rg\n\ndataset = rg.Dataset.from_disk(path=\"<path-dataset-directory>\")\n

Directing the dataset to a name and workspace

You can also specify the name and workspace of the dataset when loading it from the disk.

dataset = rg.Dataset.from_disk(path=\"<path-dataset-directory>\", name=\"my_dataset\", workspace=\"my_workspace\")\n
"},{"location":"how_to_guides/import_export/#importing-and-exporting-records","title":"Importing and exporting records","text":"

The records alone can be exported from a dataset in Argilla. This is useful if you want to process the records in Python, export them to a different platform, or use them in model training. All of these methods use the rg.Dataset.records attribute.

"},{"location":"how_to_guides/import_export/#export-records","title":"Export records","text":"

The records can be exported as a dictionary, a list of dictionaries, or a Dataset object from the datasets package.

With images

If your dataset includes images, the recommended approach for exporting records is to use the to_datasets method, which exports the images as rescaled PIL objects. With other methods, the images will be exported using the data URI schema.

To a Python dictionary | To a Python list | To the datasets package

Records can be exported from Dataset.records as a dictionary using the to_dict method. You can specify the orientation of the dictionary output and decide whether to flatten it.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\ndataset = client.datasets(name=\"my_dataset\")\n\n# Export records as a dictionary\nexported_records = dataset.records.to_dict()\n# {'fields': [{'text': 'Hello'}, {'text': 'World'}], 'suggestions': [{'label': {'value': 'positive'}}, {'label': {'value': 'negative'}}]}\n\n# Export records as a dictionary with orient=index\nexported_records = dataset.records.to_dict(orient=\"index\")\n# {\"uuid1\": {'fields': {'text': 'Hello'}, 'suggestions': {'label': {'value': 'positive'}}}, \"uuid2\": {'fields': {'text': 'World'}, 'suggestions': {'label': {'value': 'negative'}}}}\n\n# Export records as a dictionary with flatten=True\nexported_records = dataset.records.to_dict(flatten=True)\n# {\"text\": [\"Hello\", \"World\"], \"label.suggestion\": [\"positive\", \"negative\"]}\n

Records can be exported from Dataset.records as a list of dictionaries using the to_list method. You can decide whether to flatten the output.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=workspace)\n\n# Export records as a list of dictionaries\nexported_records = dataset.records.to_list()\n# [{'fields': {'text': 'Hello'}, 'suggestion': {'label': {'value': 'greeting'}}}, {'fields': {'text': 'World'}, 'suggestion': {'label': {'value': 'greeting'}}}]\n\n# Export records as a list of dictionaries with flatten=True\nexported_records = dataset.records.to_list(flatten=True)\n# [{\"text\": \"Hello\", \"label\": \"greeting\"}, {\"text\": \"World\", \"label\": \"greeting\"}]\n

Records can be exported from Dataset.records to the datasets package using the to_datasets method. You can specify the name of the dataset and the split to export the records.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\ndataset = client.datasets(name=\"my_dataset\")\n\n# Export records to a datasets.Dataset object\nexported_dataset = dataset.records.to_datasets()\n
"},{"location":"how_to_guides/import_export/#import-records","title":"Import records","text":"

To import records to a dataset, use the rg.Dataset.records.log method. There is a guide on how to do this in How-to guides - Record, or you can check the Record - Python Reference.
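As a minimal sketch, assuming a dataset whose fields are named question and answer (as in the examples in this guide):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\n# Log records as dictionaries whose keys match the dataset fields\ndataset.records.log([{\"question\": \"Do you need oxygen to breathe?\", \"answer\": \"Yes\"}])\n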

"},{"location":"how_to_guides/migrate_from_legacy_datasets/","title":"Migrate users, workspaces and datasets to Argilla 2.x","text":"

This guide will help you migrate task-specific datasets to Argilla V2. These do not include the FeedbackDataset, which is just an interim naming convention for the latest extensible dataset. Task-specific datasets are datasets that are used for a specific task, such as text classification, token classification, etc. If you would like to learn about the backstory of this SDK migration, please refer to the SDK migration blog post. Additionally, we will provide guidance on how to migrate your Users and Workspaces to the new Argilla V2 format.

Note

Legacy datasets include: DatasetForTextClassification, DatasetForTokenClassification, and DatasetForText2Text.

FeedbackDatasets do not need to be migrated as they are already in the Argilla V2 format. However, since the 2.x version includes changes to the search index structure, you should reindex the datasets by enabling the docker environment variable REINDEX_DATASET (this step is automatically executed if you're running Argilla in an HF Space). See the server configuration docs section for more details.

To follow this guide, you will need to have the following prerequisites:

  • An argilla 1.* server instance running with legacy datasets.
  • An argilla >=1.29 server instance running. If you don't have one, you can create one by following this Argilla guide.
  • The argilla sdk package installed in your environment.

Warning

This guide will recreate all Users and Workspaces on a new server. Hence, they will be created with new passwords and IDs. If you want to keep the same passwords and IDs, you can copy the datasets to a temporary v2 instance, then upgrade your current instance to v2.0 and copy the datasets back to your original instance afterwards.

If your current legacy datasets are on a server with an Argilla release after 1.29, you could choose to recreate your legacy datasets as new datasets on the same server. You could then upgrade the server to Argilla 2.0 and carry on working there. Your legacy datasets will not be visible on the new server, but they will remain in the storage layers if you need to access them.

To migrate, you will need to install the new argilla package. This includes a new v1 module that allows you to connect to the Argilla V1 server.

pip install \"argilla>=2.0.0\"\n
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#migrate-users-and-workspaces","title":"Migrate Users and Workspaces","text":"

The guide will take you through two steps:

  1. Retrieve the old users and workspaces from the Argilla V1 server using the new argilla package.
  2. Recreate the users and workspaces on the Argilla V2 server, using the name as a unique identifier.
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-1-retrieve-the-old-users-and-workspaces","title":"Step 1: Retrieve the old users and workspaces","text":"

You can use the v1 module to connect to the Argilla V1 server.

import argilla.v1 as rg_v1\n\n# Initialize the API with an Argilla server less than 2.0\napi_url = \"<your-url>\"\napi_key = \"<your-api-key>\"\nrg_v1.init(api_url, api_key)\n

Next, load the Users and Workspaces from the Argilla V1 server:

users_v1 = rg_v1.User.list()\nworkspaces_v1 = rg_v1.Workspace.list()\n
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-2-recreate-the-users-and-workspaces","title":"Step 2: Recreate the users and workspaces","text":"

To recreate the users and workspaces on the Argilla V2 server, you can use the argilla package.

First, instantiate the Argilla class to connect to the Argilla V2 server:

import argilla as rg\n\nclient = rg.Argilla()\n

Next, recreate the users and workspaces on the Argilla V2 server:

for workspace in workspaces_v1:\n    rg.Workspace(\n        name=workspace.name\n    ).create()\n
for user in users_v1:\n    user = rg.User(\n        username=user.username,\n        first_name=user.first_name,\n        last_name=user.last_name,\n        role=user.role,\n        password=\"<your_chosen_password>\" # (1)\n    ).create()\n    if user.role == \"owner\":\n       continue\n\n    for workspace_name in user.workspaces:\n        if workspace_name != user.name:\n            workspace = client.workspaces(name=workspace_name)\n            user.add_to_workspace(workspace)\n
  1. You need to choose a new password for the user. To do this programmatically, you can use the uuid package to generate a random password. Take care to keep track of the passwords you choose, since you will not be able to retrieve them later.
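For example, one minimal way to generate such a random password with the uuid package:

import uuid\n\nnew_password = uuid.uuid4().hex  # random 32-character hexadecimal string; store it somewhere safe\n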

Now you have successfully migrated your users and workspaces to Argilla V2 and can continue with the next steps.

"},{"location":"how_to_guides/migrate_from_legacy_datasets/#migrate-datasets","title":"Migrate datasets","text":"

The guide will take you through three steps:

  1. Retrieve the legacy dataset from the Argilla V1 server using the new argilla package.
  2. Define the new dataset in the Argilla V2 format.
  3. Upload the dataset records to the new Argilla V2 dataset format and attributes.
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-1-retrieve-the-legacy-dataset","title":"Step 1: Retrieve the legacy dataset","text":"

You can use the v1 module to connect to the Argilla V1 server.

import argilla.v1 as rg_v1\n\n# Initialize the API with an Argilla server less than 2.0\napi_url = \"<your-url>\"\napi_key = \"<your-api-key>\"\nrg_v1.init(api_url, api_key)\n

Next, load the dataset settings and records from the Argilla V1 server:

dataset_name = \"news-programmatic-labeling\"\nworkspace = \"demo\"\n\nsettings_v1 = rg_v1.load_dataset_settings(dataset_name, workspace)\nrecords_v1 = rg_v1.load(dataset_name, workspace)\nhf_dataset = records_v1.to_datasets()\n

Your legacy dataset is now loaded into the hf_dataset object.

"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-2-define-the-new-dataset","title":"Step 2: Define the new dataset","text":"

Define the new dataset in the Argilla V2 format. The new dataset format is defined in the argilla package. You can create a new dataset with the Settings and Dataset classes:

First, instantiate the Argilla class to connect to the Argilla V2 server:

import argilla as rg\n\nclient = rg.Argilla()\n

Next, define the new dataset settings:

For single-label classificationFor multi-label classificationFor token classificationFor text generation
settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"), # (1)\n    ],\n    questions=[\n        rg.LabelQuestion(name=\"label\", labels=settings_v1.label_schema),\n    ],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"split\"), # (2)\n    ],\n    vectors=[\n        rg.VectorField(name='mini-lm-sentence-transformers', dimensions=384), # (3)\n    ],\n)\n
  1. The default field in DatasetForTextClassification is text, but make sure you provide all fields included in record.inputs.
  2. Make sure you provide all relevant metadata fields available in the dataset.
  3. Make sure you provide all relevant vectors available in the dataset.
settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"), # (1)\n    ],\n    questions=[\n        rg.MultiLabelQuestion(name=\"labels\", labels=settings_v1.label_schema),\n    ],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"split\"), # (2)\n    ],\n    vectors=[\n        rg.VectorField(name='mini-lm-sentence-transformers', dimensions=384), # (3)\n    ],\n)\n
  1. The default field in DatasetForTextClassification is text, but we should provide all fields included in record.inputs.
  2. Make sure you provide all relevant metadata fields available in the dataset.
  3. Make sure you provide all relevant vectors available in the dataset.
settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    questions=[\n        rg.SpanQuestion(name=\"spans\", labels=settings_v1.label_schema),\n    ],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"split\"), # (1)\n    ],\n    vectors=[\n        rg.VectorField(name='mini-lm-sentence-transformers', dimensions=384), # (2)\n    ],\n)\n
  1. Make sure you provide all relevant metadata fields available in the dataset.
  2. Make sure you provide all relevant vectors available in the dataset.
settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    questions=[\n        rg.TextQuestion(name=\"text_generation\"),\n    ],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"split\"), # (1)\n    ],\n    vectors=[\n        rg.VectorField(name='mini-lm-sentence-transformers', dimensions=384), # (2)\n    ],\n)\n
  1. We should provide all relevant metadata fields available in the dataset.
  2. We should provide all relevant vectors available in the dataset.

Finally, create the new dataset on the Argilla V2 server:

dataset = rg.Dataset(name=dataset_name, workspace=workspace, settings=settings)\ndataset.create()\n

Note

If a dataset with the same name already exists, the create method will raise an exception. You can check if the dataset exists and delete it before creating a new one.

dataset = client.datasets(name=dataset_name, workspace=workspace)\n\nif dataset is not None:\n    dataset.delete()\n
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-3-upload-the-dataset-records","title":"Step 3: Upload the dataset records","text":"

To upload the records to the new server, we will need to convert the records from the Argilla V1 format to the Argilla V2 format. The new argilla sdk package uses a generic Record class, but legacy datasets have specific record classes. We will need to convert the records to the generic Record class.

Here is a set of example functions to convert the records for single-label classification, multi-label classification, token classification, and text generation. You can modify these functions to suit your dataset.

For single-label classificationFor multi-label classificationFor token classificationFor text generation
def map_to_record_for_single_label(data: dict, users_by_name: dict, current_user: rg.User) -> rg.Record:\n    \"\"\" This function maps a text classification record dictionary to the new Argilla record.\"\"\"\n    suggestions = []\n    responses = []\n\n    if prediction := data.get(\"prediction\"):\n        label, score = prediction[0].values()\n        agent = data[\"prediction_agent\"]\n        suggestions.append(\n            rg.Suggestion(\n                question_name=\"label\", # (1)\n                value=label,\n                score=score,\n                agent=agent\n            )\n        )\n\n    if annotation := data.get(\"annotation\"):\n        user_id = users_by_name.get(data[\"annotation_agent\"], current_user).id\n        responses.append(\n            rg.Response(\n                question_name=\"label\", # (2)\n                value=annotation,\n                user_id=user_id\n            )\n        )\n\n    return rg.Record(\n        id=data[\"id\"],\n        fields=data[\"inputs\"],\n        # The inputs field should be a dictionary with the same keys as the `fields` in the settings\n        metadata=data[\"metadata\"],\n        # The metadata field should be a dictionary with the same keys as the `metadata` in the settings\n        vectors=data.get(\"vectors\") or {},\n        suggestions=suggestions,\n        responses=responses,\n    )\n
  1. Make sure the question_name matches the name of the question in question settings.

  2. Make sure the question_name matches the name of the question in question settings.

def map_to_record_for_multi_label(data: dict, users_by_name: dict, current_user: rg.User) -> rg.Record:\n    \"\"\" This function maps a text classification record dictionary to the new Argilla record.\"\"\"\n    suggestions = []\n    responses = []\n\n    if prediction := data.get(\"prediction\"):\n        labels, scores = zip(*[(pred[\"label\"], pred[\"score\"]) for pred in prediction])\n        agent = data[\"prediction_agent\"]\n        suggestions.append(\n            rg.Suggestion(\n                question_name=\"labels\", # (1)\n                value=labels,\n                score=scores,\n                agent=agent\n            )\n        )\n\n    if annotation := data.get(\"annotation\"):\n        user_id = users_by_name.get(data[\"annotation_agent\"], current_user).id\n        responses.append(\n            rg.Response(\n                question_name=\"labels\", # (2)\n                value=annotation,\n                user_id=user_id\n            )\n        )\n\n    return rg.Record(\n        id=data[\"id\"],\n        fields=data[\"inputs\"],\n        # The inputs field should be a dictionary with the same keys as the `fields` in the settings\n        metadata=data[\"metadata\"],\n        # The metadata field should be a dictionary with the same keys as the `metadata` in the settings\n        vectors=data.get(\"vectors\") or {},\n        suggestions=suggestions,\n        responses=responses,\n    )\n
  1. Make sure the question_name matches the name of the question in question settings.

  2. Make sure the question_name matches the name of the question in question settings.

def map_to_record_for_span(data: dict, users_by_name: dict, current_user: rg.User) -> rg.Record:\n    \"\"\" This function maps a token classification record dictionary to the new Argilla record.\"\"\"\n    suggestions = []\n    responses = []\n\n    if prediction := data.get(\"prediction\"):\n        scores = [span[\"score\"] for span in prediction]\n        agent = data[\"prediction_agent\"]\n        suggestions.append(\n            rg.Suggestion(\n                question_name=\"spans\", # (1)\n                value=prediction,\n                score=scores,\n                agent=agent\n            )\n        )\n\n    if annotation := data.get(\"annotation\"):\n        user_id = users_by_name.get(data[\"annotation_agent\"], current_user).id\n        responses.append(\n            rg.Response(\n                question_name=\"spans\", # (2)\n                value=annotation,\n                user_id=user_id\n            )\n        )\n\n    return rg.Record(\n        id=data[\"id\"],\n        fields={\"text\": data[\"text\"]},\n        # The inputs field should be a dictionary with the same keys as the `fields` in the settings\n        metadata=data[\"metadata\"],\n        # The metadata field should be a dictionary with the same keys as the `metadata` in the settings\n        vectors=data.get(\"vectors\") or {},\n        # The vectors field should be a dictionary with the same keys as the `vectors` in the settings\n        suggestions=suggestions,\n        responses=responses,\n    )\n
  1. Make sure the question_name matches the name of the question in question settings.

  2. Make sure the question_name matches the name of the question in question settings.

def map_to_record_for_text_generation(data: dict, users_by_name: dict, current_user: rg.User) -> rg.Record:\n    \"\"\" This function maps a text2text record dictionary to the new Argilla record.\"\"\"\n    suggestions = []\n    responses = []\n\n    if prediction := data.get(\"prediction\"):\n        first = prediction[0]\n        agent = data[\"prediction_agent\"]\n        suggestions.append(\n            rg.Suggestion(\n                question_name=\"text_generation\", # (1)\n                value=first[\"text\"],\n                score=first[\"score\"],\n                agent=agent\n            )\n        )\n\n    if annotation := data.get(\"annotation\"):\n        # From data[annotation]\n        user_id = users_by_name.get(data[\"annotation_agent\"], current_user).id\n        responses.append(\n            rg.Response(\n                question_name=\"text_generation\", # (2)\n                value=annotation,\n                user_id=user_id\n            )\n        )\n\n    return rg.Record(\n        id=data[\"id\"],\n        fields={\"text\": data[\"text\"]},\n        # The inputs field should be a dictionary with the same keys as the `fields` in the settings\n        metadata=data[\"metadata\"],\n        # The metadata field should be a dictionary with the same keys as the `metadata` in the settings\n        vectors=data.get(\"vectors\") or {},\n        # The vectors field should be a dictionary with the same keys as the `vectors` in the settings\n        suggestions=suggestions,\n        responses=responses,\n    )\n
  1. Make sure the question_name matches the name of the question in question settings.

  2. Make sure the question_name matches the name of the question in question settings.

The functions above depend on the users_by_name dictionary and the current_user object to assign responses to users, so we need to load the existing users first. You can retrieve the users from the Argilla V2 server and the current user as follows:

users_by_name = {user.username: user for user in client.users}\ncurrent_user = client.me\n

Finally, upload the records to the new dataset using the log method and map functions.

records = []\n\nfor data in hf_dataset:\n    records.append(map_to_record_for_single_label(data, users_by_name, current_user))\n\n# Upload the records to the new dataset\ndataset.records.log(records)\n

You have now successfully migrated your legacy dataset to Argilla V2. For more guides on how to use the Argilla SDK, please refer to the How to guides.

"},{"location":"how_to_guides/query/","title":"Query and filter records","text":"

This guide provides an overview of how to query and filter a dataset in Argilla.

You can search for records in your dataset by querying or filtering. The query focuses on the content of the text field, while the filter is used to filter the records based on conditions. You can use them independently or combine multiple filters to create complex search queries. You can also export records from a dataset either as a single dictionary or a list of dictionaries.

Main Classes

rg.Query | rg.Filter | rg.Similar
rg.Query(\n    query=\"query\",\n    filter=filter\n)\n

Check the Query - Python Reference to see the attributes, arguments, and methods of the Query class in detail.

rg.Filter(\n    [\n        (\"field\", \"==\", \"value\"),\n    ]\n)\n

Check the Filter - Python Reference to see the attributes, arguments, and methods of the Filter class in detail.

rg.Similar(\n    name=\"vector\",\n    value=[0.1, 0.2, 0.3],\n)\n

Check the Similar - Python Reference to see the attributes, arguments, and methods of the Similar class in detail.

"},{"location":"how_to_guides/query/#query-with-search-terms","title":"Query with search terms","text":"

To search for records with terms, you can use the Dataset.records attribute with a query string. The search terms are used to search for records that contain the terms in the text field. You can search for a single term or multiple terms; in the latter case, all of them must appear in the record for it to be retrieved.

Single term search | Multiple terms search
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nquery = rg.Query(query=\"my_term\")\n\nqueried_records = dataset.records(query=query).to_list(flatten=True)\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nquery = rg.Query(query=\"my_term1 my_term2\")\n\nqueried_records = dataset.records(query=query).to_list(flatten=True)\n
"},{"location":"how_to_guides/query/#advanced-queries","title":"Advanced queries","text":"

If you need more complex searches, you can use Elasticsearch's simple query string syntax. Here is a summary of the different available operators:

operator description example + or space AND: search both terms argilla + distilabel or argilla distilabel return records that include the terms \"argilla\" and \"distilabel\" | OR: search either term argilla | distilabel returns records that include the term \"argilla\" or \"distilabel\" - Negation: exclude a term argilla -distilabel returns records that contain the term \"argilla\" and don't have the term \"distilabel\" * Prefix: search a prefix arg* returns records with any words starting with \"arg-\" \" Phrase: search a phrase \"argilla and distilabel\" returns records that contain the phrase \"argilla and distilabel\" ( and ) Precedence: group terms (argilla | distilabel) rules returns records that contain either \"argilla\" or \"distilabel\" and \"rules\" ~N Edit distance: search a term or phrase with an edit distance argilla~1 returns records that contain the term \"argilla\" with an edit distance of 1, e.g. \"argila\"

Tip

To use one of these characters literally, escape it with a preceding backslash \\, e.g. \"1 \\+ 2\" would match records where the phrase \"1 + 2\" is found.
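As an illustration, here is a sketch that combines several of these operators in a single query (the terms are just examples):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\n# Records that contain \"argilla\" or \"distilabel\", a word starting with \"annotat\", and not \"legacy\"\nquery = rg.Query(query=\"(argilla | distilabel) annotat* -legacy\")\n\nqueried_records = dataset.records(query=query).to_list(flatten=True)\n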

"},{"location":"how_to_guides/query/#filter-by-conditions","title":"Filter by conditions","text":"

You can use the Filter class to define the conditions and pass them to the Dataset.records attribute to fetch records based on the conditions. Conditions include \"==\", \">=\", \"<=\", or \"in\". Conditions can be combined with dot notation to filter records based on metadata, suggestions, or responses. You can use a single condition or multiple conditions to filter records.

  • ==: the field value is equal to the value
  • >=: the field value is greater than or equal to the value
  • <=: the field value is less than or equal to the value
  • in: the field value is included in a list of values

Single condition | Multiple conditions
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nfilter_label = rg.Filter((\"label\", \"==\", \"positive\"))\n\nfiltered_records = dataset.records(query=rg.Query(filter=filter_label)).to_list(\n    flatten=True\n)\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nfilters = rg.Filter(\n    [\n        (\"label.suggestion\", \"==\", \"positive\"),\n        (\"metadata.count\", \">=\", 10),\n        (\"metadata.count\", \"<=\", 20),\n        (\"label\", \"in\", [\"positive\", \"negative\"])\n    ]\n)\n\nfiltered_records = dataset.records(\n    query=rg.Query(filter=filters), with_suggestions=True\n).to_list(flatten=True)\n
"},{"location":"how_to_guides/query/#available-fields","title":"Available fields","text":"

You can filter records based on the following fields:

field description example id The record id (\"id\", \"in\", [\"1\",\"2\",\"3\"]) _server_id The internal record id. This value must be a valida UUID (\"_server_id\", \"==\", \"ba69a996-85c2-4af0-a473-23138929641b\") inserted_at The date and time the record was inserted. You can pass a datetime or a string (\"inserted_at\" \">=\", \"2024-10-10\") updated_at The date and time the record was updated. (\"updated_at\" \">=\", \"2024-10-10\") status The record status, which can be pending or completed. (\"status\", \"==\", \"completed\") response.status The response status, which can be draft, submitted, or discarded. (\"response.status\", \"==\", \"submitted\") metadata.<name> Filter by a metadata property (\"metadata.split\", \"==\", \"train\") <question>.suggestion Filter by a question suggestion value (\"label.sugggestion\", \"==\", \"positive\") <question>.score Filter by a suggestion score (\"label.score\", \"<=\", \"0.9\") <question>.agent Filter by a suggestion agent (\"label.agent\", \"<=\", \"ChatGPT4.0\") <question>.response Filter by a question response (\"label.response\", \"==\", \"negative\")"},{"location":"how_to_guides/query/#filter-by-status","title":"Filter by status","text":"

You can filter records based on record or response status. Record status can be pending or completed, and response status can be draft, submitted, or discarded.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nstatus_filter = rg.Query(\n    filter=rg.Filter(\n        [\n            (\"status\", \"==\", \"completed\"),\n            (\"response.status\", \"==\", \"discarded\")\n        ]\n    )\n)\n\nfiltered_records = dataset.records(status_filter).to_list(flatten=True)\n
"},{"location":"how_to_guides/query/#similarity-search","title":"Similarity search","text":"

You can search for records that are similar to a given vector. You can use the Similar class to define the vector and pass it as part of the query argument to the Dataset.records attribute.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\n\nsimilar_filter = rg.Query(\n    similar=rg.Similar(\n        name=\"vector\", value=[0.1, 0.2, 0.3],\n    )\n)\n\nfiltered_records = dataset.records(similar_filter).to_list(flatten=True)\n

Note

The Similar search expects a vector field definition as part of the dataset settings. If the dataset does not have a vector field, the search will return an error. Visit the Vectors section for more details on how to define a vector field.

"},{"location":"how_to_guides/query/#query-and-filter-a-dataset","title":"Query and filter a dataset","text":"

As mentioned, you can use a query with a search term and a filter or various filters to create complex search queries.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nquery_filter = rg.Query(\n    query=\"my_term\",\n    filter=rg.Filter(\n        [\n            (\"label.suggestion\", \"==\", \"positive\"),\n            (\"metadata.count\", \">=\", 10),\n        ]\n    )\n)\n\nqueried_filtered_records = dataset.records(\n    query=query_filter,\n    with_metadata=True,\n    with_suggestions=True\n).to_list(flatten=True)\n
"},{"location":"how_to_guides/record/","title":"Add, update, and delete records","text":"

This guide provides an overview of records, explaining the basics of how to define and manage them in Argilla.

A record in Argilla is a data item that requires annotation, consisting of one or more fields. These are the pieces of information displayed to the user in the UI to facilitate the completion of the annotation task. Each record also includes questions that annotators are required to answer, with the option of adding suggestions and responses to assist them. Guidelines are also provided to help annotators effectively complete their tasks.

A record is part of a dataset, so you will need to create a dataset before adding records. Check this guide to learn how to create a dataset.

Main Class

rg.Record(\n    external_id=\"1234\",\n    fields={\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\"\n    },\n    metadata={\n        \"category\": \"A\"\n    },\n    vectors={\n        \"my_vector\": [0.1, 0.2, 0.3],\n    },\n    suggestions=[\n        rg.Suggestion(\"my_label\", \"positive\", score=0.9, agent=\"model_name\")\n    ],\n    responses=[\n        rg.Response(\"label\", \"positive\", user_id=user_id)\n    ],\n)\n

Check the Record - Python Reference to see the attributes, arguments, and methods of the Record class in detail.

"},{"location":"how_to_guides/record/#add-records","title":"Add records","text":"

You can add records to a dataset in two different ways: either by using a dictionary or by directly initializing a Record object. You should ensure that fields, metadata and vectors match those configured in the dataset settings. In both cases, records are added via the Dataset.records.log method. As soon as you add the records, these will be available in the Argilla UI. If they do not appear in the UI, you may need to click the refresh button to update the view.

Tip

Take some time to inspect the data before adding it to the dataset in case this triggers changes in the questions or fields.

Note

If you are planning to use public data, the Datasets page of the Hugging Face Hub is a good place to start. Remember to always check the license to make sure you can legally use it for your specific use case.

As Record objects | From a generic data structure | From a Hugging Face dataset

You can add records to a dataset by initializing a Record object directly. This is ideal if you need to apply logic to the data before defining the record. If the data is already structured, you should consider adding it directly as a dictionary or Hugging Face dataset.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n    ), # (1)\n]\n\ndataset.records.log(records)\n
  1. This is an illustration of a definition. In a real-world scenario, you would iterate over a data structure and create Record objects for each iteration.

You can add the data directly as a dictionary-like structure, where the keys correspond to the names of fields, questions, metadata or vectors in the dataset and the values are the data to be added.

If your data structure does not correspond to your Argilla dataset names, you can use a mapping to indicate which keys in the source data correspond to the dataset fields, metadata, vectors, suggestions, or responses. If you need to add the same data to multiple attributes, you can also use a list with the names of the attributes, as shown in the sketch after the example below.

We illustrate this with Python dictionaries that represent your data, but we would not advise defining dictionaries yourself. Instead, use the Record object to instantiate records.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\n# Add records to the dataset with the fields 'question' and 'answer'\ndata = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n    }, # (1)\n]\ndataset.records.log(data)\n\n# Add records to the dataset with a mapping of the fields 'question' and 'answer'\ndata = [\n    {\n        \"query\": \"Do you need oxygen to breathe?\",\n        \"response\": \"Yes\",\n    },\n    {\n        \"query\": \"What is the boiling point of water?\",\n        \"response\": \"100 degrees Celsius\",\n    },\n]\ndataset.records.log(data, mapping={\"query\": \"question\", \"response\": \"answer\"}) # (2)\n
  1. The data structure's keys must match the fields or questions in the Argilla dataset. In this case, there are fields named question and answer.
  2. The data structure has keys query and response, and the Argilla dataset has fields question and answer. You can use the mapping parameter to map the keys in the data structure to the fields in the Argilla dataset.
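As mentioned above, a source key can also be mapped to several destination attributes by passing a list of names. A sketch, assuming a label question named my_label and a user object as described in the Responses section:

# The same source key feeds both the suggestion and the response of 'my_label'\ndata = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n        \"label\": \"positive\",\n    },\n]\ndataset.records.log(\n    data,\n    user_id=user.id,\n    mapping={\"label\": [\"my_label.suggestion\", \"my_label.response\"]},\n)\n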

You can also add records to a dataset using a Hugging Face dataset. This is useful when you want to use a dataset from the Hugging Face Hub and add it to your Argilla dataset.

You can add the dataset where the column names correspond to the names of fields, metadata or vectors in the Argilla dataset.

import argilla as rg\nfrom datasets import load_dataset\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\ndataset = client.datasets(name=\"my_dataset\") # (1)\n\nhf_dataset = load_dataset(\"imdb\", split=\"train[:100]\") # (2)\n\ndataset.records.log(records=hf_dataset)\n
  1. In this case, we are using the my_dataset dataset from the Argilla workspace. The dataset has a text field and a label question.

  2. In this example, the Hugging Face dataset matches the Argilla dataset schema. If that is not the case, you could use the .map of the datasets library to prepare the data before adding it to the Argilla dataset.

If the Hugging Face dataset's schema does not correspond to your Argilla dataset field names, you can use a mapping to specify the relationship. You should indicate as key the column name of the Hugging Face dataset and, as value, the field name of the Argilla dataset.

dataset.records.log(\n    records=hf_dataset, mapping={\"text\": \"review\", \"label\": \"sentiment\"}\n) # (1)\n
  1. In this case, the text key in the Hugging Face dataset would correspond to the review field in the Argilla dataset, and the label key in the Hugging Face dataset would correspond to the sentiment field in the Argilla dataset.
"},{"location":"how_to_guides/record/#fields","title":"Fields","text":"

Fields are the main pieces of information of the record. These are shown at first sight in the UI together with the questions form. You may only include fields that you have previously configured in the dataset settings. Depending on the type of fields included in the dataset, the data format may be slightly different:

Text | Image | Chat | Custom

Text fields expect input in the form of a string.

record = rg.Record(\n    fields={\"text\": \"Hello World, how are you?\"}\n)\n

Image fields expect a remote URL or local path to an image file in the form of a string, or a PIL object.

Check the Dataset.records - Python Reference to see how to add records with images in detail.

from PIL import Image\n\nrecords = [\n    rg.Record(\n        fields={\"image\": \"https://example.com/image.jpg\"}\n    ),\n    rg.Record(\n        fields={\"image\": \"path/to/image.jpg\"}\n    ),\n    rg.Record(\n        fields={\"image\": Image.open(\"path/to/image.jpg\")}\n    ),\n]\n

Chat fields expect a list of dictionaries with the keys role and content, where the role identifies the interlocutor type (e.g., user, assistant, model, etc.), whereas the content contains the text of the message.

record = rg.Record(\n    fields={\n        \"chat\": [\n            {\"role\": \"user\", \"content\": \"What is Argilla?\"},\n            {\"role\": \"assistant\", \"content\": \"Argilla is a collaboration tool for AI engineers and domain experts to build high-quality datasets\"},\n        ]\n    }\n)\n

Custom fields expect a dictionary with the keys and values you define in the dataset settings. You need to ensure these are aligned with CustomField.template in order for them to be rendered in the UI.

record = rg.Record(\n    fields={\"custom\": {\"key\": \"value\"}}\n)\n
"},{"location":"how_to_guides/record/#metadata","title":"Metadata","text":"

Record metadata can include any information about the record that is not part of the fields in the form of a dictionary. To use metadata for filtering and sorting records, make sure that the key of the dictionary corresponds with the metadata property name. When the key doesn't correspond, this will be considered extra metadata that will get stored with the record (as long as allow_extra_metadata is set to True for the dataset), but will not be usable for filtering and sorting.

Note

Remember that to use metadata within a dataset, you must define a metadata property in the dataset settings.

Check the Metadata - Python Reference to see the attributes, arguments, and methods for using metadata in detail.
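As a reminder of how this is configured, a minimal settings sketch (the names are illustrative) that defines a metadata property and allows extra metadata:

settings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"my_metadata\", options=[\"option_1\", \"option_2\"]),\n    ],\n    allow_extra_metadata=True,  # keys not defined above are stored but not usable for filtering\n)\n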

As Record objects | From a generic data structure

You can add metadata to a record in an initialized Record object.

# Add records to the dataset with the metadata 'category'\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n        metadata={\"my_metadata\": \"option_1\"},\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n        metadata={\"my_metadata\": \"option_1\"},\n    ),\n]\ndataset.records.log(records)\n

You can add metadata to a record directly as a dictionary structure, where the keys correspond to the names of metadata properties in the dataset and the values are the metadata to be added. Remember that you can also use the mapping parameter to specify the data structure.

# Add records to the dataset with the metadata 'category'\ndata = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n        \"my_metadata\": \"option_1\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n        \"my_metadata\": \"option_1\",\n    },\n]\ndataset.records.log(data)\n
"},{"location":"how_to_guides/record/#vectors","title":"Vectors","text":"

You can associate vectors, like text embeddings, to your records. They can be used for semantic search in the UI and the Python SDK. Make sure that the length of the list corresponds to the dimensions set in the vector settings.

Note

Remember that to use vectors within a dataset, you must define them in the dataset settings.

Check the Vector - Python Reference to see the attributes, arguments, and methods of the Vector class in detail.
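As a reminder, a minimal settings sketch (illustrative names) defining a vector field whose dimensions must match the length of the vectors you log:

settings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n    vectors=[\n        rg.VectorField(name=\"my_vector\", dimensions=3),  # records must provide 3-dimensional vectors\n    ],\n)\n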

As Record objects | From a generic data structure

You can also add vectors to a record in an initialized Record object.

# Add records to the dataset with the vector 'my_vector' and dimension=3\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n        vectors={\n            \"my_vector\": [0.1, 0.2, 0.3]\n        },\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n        vectors={\n            \"my_vector\": [0.2, 0.5, 0.3]\n        },\n    ),\n]\ndataset.records.log(records)\n

You can add vectors from a dictionary-like structure, where the keys correspond to the names of the vector settings that were configured for your dataset and the value is a list of floats. Remember that you can also use the mapping parameter to specify the data structure.

# Add records to the dataset with the vector 'my_vector' and dimension=3\ndata = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n        \"my_vector\": [0.1, 0.2, 0.3],\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n        \"my_vector\": [0.2, 0.5, 0.3],\n    },\n]\ndataset.records.log(data)\n
"},{"location":"how_to_guides/record/#suggestions","title":"Suggestions","text":"

Suggestions refer to suggested responses (e.g. model predictions) that you can add to your records to make the annotation process faster. These can be added during the creation of the record or at a later stage. Only one suggestion can be provided for each question, and suggestion values must be compliant with the pre-defined questions e.g. if we have a RatingQuestion between 1 and 5, the suggestion should have a valid value within that range.

Check the Suggestions - Python Reference to see the attributes, arguments, and methods of the Suggestion class in detail.

Tip

Check the Suggestions - Python Reference for different formats per Question type.

As Record objects | From a generic data structure

You can also add suggestions to a record in an initialized Record object.

# Add records to the dataset with the label 'my_label'\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n        suggestions=[\n            rg.Suggestion(\n                \"my_label\",\n                \"positive\",\n                score=0.9,\n                agent=\"model_name\"\n            )\n        ],\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n        suggestions=[\n            rg.Suggestion(\n                \"my_label\",\n                \"negative\",\n                score=0.9,\n                agent=\"model_name\"\n            )\n        ],\n    ),\n]\ndataset.records.log(records)\n

You can add suggestions as a dictionary, where the keys correspond to the names of the questions that were configured for your dataset. Remember that you can also use the mapping parameter to specify the data structure.

# Add records to the dataset with the label question 'my_label'\ndata =  [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n        \"label\": \"positive\",\n        \"score\": 0.9,\n        \"agent\": \"model_name\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n        \"label\": \"negative\",\n        \"score\": 0.9,\n        \"agent\": \"model_name\",\n    },\n]\ndataset.records.log(\n    data=data,\n    mapping={\n        \"label\": \"my_label\",\n        \"score\": \"my_label.suggestion.score\",\n        \"agent\": \"my_label.suggestion.agent\",\n    },\n)\n
"},{"location":"how_to_guides/record/#responses","title":"Responses","text":"

If your dataset includes some annotations, you can add those to the records as you create them. Make sure that the responses adhere to the same format as Argilla's output and meet the schema requirements for the specific type of question being answered. Make sure to include the user_id if you plan to add more than one response for the same question; otherwise, the responses will apply to all the annotators.

Check the Responses - Python Reference to see the attributes, arguments, and methods of the Response class in detail.

Note

Keep in mind that records with responses will be displayed as \"Draft\" in the UI.

Tip

Check the Responses - Python Reference for different formats per Question type.

As Record objects / From a generic data structure

You can also add responses to a record in an initialized Record object.

# Add records to the dataset with the label 'my_label'\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n        responses=[\n            rg.Response(\"my_label\", \"positive\", user_id=user.id)\n        ]\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n        responses=[\n            rg.Response(\"my_label\", \"negative\", user_id=user.id)\n        ]\n    ),\n]\ndataset.records.log(records)\n

You can add responses as a dictionary, where the keys correspond to the names of the questions that were configured for your dataset. Remember that you can also use the mapping parameter to specify the data structure. If you want to specify the user that added the response, you can use the user_id parameter.

# Add records to the dataset with the label 'my_label'\ndata = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n        \"label\": \"positive\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n        \"label\": \"negative\",\n    },\n]\ndataset.records.log(data, user_id=user.id, mapping={\"label\": \"my_label.response\"})\n
"},{"location":"how_to_guides/record/#list-records","title":"List records","text":"

To list records in a dataset, you can use the records method on the Dataset object. This method returns a list of Record objects that can be iterated over to access the record properties.

for record in dataset.records(\n    with_suggestions=True,\n    with_responses=True,\n    with_vectors=True\n):\n\n    # Access the record properties\n    print(record.metadata)\n    print(record.vectors)\n    print(record.suggestions)\n    print(record.responses)\n\n    # Access the responses of the record\n    for response in record.responses:\n        print(response.value)\n
"},{"location":"how_to_guides/record/#update-records","title":"Update records","text":"

You can update records in a dataset by calling the log method on the Dataset object. To update a record, you need to provide the record id and the new data to be updated.

data = dataset.records.to_list(flatten=True)\n\nupdated_data = [\n    {\n        \"text\": sample[\"text\"],\n        \"label\": \"positive\",\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n
Update the metadata / Update vectors / Update suggestions / Update responses

The metadata of the Record object is a python dictionary. To update it, you can iterate over the records and update the metadata by key. After that, you should update the records in the dataset.

Tip

Check the Metadata - Python Reference for different formats per MetadataProperty type.

updated_records = []\n\nfor record in dataset.records():\n\n    record.metadata[\"my_metadata\"] = \"new_value\"\n    record.metadata[\"my_new_metadata\"] = \"new_value\"\n\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)\n

If a new vector field is added to the dataset settings, or if values of existing record vectors must be updated, you can iterate over the records and update the vectors by key. After that, you should update the records in the dataset.

updated_records = []\n\nfor record in dataset.records(with_vectors=True):\n\n    record.vectors[\"my_vector\"] = [ 0, 1, 2, 3, 4, 5 ]\n    record.vectors[\"my_new_vector\"] = [ 0, 1, 2, 3, 4, 5 ]\n\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)\n

If values of existing record suggestions must be updated, you can iterate over the records and update the suggestions by key. You can also add a suggestion using the add method. After that, you should update the records in the dataset.

Tip

Check the Suggestions - Python Reference for different formats per Question type.

updated_records = []\n\nfor record in dataset.records(with_suggestions=True):\n\n    # We can update existing suggestions\n    record.suggestions[\"label\"].value = \"new_value\"\n    record.suggestions[\"label\"].score = 0.9\n    record.suggestions[\"label\"].agent = \"model_name\"\n\n    # We can also add new suggestions with the `add` method:\n    if not record.suggestions[\"label\"]:\n        record.suggestions.add(\n            rg.Suggestion(\"label\", \"new_value\", score=0.9, agent=\"model_name\")\n        )\n\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)\n

If values of existing record responses must be updated, you can iterate over the records and update the responses by key. You can also add a response using the add method. After that, you should update the records in the dataset.

Tip

Check the Responses - Python Reference for different formats per Question type.

updated_records = []\n\nfor record in dataset.records(with_responses=True):\n\n    for response in record.responses[\"label\"]:\n\n        if response:\n            response.value = \"new_value\"\n            response.user_id = \"existing_user_id\"\n\n        else:\n            record.responses.add(rg.Response(\"label\", \"YES\", user_id=user.id))\n\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)\n
"},{"location":"how_to_guides/record/#delete-records","title":"Delete records","text":"

You can delete records in a dataset by calling the delete method on the Dataset object. To delete records, you need to retrieve them from the server and get a list with those that you want to delete.

records_to_delete = list(dataset.records)[:5]\ndataset.records.delete(records=records_to_delete)\n

Delete records based on a query

This can be very useful, for example, to avoid deleting records that already have responses.

For more information about the query syntax, check this how-to guide.

status_filter = rg.Query(\n    filter = rg.Filter((\"response.status\", \"==\", \"pending\"))\n)\nrecords_to_delete = list(dataset.records(status_filter))\n\ndataset.records.delete(records_to_delete)\n
"},{"location":"how_to_guides/use_markdown_to_format_rich_content/","title":"Use Markdown to format rich content","text":"

This guide provides an overview of how to use Markdown and HTML in TextFields to format chat conversations and allow for basic multi-modal support for images, audio, video and PDFs.

The TextField and TextQuestion provide the option to enable Markdown and therefore HTML by setting use_markdown=True. Given the flexibility of HTML, we can get great control over the presentation of data to our annotators. We provide some out-of-the-box methods for multi-modality and chat templates in the examples below.
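As a minimal sketch of enabling this when configuring a dataset (the field and question names here are illustrative):

settings = rg.Settings(\n    fields=[\n        # use_markdown=True enables Markdown, and therefore HTML, rendering\n        rg.TextField(name=\"markdown_enabled_field\", use_markdown=True),\n    ],\n    questions=[rg.TextQuestion(name=\"comment\", use_markdown=True)],\n)\n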

Main Methods

image_to_html / audio_to_html / video_to_html / pdf_to_html / chat_to_html
image_to_html(\"local_image_file.png\")\n
audio_to_html(\"local_audio_file.mp3\")\n
audio_to_html(\"local_video_file.mp4\")\n
pdf_to_html(\"local_pdf_file.pdf\")\n
chat_to_html([{\"role\": \"user\", \"content\": \"hello\"}])\n

Check the Markdown - Python Reference to see the arguments of the rg.markdown methods in detail.

Tip

You can get pretty creative with HTML. For example, think about visualizing graphs and tables. You can use methods from interesting Python packages, like pandas.DataFrame.to_html and plotly.io.to_html.
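For example, a minimal sketch rendering a pandas table in a markdown-enabled field (the field name is illustrative):

import pandas as pd\n\ndf = pd.DataFrame({\"city\": [\"Paris\", \"Madrid\"], \"population_m\": [2.1, 3.3]})\n\n# to_html produces an HTML table that a markdown-enabled field can render\nrg.Record(\n    fields={\"markdown_enabled_field\": df.to_html(index=False)}\n)\n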

"},{"location":"how_to_guides/use_markdown_to_format_rich_content/#multi-modal-support-images-audio-video-pdfs-and-more","title":"Multi-modal support: images, audio, video, PDFs and more","text":"

Argilla offers basic multi-modal support in two different ways. Each has pros and cons, but both offer the same UI experience because both rely on HTML.

"},{"location":"how_to_guides/use_markdown_to_format_rich_content/#local-content-through-dataurls","title":"Local content through DataURLs","text":"

A DataURL is a scheme that allows data to be encoded into a base64-encoded string and then embedded directly into HTML. To facilitate this, we offer some functions: image_to_html, audio_to_html, video_to_html, and pdf_to_html. These functions accept either the file path or the file's byte data and return the corresponding HTML needed to render the media file within the Argilla user interface. Additionally, you can set the width and height in pixels or as a percentage for video and image (defaults to the original dimensions), and set the autoplay and loop attributes to True for audio and video (both default to False).

Warning

DataURLs increase memory usage compared to the original file size. Additionally, different browsers enforce different size limitations for rendering DataURLs, which might block the visualization experience for some users.

Image / Audio / Video / PDF
from argilla.markdown import image_to_html\n\nhtml = image_to_html(\n    \"local_image_file.png\",\n    width=\"300px\",\n    height=\"300px\"\n)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
from argilla.markdown import audio_to_html\n\nhtml = audio_to_html(\n    \"local_audio_file.mp3\",\n    width=\"300px\",\n    height=\"300px\",\n    autoplay=True,\n    loop=True\n)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
from argilla.markdown import video_to_html\n\nhtml = video_to_html(\n    \"local_video_file.mp4\",\n    width=\"300px\",\n    height=\"300px\",\n    autoplay=True,\n    loop=True\n)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
from argilla.markdown import pdf_to_html\n\nhtml = pdf_to_html(\n    \"local_pdf_file.pdf\",\n    width=\"300px\",\n    height=\"300px\"\n)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
"},{"location":"how_to_guides/use_markdown_to_format_rich_content/#hosted-content","title":"Hosted content","text":"

Instead of uploading local files through DataURLs, we can also visualize URLs directly linking to media files such as images, audio, video, and PDFs hosted on a public or private server. In this case, you can use basic HTML to visualize content available on platforms like Google Drive or decide to configure a private media server.

Warning

When trying to access content from a private media server, you have to ensure that the Argilla server has network access to that media server, which might be done through something like IP whitelisting.

Image / Audio / Video / PDF
html = \"<img src='https://example.com/public-image-file.jpg'>\"\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
html = \"\"\"\n<audio controls>\n    <source src=\"https://example.com/public-audio-file.mp3\" type=\"audio/mpeg\">\n</audio>\n\"\"\"\"\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
html = \"\"\"\n<video width=\"320\" height=\"240\" controls>\n    <source src=\"https://example.com/public-video-file.mp4\" type=\"video/mp4\">\n</video>\n\"\"\"\"\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
html = \"\"\"\n<iframe\n    src=\"https://example.com/public-pdf-file.pdf\"\n    width=\"600\"\n    height=\"500\">\n</iframe>\n\"\"\"\"\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
"},{"location":"how_to_guides/use_markdown_to_format_rich_content/#chat-and-conversation-support","title":"Chat and conversation support","text":"

When working with chat data from multi-turn interactions with a Large Language Model, it can be helpful to visualize the conversation in a way similar to a common chat interface. To facilitate this, we offer the chat_to_html function, which converts messages from the OpenAI chat format to an HTML-formatted chat interface.

OpenAI chat format

The OpenAI chat format is a way to structure a list of messages as input from users and model-generated messages as output. These messages can only contain the role \"user\" for human messages and the roles \"assistant\", \"system\" or \"model\" for model-generated messages.

from argilla.markdown import chat_to_html\n\nmessages = [\n    {\"role\": \"user\", \"content\": \"Hello! How are you?\"},\n    {\"role\": \"assistant\", \"content\": \"I'm good, thank you!\"}\n]\n\nhtml = chat_to_html(messages)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n

"},{"location":"how_to_guides/user/","title":"User management","text":"

This guide provides an overview of user roles and credentials, explaining how to set up and manage users in Argilla.

A user in Argilla is an authorized person who, depending on their role, can use the Python SDK and access the UI in a running Argilla instance. We differentiate between three types of users depending on their role, permissions and needs: owner, admin and annotator.

| | Owner | Admin | Annotator |
|---|---|---|---|
| Number | Unlimited | Unlimited | Unlimited |
| Create and delete workspaces | Yes | No | No |
| Assign users to workspaces | Yes | No | No |
| Create, configure, update, and delete datasets | Yes | Only within assigned workspaces | No |
| Create, update, and delete users | Yes | No | No |
| Provide feedback with Argilla UI | Yes | Yes | Yes |

The owner refers to the root user who created the Argilla instance. Using workspaces within Argilla proves highly beneficial for organizing tasks efficiently. Accordingly, the owner has full access to all workspaces and their functionalities:

  • Workspace management: They can create, read, and delete workspaces.
  • User management: They can create a new user, assign it to a workspace, and delete it. They can also list users and search for a specific one.
  • Dataset management: They can create, configure, retrieve, update, and delete datasets.
  • Annotation: They can annotate datasets in the Argilla UI.
  • Feedback: They can provide feedback with the Argilla UI.

An admin user can only access the workspaces they have been assigned to and cannot assign other users to them. An admin user has the following permissions:

  • Dataset management: They can create, configure, retrieve, update, and delete datasets, but only in the assigned workspaces.
  • Annotation: They can annotate datasets in the assigned workspaces via the Argilla UI.
  • Feedback: They can provide feedback with the Argilla UI.

An annotator user is limited to accessing only the datasets assigned to them within the workspace. They have two specific permissions:

  • Annotation: They can annotate the assigned datasets in the Argilla UI.
  • Feedback: They can provide feedback with the Argilla UI.
Question: Who can manage users?

Only users with the owner role can manage (create, retrieve, delete) other users.

"},{"location":"how_to_guides/user/#initial-users-and-credentials","title":"Initial users and credentials","text":"

Depending on your Argilla deployment, the initial user with the owner role will vary.

  • If you deploy on the Hugging Face Hub, the initial user will correspond to the Space owner (your personal account). The API key is automatically generated and can be copied from the \"Settings\" section of the UI.
  • If you deploy with Docker, the default values for the environment variables are: USERNAME: argilla, PASSWORD: 12345678, API_KEY: argilla.apikey.

For new users, the username and password are set during the creation process. The API key can be copied from the \"Settings\" section of the UI.

Main Class

rg.User(\n    username=\"username\",\n    first_name=\"first_name\",\n    last_name=\"last_name\",\n    role=\"owner\",\n    password=\"password\",\n    client=client\n)\n

Check the User - Python Reference to see the attributes, arguments, and methods of the User class in detail.

"},{"location":"how_to_guides/user/#get-current-user","title":"Get current user","text":"

To ensure you're using the correct credentials for managing users, you can get the current user in Argilla using the me attribute of the Argilla class.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ncurrent_user = client.me\n
"},{"location":"how_to_guides/user/#create-a-user","title":"Create a user","text":"

To create a new user in Argilla, you can define it in the User class and then call the create method. This method is inherited from the Resource base class and operates without modifications.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser_to_create = rg.User(\n    username=\"my_username\",\n    password=\"12345678\",\n)\n\ncreated_user = user_to_create.create()\n

Accessing attributes

Access the attributes of a user by calling them directly on the User object. For example, user.id or user.username.

"},{"location":"how_to_guides/user/#list-users","title":"List users","text":"

You can list all the existing users in Argilla by accessing the users attribute on the Argilla class and iterating over them. You can also use len(client.users) to get the number of users.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nusers = client.users\n\nfor user in users:\n    print(user)\n

Notebooks

When using a notebook, executing client.users will display a table with username, id, role, and the last update as updated_at.

"},{"location":"how_to_guides/user/#retrieve-a-user","title":"Retrieve a user","text":"

You can retrieve an existing user from Argilla by accessing the users attribute on the Argilla class and passing the username or id as an argument. If the user does not exist, a warning message will be raised and None will be returned.

By username / By id
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nretrieved_user = client.users(\"my_username\")\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nretrieved_user = client.users(id=\"<uuid-or-uuid-string>\")\n
"},{"location":"how_to_guides/user/#check-user-existence","title":"Check user existence","text":"

You can check if a user exists. The client.users method will return None if the user was not found.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser = client.users(\"my_username\")\n\nif user is not None:\n    pass\n
"},{"location":"how_to_guides/user/#list-users-in-a-workspace","title":"List users in a workspace","text":"

You can list all the users in a workspace by accessing the users attribute on the Workspace class and iterating over them. You can also use len(workspace.users) to get the number of users by workspace.

For further information on how to manage workspaces, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces('my_workspace')\n\nfor user in workspace.users:\n    print(user)\n
"},{"location":"how_to_guides/user/#add-a-user-to-a-workspace","title":"Add a user to a workspace","text":"

You can add an existing user to a workspace in Argilla by calling the add_to_workspace method on the User class.

For further information on how to manage workspaces, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser = client.users('my_username')\nworkspace = client.workspaces('my_workspace')\n\nadded_user = user.add_to_workspace(workspace)\n
"},{"location":"how_to_guides/user/#remove-a-user-from-a-workspace","title":"Remove a user from a workspace","text":"

You can remove an existing user from a workspace in Argilla by calling the remove_from_workspace method on the User class.

For further information on how to manage workspaces, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser = client.users('my_username')\nworkspace = client.workspaces('my_workspace')\n\nremoved_user = user.remove_from_workspace(workspace)\n
"},{"location":"how_to_guides/user/#delete-a-user","title":"Delete a user","text":"

You can delete an existing user from Argilla by calling the delete method on the User class.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser_to_delete = client.users('my_username')\n\ndeleted_user = user_to_delete.delete()\n
"},{"location":"how_to_guides/workspace/","title":"Workspace management","text":"

This guide provides an overview of workspaces, explaining how to set up and manage workspaces in Argilla.

A workspace is a space inside your Argilla instance where authorized users can collaborate on datasets. It is accessible through the Python SDK and the UI.

Question: Who can manage workspaces?

Only users with the owner role can manage (create, read and delete) workspaces.

A user with the admin role can only read the workspace to which they belong.

"},{"location":"how_to_guides/workspace/#initial-workspaces","title":"Initial workspaces","text":"

Depending on your Argilla deployment, the initial workspace will vary.

  • If you deploy on the Hugging Face Hub, the initial workspace will be the one indicated in the .oauth.yaml file. By default, argilla.
  • If you deploy with Docker, you will need to create a workspace as shown in the next section.

Main Class

rg.Workspace(\n    name = \"name\",\n    client=client\n)\n

Check the Workspace - Python Reference to see the attributes, arguments, and methods of the Workspace class in detail.

"},{"location":"how_to_guides/workspace/#create-a-new-workspace","title":"Create a new workspace","text":"

To create a new workspace in Argilla, you can define it in the Workspace class and then call the create method. This method is inherited from the Resource base class and operates without modifications.

When you create a new workspace, it will be empty. To create and add a new dataset, check these guides.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace_to_create = rg.Workspace(name=\"my_workspace\")\n\ncreated_workspace = workspace_to_create.create()\n

Accessing attributes

Access the attributes of a workspace by calling them directly on the Workspace object. For example, workspace.id or workspace.name.

"},{"location":"how_to_guides/workspace/#list-workspaces","title":"List workspaces","text":"

You can list all the existing workspaces in Argilla by accessing the workspaces attribute on the Argilla class and iterating over them. You can also use len(client.workspaces) to get the number of workspaces.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspaces = client.workspaces\n\nfor workspace in workspaces:\n    print(workspace)\n

Notebooks

When using a notebook, executing client.workspaces will display a table with the number of datasets in each workspace, name, id, and the last update as updated_at.

"},{"location":"how_to_guides/workspace/#retrieve-a-workspace","title":"Retrieve a workspace","text":"

You can retrieve a workspace by accessing the workspaces method on the Argilla class and passing the name or id of the workspace as an argument. If the workspace does not exist, a warning message will be raised and None will be returned.

By name / By id
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nretrieved_workspace = client.workspaces(\"my_workspace\")\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nretrieved_workspace = client.workspaces(id=\"<uuid-or-uuid-string>\")\n
"},{"location":"how_to_guides/workspace/#check-workspace-existence","title":"Check workspace existence","text":"

You can check if a workspace exists. The client.workspaces method will return None if the workspace is not found.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\nif workspace is not None:\n    pass\n
"},{"location":"how_to_guides/workspace/#list-users-in-a-workspace","title":"List users in a workspace","text":"

You can list all the users in a workspace by accessing the users attribute on the Workspace class and iterating over them. You can also use len(workspace.users) to get the number of users by workspace.

For further information on how to manage users, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces('my_workspace')\n\nfor user in workspace.users:\n    print(user)\n
"},{"location":"how_to_guides/workspace/#add-a-user-to-a-workspace","title":"Add a user to a workspace","text":"

You can also add a user to a workspace by calling the add_user method on the Workspace class.

For further information on how to manage users, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\nadded_user = workspace.add_user(\"my_username\")\n
"},{"location":"how_to_guides/workspace/#remove-a-user-from-workspace","title":"Remove a user from workspace","text":"

You can also remove a user from a workspace by calling the remove_user method on the Workspace class.

For further information on how to manage users, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\nremoved_user = workspace.remove_user(\"my_username\")\n
"},{"location":"how_to_guides/workspace/#delete-a-workspace","title":"Delete a workspace","text":"

A workspace can only be deleted if no datasets are associated with it; if the workspace contains any datasets, deletion will fail. You can delete a workspace by calling the delete method on the Workspace class.

To clear a workspace and delete all of its datasets, refer to this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace_to_delete = client.workspaces(\"my_workspace\")\n\ndeleted_workspace = workspace_to_delete.delete()\n
"},{"location":"reference/argilla/SUMMARY/","title":"SUMMARY","text":"
  • rg.Argilla
  • rg.Workspace
  • rg.User
  • rg.Dataset
    • rg.Dataset.records
  • rg.Settings
    • Fields
    • Questions
    • Metadata
    • Vectors
    • Distribution
  • rg.Record
    • rg.Response
    • rg.Suggestion
    • rg.Vector
    • rg.Metadata
  • rg.Query
  • rg.markdown
"},{"location":"reference/argilla/client/","title":"rg.Argilla","text":"

To interact with the Argilla server from Python, you can use the Argilla class. The Argilla client is used to create, get, update, and delete all Argilla resources, such as workspaces, users, datasets, and records.

"},{"location":"reference/argilla/client/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/client/#connecting-to-an-argilla-server","title":"Connecting to an Argilla server","text":"

To connect to an Argilla server, instantiate the Argilla class and pass the api_url of the server and the api_key to authenticate.

import argilla as rg\n\nclient = rg.Argilla(\n    api_url=\"https://argilla.example.com\",\n    api_key=\"my_api_key\",\n)\n
"},{"location":"reference/argilla/client/#accessing-dataset-workspace-and-user-objects","title":"Accessing Dataset, Workspace, and User objects","text":"

The Argilla client provides access to the Dataset, Workspace, and User objects of the Argilla server.

my_dataset = client.datasets(\"my_dataset\")\n\nmy_workspace = client.workspaces(\"my_workspace\")\n\nmy_user = client.users(\"my_user\")\n

These resources can then be interacted with to access their properties and methods. For example, to list all datasets in a workspace:

for dataset in my_workspace.datasets:\n    print(dataset.name)\n
"},{"location":"reference/argilla/client/#src.argilla.client.Argilla","title":"Argilla","text":"

Bases: APIClient

Argilla API client. This is the main entry point to interact with the API.

Attributes:

| Name | Type | Description |
|---|---|---|
| workspaces | Workspaces | A collection of workspaces. |
| datasets | Datasets | A collection of datasets. |
| users | Users | A collection of users. |
| me | User | The current user. |

Source code in src/argilla/client.py
class Argilla(_api.APIClient):\n    \"\"\"Argilla API client. This is the main entry point to interact with the API.\n\n    Attributes:\n        workspaces: A collection of workspaces.\n        datasets: A collection of datasets.\n        users: A collection of users.\n        me: The current user.\n    \"\"\"\n\n    # Default instance of Argilla\n    _default_client: Optional[\"Argilla\"] = None\n\n    def __init__(\n        self,\n        api_url: Optional[str] = DEFAULT_HTTP_CONFIG.api_url,\n        api_key: Optional[str] = DEFAULT_HTTP_CONFIG.api_key,\n        timeout: int = DEFAULT_HTTP_CONFIG.timeout,\n        retries: int = DEFAULT_HTTP_CONFIG.retries,\n        **http_client_args,\n    ) -> None:\n        \"\"\"Inits the `Argilla` client.\n\n        Args:\n            api_url: the URL of the Argilla API. If not provided, then the value will try\n                to be set from `ARGILLA_API_URL` environment variable. Defaults to\n                `\"http://localhost:6900\"`.\n            api_key: the key to be used to authenticate in the Argilla API. If not provided,\n                then the value will try to be set from `ARGILLA_API_KEY` environment variable.\n                Defaults to `None`.\n            timeout: the maximum time in seconds to wait for a request to the Argilla API\n                to be completed before raising an exception. Defaults to `60`.\n            retries: the number of times to retry the HTTP connection to the Argilla API\n                before raising an exception. Defaults to `5`.\n        \"\"\"\n        super().__init__(api_url=api_url, api_key=api_key, timeout=timeout, retries=retries, **http_client_args)\n\n        self._set_default(self)\n\n    @property\n    def workspaces(self) -> \"Workspaces\":\n        \"\"\"A collection of workspaces on the server.\"\"\"\n        return Workspaces(client=self)\n\n    @property\n    def datasets(self) -> \"Datasets\":\n        \"\"\"A collection of datasets on the server.\"\"\"\n        return Datasets(client=self)\n\n    @property\n    def users(self) -> \"Users\":\n        \"\"\"A collection of users on the server.\"\"\"\n        return Users(client=self)\n\n    @cached_property\n    def me(self) -> \"User\":\n        from argilla.users import User\n\n        return User(client=self, _model=self.api.users.get_me())\n\n    ############################\n    # Private methods\n    ############################\n\n    @classmethod\n    def _set_default(cls, client: \"Argilla\") -> None:\n        \"\"\"Set the default instance of Argilla.\"\"\"\n        cls._default_client = client\n\n    @classmethod\n    def _get_default(cls) -> \"Argilla\":\n        \"\"\"Get the default instance of Argilla. If it doesn't exist, create a new one.\"\"\"\n        if cls._default_client is None:\n            cls._default_client = Argilla()\n        return cls._default_client\n
"},{"location":"reference/argilla/client/#src.argilla.client.Argilla.workspaces","title":"workspaces: Workspaces property","text":"

A collection of workspaces on the server.

"},{"location":"reference/argilla/client/#src.argilla.client.Argilla.datasets","title":"datasets: Datasets property","text":"

A collection of datasets on the server.

"},{"location":"reference/argilla/client/#src.argilla.client.Argilla.users","title":"users: Users property","text":"

A collection of users on the server.

"},{"location":"reference/argilla/client/#src.argilla.client.Argilla.__init__","title":"__init__(api_url=DEFAULT_HTTP_CONFIG.api_url, api_key=DEFAULT_HTTP_CONFIG.api_key, timeout=DEFAULT_HTTP_CONFIG.timeout, retries=DEFAULT_HTTP_CONFIG.retries, **http_client_args)","text":"

Inits the Argilla client.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| api_url | Optional[str] | the URL of the Argilla API. If not provided, then the value will try to be set from ARGILLA_API_URL environment variable. Defaults to \"http://localhost:6900\". | DEFAULT_HTTP_CONFIG.api_url |
| api_key | Optional[str] | the key to be used to authenticate in the Argilla API. If not provided, then the value will try to be set from ARGILLA_API_KEY environment variable. Defaults to None. | DEFAULT_HTTP_CONFIG.api_key |
| timeout | int | the maximum time in seconds to wait for a request to the Argilla API to be completed before raising an exception. Defaults to 60. | DEFAULT_HTTP_CONFIG.timeout |
| retries | int | the number of times to retry the HTTP connection to the Argilla API before raising an exception. Defaults to 5. | DEFAULT_HTTP_CONFIG.retries |

Source code in src/argilla/client.py
def __init__(\n    self,\n    api_url: Optional[str] = DEFAULT_HTTP_CONFIG.api_url,\n    api_key: Optional[str] = DEFAULT_HTTP_CONFIG.api_key,\n    timeout: int = DEFAULT_HTTP_CONFIG.timeout,\n    retries: int = DEFAULT_HTTP_CONFIG.retries,\n    **http_client_args,\n) -> None:\n    \"\"\"Inits the `Argilla` client.\n\n    Args:\n        api_url: the URL of the Argilla API. If not provided, then the value will try\n            to be set from `ARGILLA_API_URL` environment variable. Defaults to\n            `\"http://localhost:6900\"`.\n        api_key: the key to be used to authenticate in the Argilla API. If not provided,\n            then the value will try to be set from `ARGILLA_API_KEY` environment variable.\n            Defaults to `None`.\n        timeout: the maximum time in seconds to wait for a request to the Argilla API\n            to be completed before raising an exception. Defaults to `60`.\n        retries: the number of times to retry the HTTP connection to the Argilla API\n            before raising an exception. Defaults to `5`.\n    \"\"\"\n    super().__init__(api_url=api_url, api_key=api_key, timeout=timeout, retries=retries, **http_client_args)\n\n    self._set_default(self)\n
"},{"location":"reference/argilla/markdown/","title":"rg.markdown","text":"

To support the usage of Markdown within Argilla, we've created some helper functions to ease the usage of DataURL conversions and chat message visualizations.

"},{"location":"reference/argilla/markdown/#src.argilla.markdown.media","title":"media","text":""},{"location":"reference/argilla/markdown/#src.argilla.markdown.media.video_to_html","title":"video_to_html(file_source, file_type=None, width=None, height=None, autoplay=False, loop=False)","text":"

Convert a video file to an HTML tag with embedded base64 data.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| file_source | Union[str, bytes] | The path to the media file or a non-b64 encoded byte string. | required |
| file_type | Optional[str] | The type of the video file. If not provided, it will be inferred from the file extension. | None |
| width | Optional[str] | Display width in HTML. Defaults to None. | None |
| height | Optional[str] | Display height in HTML. Defaults to None. | None |
| autoplay | bool | True to autoplay media. Defaults to False. | False |
| loop | bool | True to loop media. Defaults to False. | False |

Returns:

| Type | Description |
|---|---|
| str | The HTML tag with embedded base64 data. |

Examples:

from argilla.markdown import video_to_html\nhtml = video_to_html(\"my_video.mp4\", width=\"300px\", height=\"300px\", autoplay=True, loop=True)\n
Source code in src/argilla/markdown/media.py
def video_to_html(\n    file_source: Union[str, bytes],\n    file_type: Optional[str] = None,\n    width: Optional[str] = None,\n    height: Optional[str] = None,\n    autoplay: bool = False,\n    loop: bool = False,\n) -> str:\n    \"\"\"\n    Convert a video file to an HTML tag with embedded base64 data.\n\n    Args:\n        file_source: The path to the media file or a non-b64 encoded byte string.\n        file_type: The type of the video file. If not provided, it will be inferred from the file extension.\n        width: Display width in HTML. Defaults to None.\n        height: Display height in HTML. Defaults to None.\n        autoplay: True to autoplay media. Defaults to False.\n        loop: True to loop media. Defaults to False.\n\n    Returns:\n        The HTML tag with embedded base64 data.\n\n    Examples:\n        ```python\n        from argilla.markdown import video_to_html\n        html = video_to_html(\"my_video.mp4\", width=\"300px\", height=\"300px\", autoplay=True, loop=True)\n        ```\n    \"\"\"\n    return _media_to_html(\"video\", file_source, file_type, width, height, autoplay, loop)\n
"},{"location":"reference/argilla/markdown/#src.argilla.markdown.media.audio_to_html","title":"audio_to_html(file_source, file_type=None, width=None, height=None, autoplay=False, loop=False)","text":"

Convert an audio file to an HTML tag with embedded base64 data.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| file_source | Union[str, bytes] | The path to the media file or a non-b64 encoded byte string. | required |
| file_type | Optional[str] | The type of the audio file. If not provided, it will be inferred from the file extension. | None |
| width | Optional[str] | Display width in HTML. Defaults to None. | None |
| height | Optional[str] | Display height in HTML. Defaults to None. | None |
| autoplay | bool | True to autoplay media. Defaults to False. | False |
| loop | bool | True to loop media. Defaults to False. | False |

Returns:

| Type | Description |
|---|---|
| str | The HTML tag with embedded base64 data. |

Examples:

from argilla.markdown import audio_to_html\nhtml = audio_to_html(\"my_audio.mp3\", width=\"300px\", height=\"300px\", autoplay=True, loop=True)\n
Source code in src/argilla/markdown/media.py
def audio_to_html(\n    file_source: Union[str, bytes],\n    file_type: Optional[str] = None,\n    width: Optional[str] = None,\n    height: Optional[str] = None,\n    autoplay: bool = False,\n    loop: bool = False,\n) -> str:\n    \"\"\"\n    Convert an audio file to an HTML tag with embedded base64 data.\n\n    Args:\n        file_source: The path to the media file or a non-b64 encoded byte string.\n        file_type: The type of the audio file. If not provided, it will be inferred from the file extension.\n        width: Display width in HTML. Defaults to None.\n        height: Display height in HTML. Defaults to None.\n        autoplay: True to autoplay media. Defaults to False.\n        loop: True to loop media. Defaults to False.\n\n    Returns:\n        The HTML tag with embedded base64 data.\n\n    Examples:\n        ```python\n        from argilla.markdown import audio_to_html\n        html = audio_to_html(\"my_audio.mp3\", width=\"300px\", height=\"300px\", autoplay=True, loop=True)\n        ```\n    \"\"\"\n    return _media_to_html(\"audio\", file_source, file_type, width, height, autoplay, loop)\n
"},{"location":"reference/argilla/markdown/#src.argilla.markdown.media.image_to_html","title":"image_to_html(file_source, file_type=None, width=None, height=None)","text":"

Convert an image file to an HTML tag with embedded base64 data.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| file_source | Union[str, bytes] | The path to the media file or a non-b64 encoded byte string. | required |
| file_type | Optional[str] | The type of the image file. If not provided, it will be inferred from the file extension. | None |
| width | Optional[str] | Display width in HTML. Defaults to None. | None |
| height | Optional[str] | Display height in HTML. Defaults to None. | None |

Returns:

| Type | Description |
|---|---|
| str | The HTML tag with embedded base64 data. |

Examples:

from argilla.markdown import image_to_html\nhtml = image_to_html(\"my_image.png\", width=\"300px\", height=\"300px\")\n
Source code in src/argilla/markdown/media.py
def image_to_html(\n    file_source: Union[str, bytes],\n    file_type: Optional[str] = None,\n    width: Optional[str] = None,\n    height: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Convert an image file to an HTML tag with embedded base64 data.\n\n    Args:\n        file_source: The path to the media file or a non-b64 encoded byte string.\n        file_type: The type of the image file. If not provided, it will be inferred from the file extension.\n        width: Display width in HTML. Defaults to None.\n        height: Display height in HTML. Defaults to None.\n\n    Returns:\n        The HTML tag with embedded base64 data.\n\n    Examples:\n        ```python\n        from argilla.markdown import image_to_html\n        html = image_to_html(\"my_image.png\", width=\"300px\", height=\"300px\")\n        ```\n    \"\"\"\n    return _media_to_html(\"image\", file_source, file_type, width, height)\n
"},{"location":"reference/argilla/markdown/#src.argilla.markdown.media.pdf_to_html","title":"pdf_to_html(file_source, width='1000px', height='1000px')","text":"

Convert a pdf file to an HTML tag with embedded data.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| file_source | Union[str, bytes] | The path to the PDF file, a bytes object with PDF data, or a URL. | required |
| width | Optional[str] | Display width in HTML. Defaults to \"1000px\". | '1000px' |
| height | Optional[str] | Display height in HTML. Defaults to \"1000px\". | '1000px' |

Returns:

| Type | Description |
|---|---|
| str | HTML string embedding the PDF. |

Raises:

| Type | Description |
|---|---|
| ValueError | If the width and height are not pixel or percentage values. |

Examples:

from argilla.markdown import pdf_to_html\nhtml = pdf_to_html(\"my_pdf.pdf\", width=\"300px\", height=\"300px\")\n
Source code in src/argilla/markdown/media.py
def pdf_to_html(\n    file_source: Union[str, bytes], width: Optional[str] = \"1000px\", height: Optional[str] = \"1000px\"\n) -> str:\n    \"\"\"\n    Convert a pdf file to an HTML tag with embedded data.\n\n    Args:\n        file_source: The path to the PDF file, a bytes object with PDF data, or a URL.\n        width: Display width in HTML. Defaults to \"1000px\".\n        height: Display height in HTML. Defaults to \"1000px\".\n\n    Returns:\n        HTML string embedding the PDF.\n\n    Raises:\n        ValueError: If the width and height are not pixel or percentage.\n\n    Examples:\n        ```python\n        from argilla.markdown import pdf_to_html\n        html = pdf_to_html(\"my_pdf.pdf\", width=\"300px\", height=\"300px\")\n        ```\n    \"\"\"\n    if not _is_valid_dimension(width) or not _is_valid_dimension(height):\n        raise ValueError(\"Width and height must be valid pixel (e.g., '300px') or percentage (e.g., '50%') values.\")\n\n    if isinstance(file_source, str) and urlparse(file_source).scheme in [\"http\", \"https\"]:\n        return f'<embed src=\"{file_source}\" type=\"application/pdf\" width=\"{width}\" height=\"{height}\"></embed>'\n\n    file_data, _ = _get_file_data(file_source, \"pdf\")\n    pdf_base64 = base64.b64encode(file_data).decode(\"utf-8\")\n    data_url = f\"data:application/pdf;base64,{pdf_base64}\"\n    return f'<object id=\"pdf\" data=\"{data_url}\" type=\"application/pdf\" width=\"{width}\" height=\"{height}\"></object>'\n
"},{"location":"reference/argilla/markdown/#src.argilla.markdown.chat","title":"chat","text":""},{"location":"reference/argilla/markdown/#src.argilla.markdown.chat.chat_to_html","title":"chat_to_html(messages)","text":"

Converts a list of chat messages in the OpenAI format to HTML.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| messages | List[Dict[str, str]] | A list of dictionaries where each dictionary represents a chat message. Each dictionary should have the keys \"role\" (a string indicating the role of the sender, e.g., \"user\", \"model\", \"assistant\", \"system\") and \"content\" (the content of the message). | required |

Returns:

| Name | Type | Description |
|---|---|---|
| str | str | An HTML string that represents the chat conversation. |

Raises:

| Type | Description |
|---|---|
| ValueError | If an invalid role is passed. |

Examples:

from argilla.markdown import chat_to_html\nhtml = chat_to_html([\n    {\"role\": \"user\", \"content\": \"hello\"},\n    {\"role\": \"assistant\", \"content\": \"goodbye\"}\n])\n
Source code in src/argilla/markdown/chat.py
def chat_to_html(messages: List[Dict[str, str]]) -> str:\n    \"\"\"\n    Converts a list of chat messages in the OpenAI format to HTML.\n\n    Args:\n        messages (List[Dict[str, str]]): A list of dictionaries where each dictionary represents a chat message.\n            Each dictionary should have the keys:\n                - \"role\": A string indicating the role of the sender (e.g., \"user\", \"model\", \"assistant\", \"system\").\n                - \"content\": The content of the message.\n\n    Returns:\n        str: An HTML string that represents the chat conversation.\n\n    Raises:\n        ValueError: If the an invalid role is passed.\n\n    Examples:\n        ```python\n        from argilla.markdown import chat_to_html\n        html = chat_to_html([\n            {\"role\": \"user\", \"content\": \"hello\"},\n            {\"role\": \"assistant\", \"content\": \"goodbye\"}\n        ])\n        ```\n    \"\"\"\n    chat_html = \"\"\n    for message in messages:\n        role = message[\"role\"]\n        content = message[\"content\"]\n        content_html = markdown.markdown(content)\n\n        if role == \"user\":\n            html = '<div class=\"user-message\">' + '<div class=\"message-content\">'\n        elif role in [\"model\", \"assistant\", \"system\"]:\n            html = '<div class=\"system-message\">' + '<div class=\"message-content\">'\n        else:\n            raise ValueError(f\"Invalid role: {role}\")\n\n        html += f\"{content_html}\"\n        html += \"</div></div>\"\n        chat_html += html\n\n    return f\"<body>{CHAT_CSS_STYLE}{chat_html}</body>\"\n
"},{"location":"reference/argilla/search/","title":"rg.Query","text":"

To collect records based on searching criteria, you can use the Query and Filter classes. The Query class is used to define the search criteria, while the Filter class is used to filter the search results. Filter is passed to a Query object so you can combine multiple filters to create complex search queries. A Query object can also be passed to Dataset.records to fetch records based on the search criteria.

"},{"location":"reference/argilla/search/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/search/#searching-for-records-with-terms","title":"Searching for records with terms","text":"

To search for records with terms, you can use the Dataset.records attribute with a query string. The search terms are used to search for records that contain the terms in the text field.

for record in dataset.records(query=\"paris\"):\n    print(record)\n
"},{"location":"reference/argilla/search/#filtering-records-by-conditions","title":"Filtering records by conditions","text":"

Argilla allows you to filter records based on conditions. You can use the Filter class to define the conditions and pass them to the Dataset.records attribute to fetch records based on the conditions. Conditions include \"==\", \">=\", \"<=\", or \"in\". Conditions can be combined with dot notation to filter records based on metadata, suggestions, or responses.

# create a range from 10 to 20\nrange_filter = rg.Filter(\n    [\n        (\"metadata.count\", \">=\", 10),\n        (\"metadata.count\", \"<=\", 20)\n    ]\n)\n\n# query records with metadata count greater than 10 and less than 20\nquery = rg.Query(filter=range_filter, query=\"paris\")\n\n# iterate over the results\nfor record in dataset.records(query=query):\n    print(record)\n
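Conditions on different record properties can also be combined in a single filter; a minimal sketch mixing the metadata condition above with the response-status condition used elsewhere in this guide:

# combine a metadata condition with a response-status condition\ncombined_filter = rg.Filter(\n    [\n        (\"metadata.count\", \">=\", 10),\n        (\"response.status\", \"==\", \"pending\"),\n    ]\n)\n\nfor record in dataset.records(query=rg.Query(filter=combined_filter)):\n    print(record)\n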
"},{"location":"reference/argilla/search/#src.argilla.records._search.Query","title":"Query","text":"

This class is used to map user queries to the internal query models

Source code in src/argilla/records/_search.py
class Query:\n    \"\"\"This class is used to map user queries to the internal query models\"\"\"\n\n    def __init__(\n        self,\n        *,\n        query: Union[str, None] = None,\n        similar: Union[Similar, None] = None,\n        filter: Union[Filter, Conditions, None] = None,\n    ):\n        \"\"\"Create a query object for use in Argilla search requests.add()\n\n        Parameters:\n            query (Union[str, None], optional): The query string that will be used to search.\n            similar (Union[Similar, None], optional): The similar object that will be used to search for similar records\n            filter (Union[Filter, None], optional): The filter object that will be used to filter the search results.\n        \"\"\"\n\n        if isinstance(filter, tuple):\n            filter = [filter]\n\n        if isinstance(filter, list):\n            filter = Filter(conditions=filter)\n\n        self.query = query\n        self.filter = filter\n        self.similar = similar\n\n    def has_search(self) -> bool:\n        return bool(self.query or self.similar or self.filter)\n\n    def api_model(self) -> SearchQueryModel:\n        model = SearchQueryModel()\n\n        if self.query or self.similar:\n            query = QueryModel()\n\n            if self.query is not None:\n                query.text = TextQueryModel(q=self.query)\n\n            if self.similar is not None:\n                query.vector = self.similar.api_model()\n\n            model.query = query\n\n        if self.filter is not None:\n            model.filters = self.filter.api_model()\n\n        return model\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Query.__init__","title":"__init__(*, query=None, similar=None, filter=None)","text":"

Create a query object for use in Argilla search requests.add()

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| query | Union[str, None] | The query string that will be used to search. | None |
| similar | Union[Similar, None] | The similar object that will be used to search for similar records. | None |
| filter | Union[Filter, None] | The filter object that will be used to filter the search results. | None |

Source code in src/argilla/records/_search.py
def __init__(\n    self,\n    *,\n    query: Union[str, None] = None,\n    similar: Union[Similar, None] = None,\n    filter: Union[Filter, Conditions, None] = None,\n):\n    \"\"\"Create a query object for use in Argilla search requests.add()\n\n    Parameters:\n        query (Union[str, None], optional): The query string that will be used to search.\n        similar (Union[Similar, None], optional): The similar object that will be used to search for similar records\n        filter (Union[Filter, None], optional): The filter object that will be used to filter the search results.\n    \"\"\"\n\n    if isinstance(filter, tuple):\n        filter = [filter]\n\n    if isinstance(filter, list):\n        filter = Filter(conditions=filter)\n\n    self.query = query\n    self.filter = filter\n    self.similar = similar\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Filter","title":"Filter","text":"

This class is used to map user filters to the internal filter models

Source code in src/argilla/records/_search.py
class Filter:\n    \"\"\"This class is used to map user filters to the internal filter models\"\"\"\n\n    def __init__(self, conditions: Union[Conditions, None] = None):\n        \"\"\" Create a filter object for use in Argilla search requests.\n\n        Parameters:\n            conditions (Union[List[Tuple[str, str, Any]], Tuple[str, str, Any], None], optional): \\\n                The conditions that will be used to filter the search results. \\\n                The conditions should be a list of tuples where each tuple contains \\\n                the field, operator, and value. For example `(\"label\", \"in\", [\"positive\",\"happy\"])`.\\\n        \"\"\"\n\n        if isinstance(conditions, tuple):\n            conditions = [conditions]\n        self.conditions = [Condition(condition) for condition in conditions]\n\n    def api_model(self) -> AndFilterModel:\n        return AndFilterModel.model_validate({\"and\": [condition.api_model() for condition in self.conditions]})\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Filter.__init__","title":"__init__(conditions=None)","text":"

Create a filter object for use in Argilla search requests.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| conditions | Union[List[Tuple[str, str, Any]], Tuple[str, str, Any], None] | The conditions that will be used to filter the search results. The conditions should be a list of tuples where each tuple contains the field, operator, and value. For example (\"label\", \"in\", [\"positive\",\"happy\"]). | None |

Source code in src/argilla/records/_search.py
def __init__(self, conditions: Union[Conditions, None] = None):\n    \"\"\" Create a filter object for use in Argilla search requests.\n\n    Parameters:\n        conditions (Union[List[Tuple[str, str, Any]], Tuple[str, str, Any], None], optional): \\\n            The conditions that will be used to filter the search results. \\\n            The conditions should be a list of tuples where each tuple contains \\\n            the field, operator, and value. For example `(\"label\", \"in\", [\"positive\",\"happy\"])`.\\\n    \"\"\"\n\n    if isinstance(conditions, tuple):\n        conditions = [conditions]\n    self.conditions = [Condition(condition) for condition in conditions]\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Similar","title":"Similar","text":"

This class is used to map user similar queries to the internal query models

Source code in src/argilla/records/_search.py
class Similar:\n    \"\"\"This class is used to map user similar queries to the internal query models\"\"\"\n\n    def __init__(self, name: str, value: Union[Iterable[float], \"Record\"], most_similar: bool = True):\n        \"\"\"\n        Create a similar object for use in Argilla search requests.\n\n        Parameters:\n            name: The name of the vector field\n            value: The vector value or the record to search for similar records\n            most_similar: Whether to search for the most similar records or the least similar records\n        \"\"\"\n\n        self.name = name\n        self.value = value\n        self.most_similar = most_similar if most_similar is not None else True\n\n    def api_model(self) -> VectorQueryModel:\n        from argilla.records import Record\n\n        order = \"most_similar\" if self.most_similar else \"least_similar\"\n\n        if isinstance(self.value, Record):\n            return VectorQueryModel(name=self.name, record_id=self.value._server_id, order=order)\n\n        return VectorQueryModel(name=self.name, value=self.value, order=order)\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Similar.__init__","title":"__init__(name, value, most_similar=True)","text":"

Create a similar object for use in Argilla search requests.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| name | str | The name of the vector field. | required |
| value | Union[Iterable[float], Record] | The vector value or the record to search for similar records. | required |
| most_similar | bool | Whether to search for the most similar records or the least similar records. | True |

Source code in src/argilla/records/_search.py
def __init__(self, name: str, value: Union[Iterable[float], \"Record\"], most_similar: bool = True):\n    \"\"\"\n    Create a similar object for use in Argilla search requests.\n\n    Parameters:\n        name: The name of the vector field\n        value: The vector value or the record to search for similar records\n        most_similar: Whether to search for the most similar records or the least similar records\n    \"\"\"\n\n    self.name = name\n    self.value = value\n    self.most_similar = most_similar if most_similar is not None else True\n
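A minimal sketch of a similarity query, assuming Similar is exposed like Query and Filter and that a vector setting named my_vector exists (as in the earlier examples):

# search for the records whose 'my_vector' is most similar to a query vector\nsimilar_query = rg.Query(\n    similar=rg.Similar(name=\"my_vector\", value=[0.1, 0.2, 0.3])\n)\n\nfor record in dataset.records(query=similar_query):\n    print(record)\n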
"},{"location":"reference/argilla/users/","title":"rg.User","text":"

A user in Argilla is the profile of someone who uses the SDK or UI. The profile can be used to track their feedback activity and to manage their access to the Argilla server.

"},{"location":"reference/argilla/users/#usage-examples","title":"Usage Examples","text":"

To create a new user, instantiate the User object with the client and the username:

user = rg.User(username=\"my_username\", password=\"my_password\")\nuser.create()\n

Existing users can be retrieved by their username:

user = client.users(\"my_username\")\n

The current user of the rg.Argilla client can be accessed using the me attribute:

client.me\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User","title":"User","text":"

Bases: Resource

Class for interacting with Argilla users in the Argilla server. User profiles are used to manage access to the Argilla server and track responses to records.

Attributes:

Name Type Description username str

The username of the user.

first_name str

The first name of the user.

last_name str

The last name of the user.

role str

The role of the user, either 'annotator', 'admin', or 'owner'.

password str

The password of the user.

id UUID

The ID of the user.

Source code in src/argilla/users/_resource.py
class User(Resource):\n    \"\"\"Class for interacting with Argilla users in the Argilla server. User profiles \\\n        are used to manage access to the Argilla server and track responses to records.\n\n    Attributes:\n        username (str): The username of the user.\n        first_name (str): The first name of the user.\n        last_name (str): The last name of the user.\n        role (str): The role of the user, either 'annotator' or 'admin'.\n        password (str): The password of the user.\n        id (UUID): The ID of the user.\n    \"\"\"\n\n    _model: UserModel\n    _api: UsersAPI\n\n    def __init__(\n        self,\n        username: Optional[str] = None,\n        first_name: Optional[str] = None,\n        last_name: Optional[str] = None,\n        role: Optional[str] = None,\n        password: Optional[str] = None,\n        client: Optional[\"Argilla\"] = None,\n        id: Optional[UUID] = None,\n        _model: Optional[UserModel] = None,\n    ) -> None:\n        \"\"\"Initializes a User object with a client and a username\n\n        Parameters:\n            username (str): The username of the user\n            first_name (str): The first name of the user\n            last_name (str): The last name of the user\n            role (str): The role of the user, either 'annotator', admin, or 'owner'\n            password (str): The password of the user\n            client (Argilla): The client used to interact with Argilla\n\n        Returns:\n            User: The initialized user object\n        \"\"\"\n        client = client or Argilla._get_default()\n        super().__init__(client=client, api=client.api.users)\n\n        if _model is None:\n            _model = UserModel(\n                username=username,\n                password=password,\n                first_name=first_name or username,\n                last_name=last_name,\n                role=role or Role.annotator,\n                id=id,\n            )\n            self._log_message(f\"Initialized user with username {username}\")\n        self._model = _model\n\n    def create(self) -> \"User\":\n        \"\"\"Creates the user in Argilla. After creating a user, it will be able to log in to the Argilla server.\n\n        Returns:\n            User: The user that was created in Argilla.\n        \"\"\"\n        model_create = self.api_model()\n        model = self._api.create(model_create)\n        # The password is not returned in the response\n        model.password = model_create.password\n        self._model = model\n        return self\n\n    def delete(self) -> None:\n        \"\"\"Deletes the user from Argilla. After deleting a user, it will no longer be able to log in to the Argilla server.\"\"\"\n        super().delete()\n        # exists relies on the id, so we need to set it to None\n        self._model = UserModel(username=self.username)\n\n    def add_to_workspace(self, workspace: \"Workspace\") -> \"User\":\n        \"\"\"Adds the user to a workspace. After adding a user to a workspace, it will have access to the datasets\n        in the workspace.\n\n        Args:\n            workspace (Workspace): The workspace to add the user to.\n\n        Returns:\n            User: The user that was added to the workspace.\n        \"\"\"\n        self._model = self._api.add_to_workspace(workspace.id, self.id)\n        return self\n\n    def remove_from_workspace(self, workspace: \"Workspace\") -> \"User\":\n        \"\"\"Removes the user from a workspace. 
After removing a user from a workspace, it will no longer have access to\n        the datasets in the workspace.\n\n        Args:\n            workspace (Workspace): The workspace to remove the user from.\n\n        Returns:\n            User: The user that was removed from the workspace.\n\n        \"\"\"\n        self._model = self._api.delete_from_workspace(workspace.id, self.id)\n        return self\n\n    ############################\n    # Properties\n    ############################\n    @property\n    def username(self) -> str:\n        return self._model.username\n\n    @username.setter\n    def username(self, value: str) -> None:\n        self._model.username = value\n\n    @property\n    def password(self) -> str:\n        return self._model.password\n\n    @password.setter\n    def password(self, value: str) -> None:\n        self._model.password = value\n\n    @property\n    def first_name(self) -> str:\n        return self._model.first_name\n\n    @first_name.setter\n    def first_name(self, value: str) -> None:\n        self._model.first_name = value\n\n    @property\n    def last_name(self) -> str:\n        return self._model.last_name\n\n    @last_name.setter\n    def last_name(self, value: str) -> None:\n        self._model.last_name = value\n\n    @property\n    def role(self) -> Role:\n        return self._model.role\n\n    @role.setter\n    def role(self, value: Role) -> None:\n        self._model.role = value\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.__init__","title":"__init__(username=None, first_name=None, last_name=None, role=None, password=None, client=None, id=None, _model=None)","text":"

Initializes a User object with a client and a username

Parameters:

Name Type Description Default username str

The username of the user

None first_name str

The first name of the user

None last_name str

The last name of the user

None role str

The role of the user, either 'annotator', 'admin', or 'owner'

None password str

The password of the user

None client Argilla

The client used to interact with Argilla

None

Returns:

Name Type Description User None

The initialized user object

Source code in src/argilla/users/_resource.py
def __init__(\n    self,\n    username: Optional[str] = None,\n    first_name: Optional[str] = None,\n    last_name: Optional[str] = None,\n    role: Optional[str] = None,\n    password: Optional[str] = None,\n    client: Optional[\"Argilla\"] = None,\n    id: Optional[UUID] = None,\n    _model: Optional[UserModel] = None,\n) -> None:\n    \"\"\"Initializes a User object with a client and a username\n\n    Parameters:\n        username (str): The username of the user\n        first_name (str): The first name of the user\n        last_name (str): The last name of the user\n        role (str): The role of the user, either 'annotator', admin, or 'owner'\n        password (str): The password of the user\n        client (Argilla): The client used to interact with Argilla\n\n    Returns:\n        User: The initialized user object\n    \"\"\"\n    client = client or Argilla._get_default()\n    super().__init__(client=client, api=client.api.users)\n\n    if _model is None:\n        _model = UserModel(\n            username=username,\n            password=password,\n            first_name=first_name or username,\n            last_name=last_name,\n            role=role or Role.annotator,\n            id=id,\n        )\n        self._log_message(f\"Initialized user with username {username}\")\n    self._model = _model\n
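For illustration, a user can be initialized with an explicit role before being created on the server (the credential values below are placeholders):

user = rg.User(\n    username=\"new_annotator\",\n    password=\"secure_password\",\n    first_name=\"New\",\n    role=\"annotator\", # defaults to 'annotator' when omitted\n)\nuser.create()\n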
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.create","title":"create()","text":"

Creates the user in Argilla. After creating a user, it will be able to log in to the Argilla server.

Returns:

Name Type Description User User

The user that was created in Argilla.

Source code in src/argilla/users/_resource.py
def create(self) -> \"User\":\n    \"\"\"Creates the user in Argilla. After creating a user, it will be able to log in to the Argilla server.\n\n    Returns:\n        User: The user that was created in Argilla.\n    \"\"\"\n    model_create = self.api_model()\n    model = self._api.create(model_create)\n    # The password is not returned in the response\n    model.password = model_create.password\n    self._model = model\n    return self\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.delete","title":"delete()","text":"

Deletes the user from Argilla. After deleting a user, it will no longer be able to log in to the Argilla server.

Source code in src/argilla/users/_resource.py
def delete(self) -> None:\n    \"\"\"Deletes the user from Argilla. After deleting a user, it will no longer be able to log in to the Argilla server.\"\"\"\n    super().delete()\n    # exists relies on the id, so we need to set it to None\n    self._model = UserModel(username=self.username)\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.add_to_workspace","title":"add_to_workspace(workspace)","text":"

Adds the user to a workspace. After adding a user to a workspace, it will have access to the datasets in the workspace.

Parameters:

Name Type Description Default workspace Workspace

The workspace to add the user to.

required

Returns:

Name Type Description User User

The user that was added to the workspace.

Source code in src/argilla/users/_resource.py
def add_to_workspace(self, workspace: \"Workspace\") -> \"User\":\n    \"\"\"Adds the user to a workspace. After adding a user to a workspace, it will have access to the datasets\n    in the workspace.\n\n    Args:\n        workspace (Workspace): The workspace to add the user to.\n\n    Returns:\n        User: The user that was added to the workspace.\n    \"\"\"\n    self._model = self._api.add_to_workspace(workspace.id, self.id)\n    return self\n
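For example, combining this method with the client accessors shown above (the workspace and user names are placeholders):

workspace = client.workspaces(\"my_workspace\")\nuser = client.users(\"my_username\")\nuser.add_to_workspace(workspace)\n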
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.remove_from_workspace","title":"remove_from_workspace(workspace)","text":"

Removes the user from a workspace. After removing a user from a workspace, it will no longer have access to the datasets in the workspace.

Parameters:

Name Type Description Default workspace Workspace

The workspace to remove the user from.

required

Returns:

Name Type Description User User

The user that was removed from the workspace.

Source code in src/argilla/users/_resource.py
def remove_from_workspace(self, workspace: \"Workspace\") -> \"User\":\n    \"\"\"Removes the user from a workspace. After removing a user from a workspace, it will no longer have access to\n    the datasets in the workspace.\n\n    Args:\n        workspace (Workspace): The workspace to remove the user from.\n\n    Returns:\n        User: The user that was removed from the workspace.\n\n    \"\"\"\n    self._model = self._api.delete_from_workspace(workspace.id, self.id)\n    return self\n
"},{"location":"reference/argilla/workspaces/","title":"rg.Workspace","text":"

In Argilla, workspaces are used to organize datasets into groups. For example, you might have a workspace for each project or team.

"},{"location":"reference/argilla/workspaces/#usage-examples","title":"Usage Examples","text":"

To create a new workspace, instantiate the Workspace object with the client and the name:

workspace = rg.Workspace(name=\"my_workspace\")\nworkspace.create()\n

To retrieve an existing workspace, use the client.workspaces attribute:

workspace = client.workspaces(\"my_workspace\")\n
"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace","title":"Workspace","text":"

Bases: Resource

Class for interacting with Argilla workspaces. Workspaces are used to organize datasets in the Argilla server.

Attributes:

Name Type Description name str

The name of the workspace.

id UUID

The ID of the workspace. This is a unique identifier for the workspace in the server.

datasets List[Dataset]

A list of all datasets in the workspace.

users WorkspaceUsers

A list of all users in the workspace.

Source code in src/argilla/workspaces/_resource.py
class Workspace(Resource):\n    \"\"\"Class for interacting with Argilla workspaces. Workspaces are used to organize datasets in the Argilla server.\n\n    Attributes:\n        name (str): The name of the workspace.\n        id (UUID): The ID of the workspace. This is a unique identifier for the workspace in the server.\n        datasets (List[Dataset]): A list of all datasets in the workspace.\n        users (WorkspaceUsers): A list of all users in the workspace.\n    \"\"\"\n\n    name: Optional[str]\n\n    _api: \"WorkspacesAPI\"\n\n    def __init__(\n        self,\n        name: Optional[str] = None,\n        id: Optional[UUID] = None,\n        client: Optional[\"Argilla\"] = None,\n    ) -> None:\n        \"\"\"Initializes a Workspace object with a client and a name or id\n\n        Parameters:\n            client (Argilla): The client used to interact with Argilla\n            name (str): The name of the workspace\n            id (UUID): The id of the workspace\n\n        Returns:\n            Workspace: The initialized workspace object\n        \"\"\"\n        client = client or Argilla._get_default()\n        super().__init__(client=client, api=client.api.workspaces)\n\n        self._model = WorkspaceModel(name=name, id=id)\n\n    def add_user(self, user: Union[\"User\", str]) -> \"User\":\n        \"\"\"Adds a user to the workspace. After adding a user to the workspace, it will have access to the datasets\n        in the workspace.\n\n        Args:\n            user (Union[User, str]): The user to add to the workspace. Can be a User object or a username.\n\n        Returns:\n            User: The user that was added to the workspace\n        \"\"\"\n        return self.users.add(user)\n\n    def remove_user(self, user: Union[\"User\", str]) -> \"User\":\n        \"\"\"Removes a user from the workspace. After removing a user from the workspace, it will no longer have access\n\n        Args:\n            user (Union[User, str]): The user to remove from the workspace. 
Can be a User object or a username.\n\n        Returns:\n            User: The user that was removed from the workspace.\n        \"\"\"\n        return self.users.delete(user)\n\n    # TODO: Make this method private\n    def list_datasets(self) -> List[\"Dataset\"]:\n        from argilla.datasets import Dataset\n\n        datasets = self._client.api.datasets.list(self.id)\n        self._log_message(f\"Got {len(datasets)} datasets for workspace {self.id}\")\n        return [Dataset.from_model(model=dataset, client=self._client) for dataset in datasets]\n\n    @classmethod\n    def from_model(cls, model: WorkspaceModel, client: Argilla) -> \"Workspace\":\n        instance = cls(name=model.name, id=model.id, client=client)\n        instance._model = model\n\n        return instance\n\n    ############################\n    # Properties\n    ############################\n\n    @property\n    def name(self) -> Optional[str]:\n        return self._model.name\n\n    @name.setter\n    def name(self, value: str) -> None:\n        self._model.name = value\n\n    @property\n    def datasets(self) -> List[\"Dataset\"]:\n        \"\"\"List all datasets in the workspace\n\n        Returns:\n            List[Dataset]: A list of all datasets in the workspace\n        \"\"\"\n        return self.list_datasets()\n\n    @property\n    def users(self) -> \"WorkspaceUsers\":\n        \"\"\"List all users in the workspace\n\n        Returns:\n            WorkspaceUsers: A list of all users in the workspace\n        \"\"\"\n        return WorkspaceUsers(workspace=self)\n
"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.datasets","title":"datasets: List[Dataset] property","text":"

List all datasets in the workspace

Returns:

Type Description List[Dataset]

List[Dataset]: A list of all datasets in the workspace

"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.users","title":"users: WorkspaceUsers property","text":"

List all users in the workspace

Returns:

Name Type Description WorkspaceUsers WorkspaceUsers

A list of all users in the workspace
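As a sketch, both properties can be used for read-only inspection, assuming, per their descriptions, that they return iterable collections:

workspace = client.workspaces(\"my_workspace\")\nfor dataset in workspace.datasets:\n    print(dataset.name)\nfor user in workspace.users:\n    print(user.username)\n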

"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.__init__","title":"__init__(name=None, id=None, client=None)","text":"

Initializes a Workspace object with a client and a name or id

Parameters:

Name Type Description Default client Argilla

The client used to interact with Argilla

None name str

The name of the workspace

None id UUID

The id of the workspace

None

Returns:

Name Type Description Workspace None

The initialized workspace object

Source code in src/argilla/workspaces/_resource.py
def __init__(\n    self,\n    name: Optional[str] = None,\n    id: Optional[UUID] = None,\n    client: Optional[\"Argilla\"] = None,\n) -> None:\n    \"\"\"Initializes a Workspace object with a client and a name or id\n\n    Parameters:\n        client (Argilla): The client used to interact with Argilla\n        name (str): The name of the workspace\n        id (UUID): The id of the workspace\n\n    Returns:\n        Workspace: The initialized workspace object\n    \"\"\"\n    client = client or Argilla._get_default()\n    super().__init__(client=client, api=client.api.workspaces)\n\n    self._model = WorkspaceModel(name=name, id=id)\n
"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.add_user","title":"add_user(user)","text":"

Adds a user to the workspace. After adding a user to the workspace, it will have access to the datasets in the workspace.

Parameters:

Name Type Description Default user Union[User, str]

The user to add to the workspace. Can be a User object or a username.

required

Returns:

Name Type Description User User

The user that was added to the workspace

Source code in src/argilla/workspaces/_resource.py
def add_user(self, user: Union[\"User\", str]) -> \"User\":\n    \"\"\"Adds a user to the workspace. After adding a user to the workspace, it will have access to the datasets\n    in the workspace.\n\n    Args:\n        user (Union[User, str]): The user to add to the workspace. Can be a User object or a username.\n\n    Returns:\n        User: The user that was added to the workspace\n    \"\"\"\n    return self.users.add(user)\n
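For example, since user accepts either form, a plain username string is enough (the names are placeholders):

workspace = client.workspaces(\"my_workspace\")\nworkspace.add_user(\"my_username\") # a User object works too\n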
"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.remove_user","title":"remove_user(user)","text":"

Removes a user from the workspace. After removing a user from the workspace, it will no longer have access to the datasets in the workspace.

Parameters:

Name Type Description Default user Union[User, str]

The user to remove from the workspace. Can be a User object or a username.

required

Returns:

Name Type Description User User

The user that was removed from the workspace.

Source code in src/argilla/workspaces/_resource.py
def remove_user(self, user: Union[\"User\", str]) -> \"User\":\n    \"\"\"Removes a user from the workspace. After removing a user from the workspace, it will no longer have access\n\n    Args:\n        user (Union[User, str]): The user to remove from the workspace. Can be a User object or a username.\n\n    Returns:\n        User: The user that was removed from the workspace.\n    \"\"\"\n    return self.users.delete(user)\n
"},{"location":"reference/argilla/datasets/dataset_records/","title":"rg.Dataset.records","text":""},{"location":"reference/argilla/datasets/dataset_records/#usage-examples","title":"Usage Examples","text":"

In most cases, you will not need to create a DatasetRecords object directly. Instead, you can access it via the Dataset object:

dataset.records\n

For users familiar with legacy approaches

  1. The Dataset.records object is used to interact with the records in a dataset. It fetches records from the server iteratively, in batches, without keeping a local copy of the records.
  2. The log method of Dataset.records is used to both add and update records in a dataset. If the record includes a known id field, the record will be updated. If the record does not include a known id field, the record will be added.
"},{"location":"reference/argilla/datasets/dataset_records/#adding-records-to-a-dataset","title":"Adding records to a dataset","text":"

To add records to a dataset, use the log method. Records can be added as dictionaries or as Record objects. Single records can also be added as a dictionary or Record.

As a Record object · From a data structure · From a data structure with a mapping · From a Hugging Face dataset

You can also add records to a dataset by initializing a Record object directly.

records = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n    ),\n] # (1)\n\ndataset.records.log(records)\n
  1. This is an illustrative definition. In a real-world scenario, you would iterate over a data structure and create a Record object for each item.
data = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n    },\n] # (1)\n\ndataset.records.log(data)\n
  1. The data structure's keys must match the fields or questions in the Argilla dataset. In this case, there are fields named question and answer.
data = [\n    {\n        \"query\": \"Do you need oxygen to breathe?\",\n        \"response\": \"Yes\",\n    },\n    {\n        \"query\": \"What is the boiling point of water?\",\n        \"response\": \"100 degrees Celsius\",\n    },\n] # (1)\ndataset.records.log(\n    records=data,\n    mapping={\"query\": \"question\", \"response\": \"answer\"} # (2)\n)\n
  1. The data structure's keys must match the fields or questions in the Argilla dataset. In this case, there are fields named question and answer.
  2. The data structure has keys query and response and the Argilla dataset has question and answer. You can use the mapping parameter to map the keys in the data structure to the fields in the Argilla dataset.

You can also add records to a dataset using a Hugging Face dataset. This is useful when you want to use a dataset from the Hugging Face Hub and add it to your Argilla dataset.

You can add the dataset directly if its column names correspond to the names of fields, questions, metadata, or vectors in the Argilla dataset.

If the dataset's schema does not correspond to your Argilla dataset names, you can use a mapping to indicate which columns in the dataset correspond to the Argilla dataset fields.

from datasets import load_dataset\n\nhf_dataset = load_dataset(\"imdb\", split=\"train[:100]\") # (1)\n\ndataset.records.log(records=hf_dataset)\n
  1. In this example, the Hugging Face dataset matches the Argilla dataset schema. If that is not the case, you could use the .map method of the datasets library to prepare the data before adding it to the Argilla dataset.

Here we use the mapping parameter to specify the relationship between the Hugging Face dataset and the Argilla dataset.

dataset.records.log(records=hf_dataset, mapping={\"txt\": \"text\", \"y\": \"label\"}) # (1)\n
  1. In this case, the txt key in the Hugging Face dataset corresponds to the text field in the Argilla dataset, and the y key in the Hugging Face dataset corresponds to the label field in the Argilla dataset.
"},{"location":"reference/argilla/datasets/dataset_records/#updating-records-in-a-dataset","title":"Updating records in a dataset","text":"

Records can also be updated using the log method with records that contain an id to identify the records to be updated. As above, records can be added as dictionaries or as Record objects.

As a Record object · From a data structure · From a data structure with a mapping · From a Hugging Face dataset

You can update records in a dataset by initializing a Record object directly and providing the id field.

records = [\n    rg.Record(\n        metadata={\"department\": \"toys\"},\n        id=\"2\" # (1)\n    ),\n]\n\ndataset.records.log(records)\n
  1. The id field is required to identify the record to be updated. The id field must be unique for each record in the dataset. If the id field is not provided, the record will be added as a new record.

You can also update records in a dataset by providing the id field in the data structure.

data = [\n    {\n        \"metadata\": {\"department\": \"toys\"},\n        \"id\": \"2\" # (1)\n    },\n]\n\ndataset.records.log(data)\n
  1. The id field is required to identify the record to be updated. The id field must be unique for each record in the dataset. If the id field is not provided, the record will be added as a new record.

You can also update records in a dataset by providing the id field in the data structure and using a mapping to map the keys in the data structure to the fields in the dataset.

data = [\n    {\n        \"metadata\": {\"department\": \"toys\"},\n        \"my_id\": \"2\" # (1)\n    },\n]\n\ndataset.records.log(\n    records=data,\n    mapping={\"my_id\": \"id\"} # (2)\n)\n
  1. The id field is required to identify the record to be updated. The id field must be unique for each record in the dataset. If the id field is not provided, the record will be added as a new record.
  2. Let's say that your data structure has keys my_id instead of id. You can use the mapping parameter to map the keys in the data structure to the fields in the dataset.

You can also update records in an Argilla dataset using a Hugging Face dataset. To update records, the Hugging Face dataset must contain an id field to identify the records to be updated, or you can use a mapping to map the keys in the Hugging Face dataset to the fields in the Argilla dataset.

from datasets import load_dataset\n\nhf_dataset = load_dataset(\"imdb\", split=\"train[:100]\") # (1)\n\ndataset.records.log(records=hf_dataset, mapping={\"uuid\": \"id\"}) # (2)\n
  1. In this example, the Hugging Face dataset matches the Argilla dataset schema.
  2. The uuid key in the Hugging Face dataset corresponds to the id field in the Argilla dataset.
"},{"location":"reference/argilla/datasets/dataset_records/#adding-and-updating-records-with-images","title":"Adding and updating records with images","text":"

Argilla datasets can contain image fields. You can add images to a dataset by passing the image to the record object as either a remote URL, a local path to an image file, or a PIL object. The field name must be defined as an rg.ImageField in the dataset's Settings object to be accepted. Images will be stored in the Argilla database and returned using the data URI scheme.

As PIL objects

To retrieve the images as rescaled PIL objects, you can use the to_datasets method when exporting the records, as shown in this how-to guide.

From a data structure with remote URLs · From a data structure with local files or PIL objects · From a Hugging Face dataset
data = [\n    {\n        \"image\": \"https://example.com/image1.jpg\",\n    },\n    {\n        \"image\": \"https://example.com/image2.jpg\",\n    },\n]\n\ndataset.records.log(data)\n
import os\nfrom PIL import Image\n\nimage_dir = \"path/to/images\"\n\ndata = [\n    {\n        \"image\": os.path.join(image_dir, \"image1.jpg\"), # (1)\n    },\n    {\n        \"image\": Image.open(os.path.join(image_dir, \"image2.jpg\")), # (2)\n    },\n]\n\ndataset.records.log(data)\n
  1. The image is a local file path.
  2. The image is a PIL object.

Hugging Face datasets can be passed directly to the log method. The image field must be defined as an Image in the dataset's features.

hf_dataset = load_dataset(\"ylecun/mnist\", split=\"train[:100]\")\ndataset.records.log(records=hf_dataset)\n

If the image field is not defined as an Image in the dataset's features, you can cast the dataset to the correct schema before adding it to the Argilla dataset. This is only necessary when the image data is not already one of the types supported by Argilla (URL, local path, or PIL object).

from datasets import Features, Image, Value, load_dataset\n\nhf_dataset = load_dataset(\"<my_custom_dataset>\") # (1)\nhf_dataset = hf_dataset.cast(\n    features=Features({\"image\": Image(), \"label\": Value(\"string\")}),\n)\ndataset.records.log(records=hf_dataset)\n
  1. In this example, the Hugging Face dataset matches the Argilla dataset schema but the image field is not defined as an Image in the dataset's features.
"},{"location":"reference/argilla/datasets/dataset_records/#iterating-over-records-in-a-dataset","title":"Iterating over records in a dataset","text":"

Dataset.records can be used to iterate over records in a dataset from the server. The records will be fetched in batches from the server:

for record in dataset.records:\n    print(record)\n\n# Fetch records with suggestions and responses\nfor record in dataset.records(with_suggestions=True, with_responses=True):\n    print(record.suggestions)\n    print(record.responses)\n\n# Filter records by a query and fetch records with vectors\nfor record in dataset.records(query=\"capital\", with_vectors=True):\n    print(record.vectors)\n

Check out the rg.Record class reference for more information on the properties and methods available on a record and the rg.Query class reference for more information on the query syntax.

"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords","title":"DatasetRecords","text":"

Bases: Iterable[Record], LoggingMixin

This class is used to work with records from a dataset and is accessed via Dataset.records. The responsibility of this class is to provide an interface to interact with records in a dataset, by adding, updating, fetching, querying, deleting, and exporting records.

Attributes:

Name Type Description client Argilla

The Argilla client object.

dataset Dataset

The dataset object.

Source code in src/argilla/records/_dataset_records.py
class DatasetRecords(Iterable[Record], LoggingMixin):\n    \"\"\"This class is used to work with records from a dataset and is accessed via `Dataset.records`.\n    The responsibility of this class is to provide an interface to interact with records in a dataset,\n    by adding, updating, fetching, querying, deleting, and exporting records.\n\n    Attributes:\n        client (Argilla): The Argilla client object.\n        dataset (Dataset): The dataset object.\n    \"\"\"\n\n    _api: RecordsAPI\n\n    DEFAULT_BATCH_SIZE = 256\n    DEFAULT_DELETE_BATCH_SIZE = 64\n\n    def __init__(\n        self, client: \"Argilla\", dataset: \"Dataset\", mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None\n    ):\n        \"\"\"Initializes a DatasetRecords object with a client and a dataset.\n        Args:\n            client: An Argilla client object.\n            dataset: A Dataset object.\n        \"\"\"\n        self.__client = client\n        self.__dataset = dataset\n        self._mapping = mapping or {}\n        self._api = self.__client.api.records\n\n    def __iter__(self):\n        return DatasetRecordsIterator(self.__dataset, self.__client, with_suggestions=True, with_responses=True)\n\n    def __call__(\n        self,\n        query: Optional[Union[str, Query]] = None,\n        batch_size: Optional[int] = DEFAULT_BATCH_SIZE,\n        start_offset: int = 0,\n        with_suggestions: bool = True,\n        with_responses: bool = True,\n        with_vectors: Optional[Union[List, bool, str]] = None,\n        limit: Optional[int] = None,\n    ) -> DatasetRecordsIterator:\n        \"\"\"Returns an iterator over the records in the dataset on the server.\n\n        Parameters:\n            query: A string or a Query object to filter the records.\n            batch_size: The number of records to fetch in each batch. The default is 256.\n            start_offset: The offset from which to start fetching records. The default is 0.\n            with_suggestions: Whether to include suggestions in the records. The default is True.\n            with_responses: Whether to include responses in the records. The default is True.\n            with_vectors: A list of vector names to include in the records. The default is None.\n                If a list is provided, only the specified vectors will be included.\n                If True is provided, all vectors will be included.\n            limit: The maximum number of records to fetch. 
The default is None.\n\n        Returns:\n            An iterator over the records in the dataset on the server.\n\n        \"\"\"\n        if query and isinstance(query, str):\n            query = Query(query=query)\n\n        if with_vectors:\n            self._validate_vector_names(vector_names=with_vectors)\n\n        return DatasetRecordsIterator(\n            dataset=self.__dataset,\n            client=self.__client,\n            query=query,\n            batch_size=batch_size,\n            start_offset=start_offset,\n            with_suggestions=with_suggestions,\n            with_responses=with_responses,\n            with_vectors=with_vectors,\n            limit=limit,\n        )\n\n    def __repr__(self) -> str:\n        return f\"{self.__class__.__name__}({self.__dataset})\"\n\n    ############################\n    # Public methods\n    ############################\n\n    def log(\n        self,\n        records: Union[List[dict], List[Record], HFDataset],\n        mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n        user_id: Optional[UUID] = None,\n        batch_size: int = DEFAULT_BATCH_SIZE,\n        on_error: RecordErrorHandling = RecordErrorHandling.RAISE,\n    ) -> \"DatasetRecords\":\n        \"\"\"Add or update records in a dataset on the server using the provided records.\n        If the record includes a known `id` field, the record will be updated.\n        If the record does not include a known `id` field, the record will be added as a new record.\n        See `rg.Record` for more information on the record definition.\n\n        Parameters:\n            records: A list of `Record` objects, a Hugging Face Dataset, or a list of dictionaries representing the records.\n                     If records are defined as a dictionaries or a dataset, the keys/ column names should correspond to the\n                     fields in the Argilla dataset's fields and questions. `id` should be provided to identify the records when updating.\n            mapping: A dictionary that maps the keys/ column names in the records to the fields or questions in the Argilla dataset.\n                     To assign an incoming key or column to multiple fields or questions, provide a list or tuple of field or question names.\n            user_id: The user id to be associated with the records' response. If not provided, the current user id is used.\n            batch_size: The number of records to send in each batch. 
The default is 256.\n\n        Returns:\n            A list of Record objects representing the updated records.\n        \"\"\"\n        record_models = self._ingest_records(\n            records=records, mapping=mapping, user_id=user_id or self.__client.me.id, on_error=on_error\n        )\n        batch_size = self._normalize_batch_size(\n            batch_size=batch_size,\n            records_length=len(record_models),\n            max_value=self._api.MAX_RECORDS_PER_UPSERT_BULK,\n        )\n\n        created_or_updated = []\n        records_updated = 0\n\n        for batch in tqdm(\n            iterable=range(0, len(records), batch_size),\n            desc=\"Sending records...\",\n            total=len(records) // batch_size,\n            unit=\"batch\",\n        ):\n            self._log_message(message=f\"Sending records from {batch} to {batch + batch_size}.\")\n            batch_records = record_models[batch : batch + batch_size]\n            models, updated = self._api.bulk_upsert(dataset_id=self.__dataset.id, records=batch_records)\n            created_or_updated.extend([Record.from_model(model=model, dataset=self.__dataset) for model in models])\n            records_updated += updated\n\n        records_created = len(created_or_updated) - records_updated\n        self._log_message(\n            message=f\"Updated {records_updated} records and added {records_created} records to dataset {self.__dataset.name}\",\n            level=\"info\",\n        )\n\n        return self\n\n    def delete(\n        self,\n        records: List[Record],\n        batch_size: int = DEFAULT_DELETE_BATCH_SIZE,\n    ) -> List[Record]:\n        \"\"\"Delete records in a dataset on the server using the provided records\n            and matching based on the id.\n\n        Parameters:\n            records: A list of `Record` objects representing the records to be deleted.\n            batch_size: The number of records to send in each batch. The default is 64.\n\n        Returns:\n            A list of Record objects representing the deleted records.\n\n        \"\"\"\n        mapping = None\n        user_id = self.__client.me.id\n        record_models = self._ingest_records(records=records, mapping=mapping, user_id=user_id)\n        batch_size = self._normalize_batch_size(\n            batch_size=batch_size,\n            records_length=len(record_models),\n            max_value=self._api.MAX_RECORDS_PER_DELETE_BULK,\n        )\n\n        records_deleted = 0\n        for batch in tqdm(\n            iterable=range(0, len(records), batch_size),\n            desc=\"Sending records...\",\n            total=len(records) // batch_size,\n            unit=\"batch\",\n        ):\n            self._log_message(message=f\"Sending records from {batch} to {batch + batch_size}.\")\n            batch_records = record_models[batch : batch + batch_size]\n            self._api.delete_many(dataset_id=self.__dataset.id, records=batch_records)\n            records_deleted += len(batch_records)\n\n        self._log_message(\n            message=f\"Deleted {len(record_models)} records from dataset {self.__dataset.name}\",\n            level=\"info\",\n        )\n\n        return records\n\n    def to_dict(self, flatten: bool = False, orient: str = \"names\") -> Dict[str, Any]:\n        \"\"\"\n        Return the records as a dictionary. 
This is a convenient shortcut for dataset.records(...).to_dict().\n\n        Parameters:\n            flatten (bool): The structure of the exported dictionary.\n                - True: The record fields, metadata, suggestions and responses will be flattened.\n                - False: The record fields, metadata, suggestions and responses will be nested.\n            orient (str): The orientation of the exported dictionary.\n                - \"names\": The keys of the dictionary will be the names of the fields, metadata, suggestions and responses.\n                - \"index\": The keys of the dictionary will be the id of the records.\n        Returns:\n            A dictionary of records.\n\n        \"\"\"\n        return self().to_dict(flatten=flatten, orient=orient)\n\n    def to_list(self, flatten: bool = False) -> List[Dict[str, Any]]:\n        \"\"\"\n        Return the records as a list of dictionaries. This is a convenient shortcut for dataset.records(...).to_list().\n\n        Parameters:\n            flatten (bool): The structure of the exported dictionaries in the list.\n                - True: The record keys are flattened and a dot notation is used to record attributes and their attributes . For example, `label.suggestion` and `label.response`. Records responses are spread across multiple columns for values and users.\n                - False: The record fields, metadata, suggestions and responses will be nested dictionary with keys for record attributes.\n        Returns:\n            A list of dictionaries of records.\n        \"\"\"\n        data = self().to_list(flatten=flatten)\n        return data\n\n    def to_json(self, path: Union[Path, str]) -> Path:\n        \"\"\"\n        Export the records to a file on disk.\n\n        Parameters:\n            path (str): The path to the file to save the records.\n\n        Returns:\n            The path to the file where the records were saved.\n\n        \"\"\"\n        return self().to_json(path=path)\n\n    def from_json(self, path: Union[Path, str]) -> List[Record]:\n        \"\"\"Creates a DatasetRecords object from a disk path to a JSON file.\n            The JSON file should be defined by `DatasetRecords.to_json`.\n\n        Args:\n            path (str): The path to the file containing the records.\n\n        Returns:\n            DatasetRecords: The DatasetRecords object created from the disk path.\n\n        \"\"\"\n        records = JsonIO._records_from_json(path=path)\n        return self.log(records=records)\n\n    def to_datasets(self) -> HFDataset:\n        \"\"\"\n        Export the records to a HFDataset.\n\n        Returns:\n            The dataset containing the records.\n\n        \"\"\"\n\n        return self().to_datasets()\n\n    ############################\n    # Private methods\n    ############################\n\n    def _ingest_records(\n        self,\n        records: Union[List[Dict[str, Any]], List[Record], HFDataset],\n        mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n        user_id: Optional[UUID] = None,\n        on_error: RecordErrorHandling = RecordErrorHandling.RAISE,\n    ) -> List[RecordModel]:\n        \"\"\"Ingests records from a list of dictionaries, a Hugging Face Dataset, or a list of Record objects.\"\"\"\n\n        mapping = mapping or self._mapping\n        if len(records) == 0:\n            raise ValueError(\"No records provided to ingest.\")\n\n        if HFDatasetsIO._is_hf_dataset(dataset=records):\n            records = 
HFDatasetsIO._record_dicts_from_datasets(hf_dataset=records)\n\n        ingested_records = []\n        record_mapper = IngestedRecordMapper(mapping=mapping, dataset=self.__dataset, user_id=user_id)\n        for record in records:\n            try:\n                if isinstance(record, dict):\n                    record = record_mapper(data=record)\n                elif isinstance(record, Record):\n                    record.dataset = self.__dataset\n                else:\n                    raise ValueError(\n                        \"Records should be a a list Record instances, \"\n                        \"a Hugging Face Dataset, or a list of dictionaries representing the records.\"\n                        f\"Found a record of type {type(record)}: {record}.\"\n                    )\n            except Exception as e:\n                if on_error == RecordErrorHandling.IGNORE:\n                    self._log_message(\n                        message=f\"Failed to ingest record from dict {record}: {e}\",\n                        level=\"info\",\n                    )\n                    continue\n                elif on_error == RecordErrorHandling.WARN:\n                    warnings.warn(f\"Failed to ingest record from dict {record}: {e}\")\n                    continue\n                raise RecordsIngestionError(f\"Failed to ingest record from dict {record}\") from e\n            ingested_records.append(record.api_model())\n        return ingested_records\n\n    def _normalize_batch_size(self, batch_size: int, records_length, max_value: int):\n        norm_batch_size = min(batch_size, records_length, max_value)\n\n        if batch_size != norm_batch_size:\n            self._log_message(\n                message=f\"The provided batch size {batch_size} was normalized. Using value {norm_batch_size}.\",\n                level=\"warning\",\n            )\n\n        return norm_batch_size\n\n    def _validate_vector_names(self, vector_names: Union[List[str], str]) -> None:\n        if not isinstance(vector_names, list):\n            vector_names = [vector_names]\n        for vector_name in vector_names:\n            if isinstance(vector_name, bool):\n                continue\n            if vector_name not in self.__dataset.schema:\n                raise ValueError(f\"Vector field {vector_name} not found in dataset schema.\")\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.__init__","title":"__init__(client, dataset, mapping=None)","text":"

Initializes a DatasetRecords object with a client and a dataset.

Parameters:

Name Type Description Default client Argilla

An Argilla client object.

required dataset Dataset

A Dataset object.

required

Source code in src/argilla/records/_dataset_records.py
def __init__(\n    self, client: \"Argilla\", dataset: \"Dataset\", mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None\n):\n    \"\"\"Initializes a DatasetRecords object with a client and a dataset.\n    Args:\n        client: An Argilla client object.\n        dataset: A Dataset object.\n    \"\"\"\n    self.__client = client\n    self.__dataset = dataset\n    self._mapping = mapping or {}\n    self._api = self.__client.api.records\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.__call__","title":"__call__(query=None, batch_size=DEFAULT_BATCH_SIZE, start_offset=0, with_suggestions=True, with_responses=True, with_vectors=None, limit=None)","text":"

Returns an iterator over the records in the dataset on the server.

Parameters:

Name Type Description Default query Optional[Union[str, Query]]

A string or a Query object to filter the records.

None batch_size Optional[int]

The number of records to fetch in each batch. The default is 256.

DEFAULT_BATCH_SIZE start_offset int

The offset from which to start fetching records. The default is 0.

0 with_suggestions bool

Whether to include suggestions in the records. The default is True.

True with_responses bool

Whether to include responses in the records. The default is True.

True with_vectors Optional[Union[List, bool, str]]

A list of vector names to include in the records. The default is None. If a list is provided, only the specified vectors will be included. If True is provided, all vectors will be included.

None limit Optional[int]

The maximum number of records to fetch. The default is None.

None

Returns:

Type Description DatasetRecordsIterator

An iterator over the records in the dataset on the server.

Source code in src/argilla/records/_dataset_records.py
def __call__(\n    self,\n    query: Optional[Union[str, Query]] = None,\n    batch_size: Optional[int] = DEFAULT_BATCH_SIZE,\n    start_offset: int = 0,\n    with_suggestions: bool = True,\n    with_responses: bool = True,\n    with_vectors: Optional[Union[List, bool, str]] = None,\n    limit: Optional[int] = None,\n) -> DatasetRecordsIterator:\n    \"\"\"Returns an iterator over the records in the dataset on the server.\n\n    Parameters:\n        query: A string or a Query object to filter the records.\n        batch_size: The number of records to fetch in each batch. The default is 256.\n        start_offset: The offset from which to start fetching records. The default is 0.\n        with_suggestions: Whether to include suggestions in the records. The default is True.\n        with_responses: Whether to include responses in the records. The default is True.\n        with_vectors: A list of vector names to include in the records. The default is None.\n            If a list is provided, only the specified vectors will be included.\n            If True is provided, all vectors will be included.\n        limit: The maximum number of records to fetch. The default is None.\n\n    Returns:\n        An iterator over the records in the dataset on the server.\n\n    \"\"\"\n    if query and isinstance(query, str):\n        query = Query(query=query)\n\n    if with_vectors:\n        self._validate_vector_names(vector_names=with_vectors)\n\n    return DatasetRecordsIterator(\n        dataset=self.__dataset,\n        client=self.__client,\n        query=query,\n        batch_size=batch_size,\n        start_offset=start_offset,\n        with_suggestions=with_suggestions,\n        with_responses=with_responses,\n        with_vectors=with_vectors,\n        limit=limit,\n    )\n
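An illustrative sketch combining the parameters above (the query string and the vector flag are assumptions about the dataset at hand):

for record in dataset.records(\n    query=\"capital\",\n    batch_size=100,\n    start_offset=50,\n    limit=200,\n    with_vectors=True,\n):\n    print(record.fields)\n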
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.log","title":"log(records, mapping=None, user_id=None, batch_size=DEFAULT_BATCH_SIZE, on_error=RecordErrorHandling.RAISE)","text":"

Add or update records in a dataset on the server using the provided records. If the record includes a known id field, the record will be updated. If the record does not include a known id field, the record will be added as a new record. See rg.Record for more information on the record definition.

Parameters:

Name Type Description Default records Union[List[dict], List[Record], HFDataset]

A list of Record objects, a Hugging Face Dataset, or a list of dictionaries representing the records. If records are defined as dictionaries or a dataset, the keys/column names should correspond to the fields and questions in the Argilla dataset. id should be provided to identify the records when updating.

required mapping Optional[Dict[str, Union[str, Sequence[str]]]]

A dictionary that maps the keys/column names in the records to the fields or questions in the Argilla dataset. To assign an incoming key or column to multiple fields or questions, provide a list or tuple of field or question names.

None user_id Optional[UUID]

The user id to be associated with the records' response. If not provided, the current user id is used.

None batch_size int

The number of records to send in each batch. The default is 256.

DEFAULT_BATCH_SIZE

Returns:

Type Description DatasetRecords

A list of Record objects representing the updated records.

Source code in src/argilla/records/_dataset_records.py
def log(\n    self,\n    records: Union[List[dict], List[Record], HFDataset],\n    mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n    user_id: Optional[UUID] = None,\n    batch_size: int = DEFAULT_BATCH_SIZE,\n    on_error: RecordErrorHandling = RecordErrorHandling.RAISE,\n) -> \"DatasetRecords\":\n    \"\"\"Add or update records in a dataset on the server using the provided records.\n    If the record includes a known `id` field, the record will be updated.\n    If the record does not include a known `id` field, the record will be added as a new record.\n    See `rg.Record` for more information on the record definition.\n\n    Parameters:\n        records: A list of `Record` objects, a Hugging Face Dataset, or a list of dictionaries representing the records.\n                 If records are defined as a dictionaries or a dataset, the keys/ column names should correspond to the\n                 fields in the Argilla dataset's fields and questions. `id` should be provided to identify the records when updating.\n        mapping: A dictionary that maps the keys/ column names in the records to the fields or questions in the Argilla dataset.\n                 To assign an incoming key or column to multiple fields or questions, provide a list or tuple of field or question names.\n        user_id: The user id to be associated with the records' response. If not provided, the current user id is used.\n        batch_size: The number of records to send in each batch. The default is 256.\n\n    Returns:\n        A list of Record objects representing the updated records.\n    \"\"\"\n    record_models = self._ingest_records(\n        records=records, mapping=mapping, user_id=user_id or self.__client.me.id, on_error=on_error\n    )\n    batch_size = self._normalize_batch_size(\n        batch_size=batch_size,\n        records_length=len(record_models),\n        max_value=self._api.MAX_RECORDS_PER_UPSERT_BULK,\n    )\n\n    created_or_updated = []\n    records_updated = 0\n\n    for batch in tqdm(\n        iterable=range(0, len(records), batch_size),\n        desc=\"Sending records...\",\n        total=len(records) // batch_size,\n        unit=\"batch\",\n    ):\n        self._log_message(message=f\"Sending records from {batch} to {batch + batch_size}.\")\n        batch_records = record_models[batch : batch + batch_size]\n        models, updated = self._api.bulk_upsert(dataset_id=self.__dataset.id, records=batch_records)\n        created_or_updated.extend([Record.from_model(model=model, dataset=self.__dataset) for model in models])\n        records_updated += updated\n\n    records_created = len(created_or_updated) - records_updated\n    self._log_message(\n        message=f\"Updated {records_updated} records and added {records_created} records to dataset {self.__dataset.name}\",\n        level=\"info\",\n    )\n\n    return self\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.delete","title":"delete(records, batch_size=DEFAULT_DELETE_BATCH_SIZE)","text":"

Delete records in a dataset on the server using the provided records and matching based on the id.

Parameters:

Name Type Description Default records List[Record]

A list of Record objects representing the records to be deleted.

required batch_size int

The number of records to send in each batch. The default is 64.

DEFAULT_DELETE_BATCH_SIZE

Returns:

Type Description List[Record]

A list of Record objects representing the deleted records.

Source code in src/argilla/records/_dataset_records.py
def delete(\n    self,\n    records: List[Record],\n    batch_size: int = DEFAULT_DELETE_BATCH_SIZE,\n) -> List[Record]:\n    \"\"\"Delete records in a dataset on the server using the provided records\n        and matching based on the id.\n\n    Parameters:\n        records: A list of `Record` objects representing the records to be deleted.\n        batch_size: The number of records to send in each batch. The default is 64.\n\n    Returns:\n        A list of Record objects representing the deleted records.\n\n    \"\"\"\n    mapping = None\n    user_id = self.__client.me.id\n    record_models = self._ingest_records(records=records, mapping=mapping, user_id=user_id)\n    batch_size = self._normalize_batch_size(\n        batch_size=batch_size,\n        records_length=len(record_models),\n        max_value=self._api.MAX_RECORDS_PER_DELETE_BULK,\n    )\n\n    records_deleted = 0\n    for batch in tqdm(\n        iterable=range(0, len(records), batch_size),\n        desc=\"Sending records...\",\n        total=len(records) // batch_size,\n        unit=\"batch\",\n    ):\n        self._log_message(message=f\"Sending records from {batch} to {batch + batch_size}.\")\n        batch_records = record_models[batch : batch + batch_size]\n        self._api.delete_many(dataset_id=self.__dataset.id, records=batch_records)\n        records_deleted += len(batch_records)\n\n    self._log_message(\n        message=f\"Deleted {len(record_models)} records from dataset {self.__dataset.name}\",\n        level=\"info\",\n    )\n\n    return records\n
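For example, records matching a query can be fetched first and then deleted (the query string is a placeholder and follows the rg.Query syntax):

records_to_delete = list(dataset.records(query=\"label:negative\"))\ndataset.records.delete(records=records_to_delete)\n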
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.to_dict","title":"to_dict(flatten=False, orient='names')","text":"

Return the records as a dictionary. This is a convenient shortcut for dataset.records(...).to_dict().

Parameters:

Name Type Description Default flatten bool

The structure of the exported dictionary. - True: The record fields, metadata, suggestions and responses will be flattened. - False: The record fields, metadata, suggestions and responses will be nested.

False orient str

The orientation of the exported dictionary. - \"names\": The keys of the dictionary will be the names of the fields, metadata, suggestions and responses. - \"index\": The keys of the dictionary will be the id of the records.

'names'

Returns: A dictionary of records.

Source code in src/argilla/records/_dataset_records.py
def to_dict(self, flatten: bool = False, orient: str = \"names\") -> Dict[str, Any]:\n    \"\"\"\n    Return the records as a dictionary. This is a convenient shortcut for dataset.records(...).to_dict().\n\n    Parameters:\n        flatten (bool): The structure of the exported dictionary.\n            - True: The record fields, metadata, suggestions and responses will be flattened.\n            - False: The record fields, metadata, suggestions and responses will be nested.\n        orient (str): The orientation of the exported dictionary.\n            - \"names\": The keys of the dictionary will be the names of the fields, metadata, suggestions and responses.\n            - \"index\": The keys of the dictionary will be the id of the records.\n    Returns:\n        A dictionary of records.\n\n    \"\"\"\n    return self().to_dict(flatten=flatten, orient=orient)\n
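A short sketch of the two orientations; the shapes in the comments are indicative, assuming fields named question and answer:

by_name = dataset.records.to_dict(orient=\"names\") # {\"question\": [...], \"answer\": [...], ...}\nby_id = dataset.records.to_dict(orient=\"index\") # {\"<record_id>\": {\"question\": ..., \"answer\": ...}, ...}\n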
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.to_list","title":"to_list(flatten=False)","text":"

Return the records as a list of dictionaries. This is a convenient shortcut for dataset.records(...).to_list().

Parameters:

Name Type Description Default flatten bool

The structure of the exported dictionaries in the list. - True: The record keys are flattened, using dot notation for record attributes and their sub-attributes. For example, label.suggestion and label.response. Record responses are spread across multiple columns for values and users. - False: The record fields, metadata, suggestions and responses are returned as nested dictionaries keyed by record attribute.

False

Returns: A list of dictionaries of records.
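A short usage sketch (assuming a dataset with a question named label, as in the flattened key example above, and at least one logged record):

rows = dataset.records.to_list(flatten=True)\nprint(rows[0][\"label.suggestion\"])\n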

Source code in src/argilla/records/_dataset_records.py
def to_list(self, flatten: bool = False) -> List[Dict[str, Any]]:\n    \"\"\"\n    Return the records as a list of dictionaries. This is a convenient shortcut for dataset.records(...).to_list().\n\n    Parameters:\n        flatten (bool): The structure of the exported dictionaries in the list.\n            - True: The record keys are flattened and a dot notation is used to record attributes and their attributes . For example, `label.suggestion` and `label.response`. Records responses are spread across multiple columns for values and users.\n            - False: The record fields, metadata, suggestions and responses will be nested dictionary with keys for record attributes.\n    Returns:\n        A list of dictionaries of records.\n    \"\"\"\n    data = self().to_list(flatten=flatten)\n    return data\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.to_json","title":"to_json(path)","text":"

Export the records to a file on disk.

Parameters:

Name Type Description Default path str

The path to the file to save the records.

required

Returns:

Type Description Path

The path to the file where the records were saved.
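A short usage sketch (the file name is illustrative):

json_path = dataset.records.to_json(path=\"records.json\")\n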

Source code in src/argilla/records/_dataset_records.py
def to_json(self, path: Union[Path, str]) -> Path:\n    \"\"\"\n    Export the records to a file on disk.\n\n    Parameters:\n        path (str): The path to the file to save the records.\n\n    Returns:\n        The path to the file where the records were saved.\n\n    \"\"\"\n    return self().to_json(path=path)\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.from_json","title":"from_json(path)","text":"

Creates a DatasetRecords object from a disk path to a JSON file. The JSON file should have been created with DatasetRecords.to_json.

Parameters:

Name Type Description Default path str

The path to the file containing the records.

required

Returns:

Name Type Description DatasetRecords List[Record]

The DatasetRecords object created from the disk path.
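A short usage sketch, assuming the file was previously created with DatasetRecords.to_json:

records = dataset.records.from_json(path=\"records.json\")\n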

Source code in src/argilla/records/_dataset_records.py
def from_json(self, path: Union[Path, str]) -> List[Record]:\n    \"\"\"Creates a DatasetRecords object from a disk path to a JSON file.\n        The JSON file should be defined by `DatasetRecords.to_json`.\n\n    Args:\n        path (str): The path to the file containing the records.\n\n    Returns:\n        DatasetRecords: The DatasetRecords object created from the disk path.\n\n    \"\"\"\n    records = JsonIO._records_from_json(path=path)\n    return self.log(records=records)\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.to_datasets","title":"to_datasets()","text":"

Export the records to an HFDataset.

Returns:

Type Description HFDataset

The dataset containing the records.
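A short usage sketch (pushing the exported dataset to the Hub is optional and the repo id is hypothetical):

hf_dataset = dataset.records.to_datasets()\nhf_dataset.push_to_hub(\"my-org/my-dataset\")\n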

Source code in src/argilla/records/_dataset_records.py
def to_datasets(self) -> HFDataset:\n    \"\"\"\n    Export the records to a HFDataset.\n\n    Returns:\n        The dataset containing the records.\n\n    \"\"\"\n\n    return self().to_datasets()\n
"},{"location":"reference/argilla/datasets/datasets/","title":"rg.Dataset","text":"

Dataset is a class that represents a collection of records. It is used to store and manage records in Argilla.

"},{"location":"reference/argilla/datasets/datasets/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/datasets/datasets/#creating-a-dataset","title":"Creating a Dataset","text":"

To create a new dataset, you need to define its name and settings. The optional workspace and client parameters let you create the dataset in a specific workspace or on a specific Argilla instance.

dataset = rg.Dataset(\n    name=\"my_dataset\",\n    settings=rg.Settings(\n        fields=[\n            rg.TextField(name=\"text\"),\n        ],\n        questions=[\n            rg.TextQuestion(name=\"response\"),\n        ],\n    ),\n)\ndataset.create()\n

For a detailed guide to the dataset creation and publication process, see the Dataset how-to guide.

"},{"location":"reference/argilla/datasets/datasets/#retrieving-an-existing-dataset","title":"Retrieving an existing Dataset","text":"

To retrieve an existing dataset, use client.datasets("my_dataset") instead of instantiating a new Dataset object.

dataset = client.datasets(\"my_dataset\")\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset","title":"Dataset","text":"

Bases: Resource, HubImportExportMixin, DiskImportExportMixin

Class for interacting with Argilla Datasets

Attributes:

Name Type Description name str

Name of the dataset.

records DatasetRecords

The records object for the dataset. Used to interact with the records of the dataset by iterating, searching, etc.

settings Settings

The settings object of the dataset. Used to configure the dataset with fields, questions, guidelines, etc.

fields list

The fields of the dataset, for example the rg.TextField of the dataset. Defined in the settings.

questions list

The questions of the dataset defined in the settings. For example, the rg.TextQuestion that you want labelers to answer.

guidelines str

The guidelines of the dataset defined in the settings. Used to provide instructions to labelers.

allow_extra_metadata bool

True if extra metadata is allowed, False otherwise.

Source code in src/argilla/datasets/_resource.py
class Dataset(Resource, HubImportExportMixin, DiskImportExportMixin):\n    \"\"\"Class for interacting with Argilla Datasets\n\n    Attributes:\n        name: Name of the dataset.\n        records (DatasetRecords): The records object for the dataset. Used to interact with the records of the dataset by iterating, searching, etc.\n        settings (Settings): The settings object of the dataset. Used to configure the dataset with fields, questions, guidelines, etc.\n        fields (list): The fields of the dataset, for example the `rg.TextField` of the dataset. Defined in the settings.\n        questions (list): The questions of the dataset defined in the settings. For example, the `rg.TextQuestion` that you want labelers to answer.\n        guidelines (str): The guidelines of the dataset defined in the settings. Used to provide instructions to labelers.\n        allow_extra_metadata (bool): True if extra metadata is allowed, False otherwise.\n    \"\"\"\n\n    name: str\n    id: Optional[UUID]\n\n    _api: \"DatasetsAPI\"\n    _model: \"DatasetModel\"\n\n    def __init__(\n        self,\n        name: Optional[str] = None,\n        workspace: Optional[Union[\"Workspace\", str, UUID]] = None,\n        settings: Optional[Settings] = None,\n        client: Optional[\"Argilla\"] = None,\n    ) -> None:\n        \"\"\"Initializes a new Argilla Dataset object with the given parameters.\n\n        Parameters:\n            name (str): Name of the dataset. Replaced by random UUID if not assigned.\n            workspace (UUID): Workspace of the dataset. Default is the first workspace found in the server.\n            settings (Settings): Settings class to be used to configure the dataset.\n            client (Argilla): Instance of Argilla to connect with the server. 
Default is the default client.\n        \"\"\"\n        client = client or Argilla._get_default()\n        super().__init__(client=client, api=client.api.datasets)\n        if name is None:\n            name = f\"dataset_{uuid4()}\"\n            self._log_message(f\"Settings dataset name to unique UUID: {name}\")\n\n        self._workspace = workspace\n        self._model = DatasetModel(name=name)\n        self._settings = settings._copy() if settings else Settings(_dataset=self)\n        self._settings.dataset = self\n        self.__records = DatasetRecords(client=self._client, dataset=self, mapping=self._settings.mapping)\n\n    #####################\n    #  Properties       #\n    #####################\n\n    @property\n    def name(self) -> str:\n        return self._model.name\n\n    @name.setter\n    def name(self, value: str) -> None:\n        self._model.name = value\n\n    @property\n    def records(self) -> \"DatasetRecords\":\n        return self.__records\n\n    @property\n    def settings(self) -> Settings:\n        return self._settings\n\n    @settings.setter\n    def settings(self, value: Settings) -> None:\n        settings_copy = value._copy()\n        settings_copy.dataset = self\n        self._settings = settings_copy\n\n    @property\n    def fields(self) -> list:\n        return self.settings.fields\n\n    @property\n    def questions(self) -> list:\n        return self.settings.questions\n\n    @property\n    def guidelines(self) -> str:\n        return self.settings.guidelines\n\n    @guidelines.setter\n    def guidelines(self, value: str) -> None:\n        self.settings.guidelines = value\n\n    @property\n    def allow_extra_metadata(self) -> bool:\n        return self.settings.allow_extra_metadata\n\n    @allow_extra_metadata.setter\n    def allow_extra_metadata(self, value: bool) -> None:\n        self.settings.allow_extra_metadata = value\n\n    @property\n    def schema(self) -> dict:\n        return self.settings.schema\n\n    @property\n    def workspace(self) -> Workspace:\n        self._workspace = self._resolve_workspace()\n        return self._workspace\n\n    @property\n    def distribution(self) -> TaskDistribution:\n        return self.settings.distribution\n\n    @distribution.setter\n    def distribution(self, value: TaskDistribution) -> None:\n        self.settings.distribution = value\n\n    #####################\n    #  Core methods     #\n    #####################\n\n    def get(self) -> \"Dataset\":\n        super().get()\n        self.settings.get()\n        return self\n\n    def create(self) -> \"Dataset\":\n        \"\"\"Creates the dataset on the server with the `Settings` configuration.\n\n        Returns:\n            Dataset: The created dataset object.\n        \"\"\"\n        try:\n            super().create()\n        except ForbiddenError as e:\n            settings_url = f\"{self._client.api_url}/user-settings\"\n            user_role = self._client.me.role.value\n            user_name = self._client.me.username\n            workspace_name = self.workspace.name\n            message = f\"\"\"User '{user_name}' is not authorized to create a dataset in workspace '{workspace_name}'\n            with role '{user_role}'. 
Go to {settings_url} to view your role.\"\"\"\n            raise ForbiddenError(message) from e\n        try:\n            return self._publish()\n        except Exception as e:\n            self._log_message(message=f\"Error creating dataset: {e}\", level=\"error\")\n            self._rollback_dataset_creation()\n            raise SettingsError from e\n\n    def update(self) -> \"Dataset\":\n        \"\"\"Updates the dataset on the server with the current settings.\n\n        Returns:\n            Dataset: The updated dataset object.\n        \"\"\"\n        self.settings.update()\n        return self\n\n    def progress(self, with_users_distribution: bool = False) -> dict:\n        \"\"\"Returns the team's progress on the dataset.\n\n        Parameters:\n            with_users_distribution (bool): If True, the progress of the dataset is returned\n                with users distribution. This includes the number of responses made by each user.\n\n        Returns:\n            dict: The team's progress on the dataset.\n\n        An example of a response when `with_users_distribution` is `True`:\n        ```json\n        {\n            \"total\": 100,\n            \"completed\": 50,\n            \"pending\": 50,\n            \"users\": {\n                \"user1\": {\n                   \"completed\": { \"submitted\": 10, \"draft\": 5, \"discarded\": 5},\n                   \"pending\": { \"submitted\": 5, \"draft\": 10, \"discarded\": 10},\n                },\n                \"user2\": {\n                   \"completed\": { \"submitted\": 20, \"draft\": 10, \"discarded\": 5},\n                   \"pending\": { \"submitted\": 2, \"draft\": 25, \"discarded\": 0},\n                },\n                ...\n        }\n        ```\n\n        \"\"\"\n\n        progress = self._api.get_progress(dataset_id=self._model.id).model_dump()\n\n        if with_users_distribution:\n            users_progress = self._api.list_users_progress(dataset_id=self._model.id)\n            users_distribution = {\n                user.username: {\n                    \"completed\": user.completed.model_dump(),\n                    \"pending\": user.pending.model_dump(),\n                }\n                for user in users_progress\n            }\n\n            progress.update({\"users\": users_distribution})\n\n        return progress\n\n    @classmethod\n    def from_model(cls, model: DatasetModel, client: \"Argilla\") -> \"Dataset\":\n        instance = cls(client=client, workspace=model.workspace_id, name=model.name)\n        instance._model = model\n\n        return instance\n\n    #####################\n    #  Utility methods  #\n    #####################\n\n    def api_model(self) -> DatasetModel:\n        self._model.workspace_id = self.workspace.id\n        return self._model\n\n    def _publish(self) -> \"Dataset\":\n        self._settings.create()\n        self._api.publish(dataset_id=self._model.id)\n\n        return self.get()\n\n    def _resolve_workspace(self) -> Workspace:\n        workspace = self._workspace\n\n        if workspace is None:\n            workspace = self._client.workspaces.default\n            warnings.warn(f\"Workspace not provided. 
Using default workspace: {workspace.name} id: {workspace.id}\")\n        elif isinstance(workspace, str):\n            workspace = self._client.workspaces(workspace)\n            if workspace is None:\n                available_workspace_names = [ws.name for ws in self._client.workspaces]\n                raise NotFoundError(\n                    f\"Workspace with name {workspace} not found. Available workspaces: {available_workspace_names}\"\n                )\n        elif isinstance(workspace, UUID):\n            ws_model = self._client.api.workspaces.get(workspace)\n            workspace = Workspace.from_model(ws_model, client=self._client)\n        elif not isinstance(workspace, Workspace):\n            raise ValueError(f\"Wrong workspace value found {workspace}\")\n\n        return workspace\n\n    def _rollback_dataset_creation(self):\n        if not self._is_published():\n            self.delete()\n\n    def _is_published(self) -> bool:\n        return self._model.status == \"ready\"\n\n    @classmethod\n    def _sanitize_name(cls, name: str):\n        name = name.replace(\" \", \"_\")\n\n        for character in [\"/\", \"\\\\\", \".\", \",\", \";\", \":\", \"-\", \"+\", \"=\"]:\n            name = name.replace(character, \"-\")\n        return name\n\n    def _with_client(self, client: Argilla) -> \"Self\":\n        return super()._with_client(client=client)\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset.__init__","title":"__init__(name=None, workspace=None, settings=None, client=None)","text":"

Initializes a new Argilla Dataset object with the given parameters.

Parameters:

Name Type Description Default name str

Name of the dataset. Replaced by a random UUID if not assigned.

None workspace UUID

Workspace of the dataset. Default is the first workspace found on the server.

None settings Settings

Settings class to be used to configure the dataset.

None client Argilla

Instance of Argilla to connect with the server. Default is the default client.

None Source code in src/argilla/datasets/_resource.py
def __init__(\n    self,\n    name: Optional[str] = None,\n    workspace: Optional[Union[\"Workspace\", str, UUID]] = None,\n    settings: Optional[Settings] = None,\n    client: Optional[\"Argilla\"] = None,\n) -> None:\n    \"\"\"Initializes a new Argilla Dataset object with the given parameters.\n\n    Parameters:\n        name (str): Name of the dataset. Replaced by random UUID if not assigned.\n        workspace (UUID): Workspace of the dataset. Default is the first workspace found in the server.\n        settings (Settings): Settings class to be used to configure the dataset.\n        client (Argilla): Instance of Argilla to connect with the server. Default is the default client.\n    \"\"\"\n    client = client or Argilla._get_default()\n    super().__init__(client=client, api=client.api.datasets)\n    if name is None:\n        name = f\"dataset_{uuid4()}\"\n        self._log_message(f\"Settings dataset name to unique UUID: {name}\")\n\n    self._workspace = workspace\n    self._model = DatasetModel(name=name)\n    self._settings = settings._copy() if settings else Settings(_dataset=self)\n    self._settings.dataset = self\n    self.__records = DatasetRecords(client=self._client, dataset=self, mapping=self._settings.mapping)\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset.create","title":"create()","text":"

Creates the dataset on the server with the Settings configuration.

Returns:

Name Type Description Dataset Dataset

The created dataset object.

Source code in src/argilla/datasets/_resource.py
def create(self) -> \"Dataset\":\n    \"\"\"Creates the dataset on the server with the `Settings` configuration.\n\n    Returns:\n        Dataset: The created dataset object.\n    \"\"\"\n    try:\n        super().create()\n    except ForbiddenError as e:\n        settings_url = f\"{self._client.api_url}/user-settings\"\n        user_role = self._client.me.role.value\n        user_name = self._client.me.username\n        workspace_name = self.workspace.name\n        message = f\"\"\"User '{user_name}' is not authorized to create a dataset in workspace '{workspace_name}'\n        with role '{user_role}'. Go to {settings_url} to view your role.\"\"\"\n        raise ForbiddenError(message) from e\n    try:\n        return self._publish()\n    except Exception as e:\n        self._log_message(message=f\"Error creating dataset: {e}\", level=\"error\")\n        self._rollback_dataset_creation()\n        raise SettingsError from e\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset.update","title":"update()","text":"

Updates the dataset on the server with the current settings.

Returns:

Name Type Description Dataset Dataset

The updated dataset object.
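A short usage sketch (the guidelines text is illustrative; the guidelines property setter is shown in the class source above):

dataset.guidelines = \"Classify the sentiment of each text.\"\ndataset.update()\n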

Source code in src/argilla/datasets/_resource.py
def update(self) -> \"Dataset\":\n    \"\"\"Updates the dataset on the server with the current settings.\n\n    Returns:\n        Dataset: The updated dataset object.\n    \"\"\"\n    self.settings.update()\n    return self\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset.progress","title":"progress(with_users_distribution=False)","text":"

Returns the team's progress on the dataset.

Parameters:

Name Type Description Default with_users_distribution bool

If True, the progress of the dataset is returned together with its distribution across users. This includes the number of responses made by each user.

False

Returns:

Name Type Description dict dict

The team's progress on the dataset.

An example of a response when with_users_distribution is True:

{\n    \"total\": 100,\n    \"completed\": 50,\n    \"pending\": 50,\n    \"users\": {\n        \"user1\": {\n           \"completed\": { \"submitted\": 10, \"draft\": 5, \"discarded\": 5},\n           \"pending\": { \"submitted\": 5, \"draft\": 10, \"discarded\": 10},\n        },\n        \"user2\": {\n           \"completed\": { \"submitted\": 20, \"draft\": 10, \"discarded\": 5},\n           \"pending\": { \"submitted\": 2, \"draft\": 25, \"discarded\": 0},\n        },\n        ...\n}\n

Source code in src/argilla/datasets/_resource.py
def progress(self, with_users_distribution: bool = False) -> dict:\n    \"\"\"Returns the team's progress on the dataset.\n\n    Parameters:\n        with_users_distribution (bool): If True, the progress of the dataset is returned\n            with users distribution. This includes the number of responses made by each user.\n\n    Returns:\n        dict: The team's progress on the dataset.\n\n    An example of a response when `with_users_distribution` is `True`:\n    ```json\n    {\n        \"total\": 100,\n        \"completed\": 50,\n        \"pending\": 50,\n        \"users\": {\n            \"user1\": {\n               \"completed\": { \"submitted\": 10, \"draft\": 5, \"discarded\": 5},\n               \"pending\": { \"submitted\": 5, \"draft\": 10, \"discarded\": 10},\n            },\n            \"user2\": {\n               \"completed\": { \"submitted\": 20, \"draft\": 10, \"discarded\": 5},\n               \"pending\": { \"submitted\": 2, \"draft\": 25, \"discarded\": 0},\n            },\n            ...\n    }\n    ```\n\n    \"\"\"\n\n    progress = self._api.get_progress(dataset_id=self._model.id).model_dump()\n\n    if with_users_distribution:\n        users_progress = self._api.list_users_progress(dataset_id=self._model.id)\n        users_distribution = {\n            user.username: {\n                \"completed\": user.completed.model_dump(),\n                \"pending\": user.pending.model_dump(),\n            }\n            for user in users_progress\n        }\n\n        progress.update({\"users\": users_distribution})\n\n    return progress\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._disk.DiskImportExportMixin","title":"DiskImportExportMixin","text":"

Bases: ABC

A mixin for exporting and importing datasets to and from disk.

Source code in src/argilla/datasets/_io/_disk.py
class DiskImportExportMixin(ABC):\n    \"\"\"A mixin for exporting and importing datasets to and from disk.\"\"\"\n\n    _model: DatasetModel\n    _DEFAULT_RECORDS_PATH = \"records.json\"\n    _DEFAULT_CONFIG_REPO_DIR = \".argilla\"\n    _DEFAULT_SETTINGS_PATH = f\"{_DEFAULT_CONFIG_REPO_DIR}/settings.json\"\n    _DEFAULT_DATASET_PATH = f\"{_DEFAULT_CONFIG_REPO_DIR}/dataset.json\"\n    _DEFAULT_CONFIGURATION_FILES = [_DEFAULT_SETTINGS_PATH, _DEFAULT_DATASET_PATH]\n\n    def to_disk(self: \"Dataset\", path: str, *, with_records: bool = True) -> str:\n        \"\"\"Exports the dataset to disk in the given path. The dataset is exported as a directory containing the dataset model, settings and records as json files.\n\n        Parameters:\n            path (str): The path to export the dataset to. Must be an empty directory.\n            with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n        \"\"\"\n        dataset_path, settings_path, records_path = self._define_child_paths(path=path)\n        logging.info(f\"Loading dataset from {dataset_path}\")\n        logging.info(f\"Loading settings from {settings_path}\")\n        logging.info(f\"Loading records from {records_path}\")\n        # Export the dataset model, settings and records\n        self._persist_dataset_model(path=dataset_path)\n        self.settings.to_json(path=settings_path)\n        if with_records:\n            self.records.to_json(path=records_path)\n\n        return path\n\n    @classmethod\n    def from_disk(\n        cls: Type[\"Dataset\"],\n        path: str,\n        *,\n        name: Optional[str] = None,\n        workspace: Optional[Union[\"Workspace\", str]] = None,\n        client: Optional[\"Argilla\"] = None,\n        with_records: bool = True,\n    ) -> \"Dataset\":\n        \"\"\"Imports a dataset from disk as a directory containing the dataset model, settings and records.\n        The directory should be defined using the `to_disk` method.\n\n        Parameters:\n            path (str): The path to the directory containing the dataset model, settings and records.\n            name (str, optional): The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.\n            workspace (Union[Workspace, str], optional): The workspace to import the dataset to. Defaults to None and default workspace is used.\n            client (Argilla, optional): The client to use for the import. Defaults to None and the default client is used.\n            with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n        \"\"\"\n\n        client = client or Argilla._get_default()\n\n        try:\n            dataset_path, settings_path, records_path = cls._define_child_paths(path=path)\n            logging.info(f\"Loading dataset from {dataset_path}\")\n            logging.info(f\"Loading settings from {settings_path}\")\n            logging.info(f\"Loading records from {records_path}\")\n\n            dataset_model = cls._load_dataset_model(path=dataset_path)\n        except (NotADirectoryError, FileNotFoundError) as e:\n            raise ImportDatasetError(f\"Error loading dataset from disk. 
{e}\") from e\n\n        # Get the relevant workspace_id of the incoming dataset\n        if isinstance(workspace, str):\n            workspace = client.workspaces(workspace)\n            if not workspace:\n                raise ArgillaError(f\"Workspace {workspace} not found on the server.\")\n        else:\n            warnings.warn(\"Workspace not provided. Using default workspace.\")\n            workspace = client.workspaces.default\n        dataset_model.workspace_id = workspace.id\n\n        if name and (name != dataset_model.name):\n            logging.info(f\"Changing dataset name from {dataset_model.name} to {name}\")\n            dataset_model.name = name\n\n        if client.api.datasets.name_exists(name=dataset_model.name, workspace_id=workspace.id):\n            warnings.warn(\n                f\"Loaded dataset name {dataset_model.name} already exists in the workspace {workspace.name} so using it. To create a new dataset, provide a unique name to the `name` parameter.\"\n            )\n            dataset_model = client.api.datasets.get_by_name_and_workspace_id(\n                name=dataset_model.name, workspace_id=workspace.id\n            )\n            dataset = cls.from_model(model=dataset_model, client=client)\n        else:\n            # Create a new dataset and load the settings and records\n            if not os.path.exists(settings_path):\n                raise ImportDatasetError(f\"Settings file not found at {settings_path}\")\n\n            dataset = cls.from_model(model=dataset_model, client=client)\n            dataset.settings = Settings.from_json(path=settings_path)\n            dataset.create()\n\n        if os.path.exists(records_path) and with_records:\n            try:\n                dataset.records.from_json(path=records_path)\n            except RecordsIngestionError as e:\n                raise RecordsIngestionError(\n                    message=\"Error importing dataset records from disk. \"\n                    \"Records and datasets settings are not compatible.\"\n                ) from e\n\n        return dataset\n\n    ############################\n    # Utility methods\n    ############################\n\n    def _persist_dataset_model(self, path: Path):\n        \"\"\"Persists the dataset model to disk.\"\"\"\n        if path.exists():\n            raise FileExistsError(f\"Dataset already exists at {path}\")\n        with open(file=path, mode=\"w\") as f:\n            json.dump(self.api_model().model_dump(), f)\n\n    @classmethod\n    def _load_dataset_model(cls, path: Path):\n        \"\"\"Loads the dataset model from disk.\"\"\"\n        if not os.path.exists(path):\n            raise FileNotFoundError(f\"Dataset model not found at {path}\")\n        with open(file=path, mode=\"r\") as f:\n            dataset_model = json.load(f)\n            dataset_model = DatasetModel(**dataset_model)\n        return dataset_model\n\n    @classmethod\n    def _define_child_paths(cls, path: Union[Path, str]) -> Tuple[Path, Path, Path]:\n        path = Path(path)\n        if not path.is_dir():\n            raise NotADirectoryError(f\"Path {path} is not a directory\")\n        main_path = path / cls._DEFAULT_CONFIG_REPO_DIR\n        main_path.mkdir(exist_ok=True)\n        dataset_path = path / cls._DEFAULT_DATASET_PATH\n        settings_path = path / cls._DEFAULT_SETTINGS_PATH\n        records_path = path / cls._DEFAULT_RECORDS_PATH\n        return dataset_path, settings_path, records_path\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._disk.DiskImportExportMixin.to_disk","title":"to_disk(path, *, with_records=True)","text":"

Exports the dataset to disk at the given path. The dataset is exported as a directory containing the dataset model, settings and records as JSON files.

Parameters:

Name Type Description Default path str

The path to export the dataset to. Must be an empty directory.

required with_records bool

whether to export the records to disk along with the dataset. Defaults to True.

True Source code in src/argilla/datasets/_io/_disk.py
def to_disk(self: \"Dataset\", path: str, *, with_records: bool = True) -> str:\n    \"\"\"Exports the dataset to disk in the given path. The dataset is exported as a directory containing the dataset model, settings and records as json files.\n\n    Parameters:\n        path (str): The path to export the dataset to. Must be an empty directory.\n        with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n    \"\"\"\n    dataset_path, settings_path, records_path = self._define_child_paths(path=path)\n    logging.info(f\"Loading dataset from {dataset_path}\")\n    logging.info(f\"Loading settings from {settings_path}\")\n    logging.info(f\"Loading records from {records_path}\")\n    # Export the dataset model, settings and records\n    self._persist_dataset_model(path=dataset_path)\n    self.settings.to_json(path=settings_path)\n    if with_records:\n        self.records.to_json(path=records_path)\n\n    return path\n
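A short usage sketch (the directory name is illustrative; per the parameters above, the target should be an empty directory):

import os\n\nos.makedirs(\"my_dataset_export\", exist_ok=True)\ndataset.to_disk(path=\"my_dataset_export\")\n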
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._disk.DiskImportExportMixin.from_disk","title":"from_disk(path, *, name=None, workspace=None, client=None, with_records=True) classmethod","text":"

Imports a dataset from disk as a directory containing the dataset model, settings and records. The directory should have been created using the to_disk method.

Parameters:

Name Type Description Default path str

The path to the directory containing the dataset model, settings and records.

required name str

The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.

None workspace Union[Workspace, str]

The workspace to import the dataset to. Defaults to None and the default workspace is used.

None client Argilla

The client to use for the import. Defaults to None and the default client is used.

None with_records bool

whether to load the records from disk along with the dataset. Defaults to True.

True Source code in src/argilla/datasets/_io/_disk.py
@classmethod\ndef from_disk(\n    cls: Type[\"Dataset\"],\n    path: str,\n    *,\n    name: Optional[str] = None,\n    workspace: Optional[Union[\"Workspace\", str]] = None,\n    client: Optional[\"Argilla\"] = None,\n    with_records: bool = True,\n) -> \"Dataset\":\n    \"\"\"Imports a dataset from disk as a directory containing the dataset model, settings and records.\n    The directory should be defined using the `to_disk` method.\n\n    Parameters:\n        path (str): The path to the directory containing the dataset model, settings and records.\n        name (str, optional): The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.\n        workspace (Union[Workspace, str], optional): The workspace to import the dataset to. Defaults to None and default workspace is used.\n        client (Argilla, optional): The client to use for the import. Defaults to None and the default client is used.\n        with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n    \"\"\"\n\n    client = client or Argilla._get_default()\n\n    try:\n        dataset_path, settings_path, records_path = cls._define_child_paths(path=path)\n        logging.info(f\"Loading dataset from {dataset_path}\")\n        logging.info(f\"Loading settings from {settings_path}\")\n        logging.info(f\"Loading records from {records_path}\")\n\n        dataset_model = cls._load_dataset_model(path=dataset_path)\n    except (NotADirectoryError, FileNotFoundError) as e:\n        raise ImportDatasetError(f\"Error loading dataset from disk. {e}\") from e\n\n    # Get the relevant workspace_id of the incoming dataset\n    if isinstance(workspace, str):\n        workspace = client.workspaces(workspace)\n        if not workspace:\n            raise ArgillaError(f\"Workspace {workspace} not found on the server.\")\n    else:\n        warnings.warn(\"Workspace not provided. Using default workspace.\")\n        workspace = client.workspaces.default\n    dataset_model.workspace_id = workspace.id\n\n    if name and (name != dataset_model.name):\n        logging.info(f\"Changing dataset name from {dataset_model.name} to {name}\")\n        dataset_model.name = name\n\n    if client.api.datasets.name_exists(name=dataset_model.name, workspace_id=workspace.id):\n        warnings.warn(\n            f\"Loaded dataset name {dataset_model.name} already exists in the workspace {workspace.name} so using it. To create a new dataset, provide a unique name to the `name` parameter.\"\n        )\n        dataset_model = client.api.datasets.get_by_name_and_workspace_id(\n            name=dataset_model.name, workspace_id=workspace.id\n        )\n        dataset = cls.from_model(model=dataset_model, client=client)\n    else:\n        # Create a new dataset and load the settings and records\n        if not os.path.exists(settings_path):\n            raise ImportDatasetError(f\"Settings file not found at {settings_path}\")\n\n        dataset = cls.from_model(model=dataset_model, client=client)\n        dataset.settings = Settings.from_json(path=settings_path)\n        dataset.create()\n\n    if os.path.exists(records_path) and with_records:\n        try:\n            dataset.records.from_json(path=records_path)\n        except RecordsIngestionError as e:\n            raise RecordsIngestionError(\n                message=\"Error importing dataset records from disk. 
\"\n                \"Records and datasets settings are not compatible.\"\n            ) from e\n\n    return dataset\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._hub.HubImportExportMixin","title":"HubImportExportMixin","text":"

Bases: DiskImportExportMixin

Source code in src/argilla/datasets/_io/_hub.py
class HubImportExportMixin(DiskImportExportMixin):\n    def to_hub(\n        self: \"Dataset\",\n        repo_id: str,\n        *,\n        with_records: bool = True,\n        generate_card: Optional[bool] = True,\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Pushes the `Dataset` to the Hugging Face Hub. If the dataset has been previously pushed to the\n        Hugging Face Hub, it will be updated instead of creating a new dataset repo.\n\n        Parameters:\n            repo_id: the ID of the Hugging Face Hub repo to push the `Dataset` to.\n            with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n            generate_card: whether to generate a dataset card for the `Dataset` in the Hugging Face Hub. Defaults\n                to `True`.\n            **kwargs: the kwargs to pass to `datasets.Dataset.push_to_hub`.\n\n        Returns:\n            None\n        \"\"\"\n\n        from huggingface_hub import DatasetCardData, HfApi\n\n        from argilla.datasets._io.card import (\n            ArgillaDatasetCard,\n            size_categories_parser,\n        )\n\n        hf_api = HfApi(token=kwargs.get(\"token\"))\n\n        hfds = False\n        if with_records:\n            hfds = self.records(with_vectors=True, with_responses=True, with_suggestions=True).to_datasets()\n            hfds.push_to_hub(repo_id, **kwargs)\n        else:\n            hf_api.create_repo(repo_id=repo_id, repo_type=\"dataset\", exist_ok=kwargs.get(\"exist_ok\") or True)\n\n        with TemporaryDirectory() as tmpdirname:\n            config_dir = os.path.join(tmpdirname)\n\n            self.to_disk(path=config_dir, with_records=False)\n\n            if generate_card:\n                sample_argilla_record = next(iter(self.records(with_suggestions=True, with_responses=True)))\n                sample_huggingface_record = self._get_sample_hf_record(hfds) if with_records else None\n                dataset_size = len(hfds) if with_records else 0\n                card = ArgillaDatasetCard.from_template(\n                    card_data=DatasetCardData(\n                        size_categories=size_categories_parser(dataset_size),\n                        tags=[\"rlfh\", \"argilla\", \"human-feedback\"],\n                    ),\n                    repo_id=repo_id,\n                    argilla_fields=self.settings.fields,\n                    argilla_questions=self.settings.questions,\n                    argilla_guidelines=self.settings.guidelines or None,\n                    argilla_vectors_settings=self.settings.vectors or None,\n                    argilla_metadata_properties=self.settings.metadata,\n                    argilla_record=sample_argilla_record.to_dict(),\n                    huggingface_record=sample_huggingface_record,\n                )\n                card.save(filepath=os.path.join(tmpdirname, \"README.md\"))\n\n            hf_api.upload_folder(\n                folder_path=tmpdirname,\n                repo_id=repo_id,\n                repo_type=\"dataset\",\n            )\n\n    @classmethod\n    def from_hub(\n        cls: Type[\"Dataset\"],\n        repo_id: str,\n        *,\n        name: Optional[str] = None,\n        workspace: Optional[Union[\"Workspace\", str]] = None,\n        client: Optional[\"Argilla\"] = None,\n        with_records: bool = True,\n        settings: Optional[\"Settings\"] = None,\n        split: Optional[str] = None,\n        subset: Optional[str] = None,\n        **kwargs: Any,\n    ) -> \"Dataset\":\n        
\"\"\"Loads a `Dataset` from the Hugging Face Hub.\n\n        Parameters:\n            repo_id: the ID of the Hugging Face Hub repo to load the `Dataset` from.\n            name (str, optional): The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.\n            workspace (Union[Workspace, str], optional): The workspace to import the dataset to. Defaults to None and default workspace is used.\n            client: the client to use to load the `Dataset`. If not provided, the default client will be used.\n            with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n            settings: the settings to use to load the `Dataset`. If not provided, the settings will be loaded from the Hugging Face dataset.\n            split: the split to load from the Hugging Face dataset. If not provided, the first split will be loaded.\n            **kwargs: the kwargs to pass to `datasets.Dataset.load_from_hub`.\n\n        Returns:\n            A `Dataset` loaded from the Hugging Face Hub.\n        \"\"\"\n        from datasets import load_dataset\n        from huggingface_hub import snapshot_download\n        from argilla import Dataset\n\n        if name is None:\n            name = Dataset._sanitize_name(repo_id)\n\n        if settings is not None:\n            dataset = cls(name=name, settings=settings)\n            dataset.create()\n        else:\n            try:\n                # download configuration files from the hub\n                folder_path = snapshot_download(\n                    repo_id=repo_id,\n                    repo_type=\"dataset\",\n                    allow_patterns=cls._DEFAULT_CONFIGURATION_FILES,\n                    token=kwargs.get(\"token\"),\n                )\n\n                dataset = cls.from_disk(\n                    path=folder_path, workspace=workspace, name=name, client=client, with_records=with_records\n                )\n            except ImportDatasetError:\n                from argilla import Settings\n\n                settings = Settings.from_hub(repo_id=repo_id, subset=subset)\n                dataset = cls.from_hub(\n                    repo_id=repo_id,\n                    name=name,\n                    workspace=workspace,\n                    client=client,\n                    with_records=with_records,\n                    settings=settings,\n                    split=split,\n                    subset=subset,\n                    **kwargs,\n                )\n                return dataset\n\n        if with_records:\n            try:\n                hf_dataset = load_dataset(\n                    path=repo_id,\n                    split=split,\n                    name=subset,\n                    **kwargs,\n                )  # type: ignore\n                hf_dataset = cls._get_dataset_split(hf_dataset=hf_dataset, split=split, **kwargs)\n                cls._log_dataset_records(hf_dataset=hf_dataset, dataset=dataset)\n            except EmptyDatasetError:\n                warnings.warn(\n                    message=\"Trying to load a dataset `with_records=True` but dataset does not contain any records.\",\n                    category=UserWarning,\n                )\n\n        return dataset\n\n    @staticmethod\n    def _log_dataset_records(hf_dataset: \"HFDataset\", dataset: \"Dataset\"):\n        \"\"\"This method extracts the responses from a Hugging Face dataset and returns a list of 
`Record` objects\"\"\"\n        # THIS IS REQUIRED SINCE THE NAME RESTRICTION IN ARGILLA. HUGGING FACE DATASET COLUMNS ARE CASE SENSITIVE\n        # Also, there is a logic with column names including \".responses\" and \".suggestion\" in the name.\n        columns_map = {}\n        for column in hf_dataset.column_names:\n            if \".responses\" in column or \".suggestion\" in column:\n                columns_map[column] = column.lower()\n            else:\n                columns_map[column] = dataset.settings._sanitize_settings_name(column)\n\n        hf_dataset = hf_dataset.rename_columns(columns_map)\n\n        # Identify columns that columns that contain responses\n        responses_columns = [col for col in hf_dataset.column_names if \".responses\" in col]\n        response_questions = defaultdict(dict)\n        user_ids = {}\n        for col in responses_columns:\n            question_name = col.split(\".\")[0]\n            if col.endswith(\"users\"):\n                response_questions[question_name][\"users\"] = hf_dataset[col]\n                user_ids.update({UUID(user_id): UUID(user_id) for user_id in set(sum(hf_dataset[col], []))})\n            elif col.endswith(\"responses\"):\n                response_questions[question_name][\"responses\"] = hf_dataset[col]\n            elif col.endswith(\"status\"):\n                response_questions[question_name][\"status\"] = hf_dataset[col]\n\n        # Check if all user ids are known to this Argilla client\n        known_users_ids = [user.id for user in dataset._client.users]\n        unknown_user_ids = set(user_ids.keys()) - set(known_users_ids)\n        my_user = dataset._client.me\n        if len(unknown_user_ids) > 1:\n            warnings.warn(\n                message=f\"\"\"Found unknown user ids in dataset repo: {unknown_user_ids}.\n                    Assigning first response for each record to current user ({my_user.username}) and discarding the rest.\"\"\"\n            )\n        for unknown_user_id in unknown_user_ids:\n            user_ids[unknown_user_id] = my_user.id\n\n        # Create a mapper to map the Hugging Face dataset to a Record object\n        mapping = {col: col for col in hf_dataset.column_names if \".suggestion\" in col}\n        mapper = IngestedRecordMapper(dataset=dataset, mapping=mapping, user_id=my_user.id)\n\n        # Extract responses and create Record objects\n        records = []\n        hf_dataset = HFDatasetsIO.to_argilla(hf_dataset=hf_dataset)\n        for idx, row in enumerate(hf_dataset):\n            record = mapper(row)\n            for question_name, values in response_questions.items():\n                response_values = values[\"responses\"][idx]\n                response_users = values[\"users\"][idx]\n                response_status = values[\"status\"][idx]\n                for value, user_id, status in zip(response_values, response_users, response_status):\n                    user_id = user_ids[UUID(user_id)]\n                    if user_id in response_users:\n                        continue\n                    response_users[user_id] = True\n                    response = Response(\n                        user_id=user_id,\n                        question_name=question_name,\n                        value=value,\n                        status=status,\n                    )\n                    record.responses.add(response)\n            records.append(record)\n\n        try:\n            dataset.records.log(records=records)\n        except (RecordsIngestionError, 
UnprocessableEntityError) as e:\n            raise SettingsError(\n                message=f\"Failed to load records from Hugging Face dataset. Defined settings do not match dataset schema. Hugging face dataset features: {hf_dataset.features}. Argilla dataset settings : {dataset.settings}\"\n            ) from e\n\n    @staticmethod\n    def _get_dataset_split(hf_dataset: \"HFDataset\", split: Optional[str] = None, **kwargs: Dict) -> \"HFDataset\":\n        \"\"\"Get a single dataset from a Hugging Face dataset.\n\n        Parameters:\n            hf_dataset (HFDataset): The Hugging Face dataset to get a single dataset from.\n\n        Returns:\n            HFDataset: The single dataset.\n        \"\"\"\n\n        if isinstance(hf_dataset, DatasetDict) and split is None:\n            split = next(iter(hf_dataset.keys()))\n            if len(hf_dataset.keys()) > 1:\n                warnings.warn(\n                    message=f\"Multiple splits found in Hugging Face dataset. Using the first split: {split}. \"\n                    f\"Available splits are: {', '.join(hf_dataset.keys())}.\"\n                )\n            hf_dataset = hf_dataset[split]\n        return hf_dataset\n\n    @staticmethod\n    def _get_sample_hf_record(hf_dataset: \"HFDataset\") -> Dict:\n        \"\"\"Get a sample record from a Hugging Face dataset.\n\n        Parameters:\n            hf_dataset (HFDataset): The Hugging Face dataset to get a sample record from.\n\n        Returns:\n            Dict: The sample record.\n        \"\"\"\n\n        if hf_dataset:\n            sample_huggingface_record = {}\n            for key, value in hf_dataset[0].items():\n                try:\n                    json.dumps(value)\n                    sample_huggingface_record[key] = value\n                except TypeError:\n                    if isinstance(value, Image.Image):\n                        sample_huggingface_record[key] = pil_to_data_uri(value)\n                    else:\n                        sample_huggingface_record[key] = \"Record value is not serializable\"\n            return sample_huggingface_record\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._hub.HubImportExportMixin.to_hub","title":"to_hub(repo_id, *, with_records=True, generate_card=True, **kwargs)","text":"

Pushes the Dataset to the Hugging Face Hub. If the dataset has been previously pushed to the Hugging Face Hub, it will be updated instead of creating a new dataset repo.

Parameters:

Name Type Description Default repo_id str

the ID of the Hugging Face Hub repo to push the Dataset to.

required with_records bool

whether to push the records to the Hugging Face Hub along with the dataset. Defaults to True.

True generate_card Optional[bool]

whether to generate a dataset card for the Dataset in the Hugging Face Hub. Defaults to True.

True **kwargs Any

the kwargs to pass to datasets.Dataset.push_to_hub.

{}

Returns:

Type Description None

None

Source code in src/argilla/datasets/_io/_hub.py
def to_hub(\n    self: \"Dataset\",\n    repo_id: str,\n    *,\n    with_records: bool = True,\n    generate_card: Optional[bool] = True,\n    **kwargs: Any,\n) -> None:\n    \"\"\"Pushes the `Dataset` to the Hugging Face Hub. If the dataset has been previously pushed to the\n    Hugging Face Hub, it will be updated instead of creating a new dataset repo.\n\n    Parameters:\n        repo_id: the ID of the Hugging Face Hub repo to push the `Dataset` to.\n        with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n        generate_card: whether to generate a dataset card for the `Dataset` in the Hugging Face Hub. Defaults\n            to `True`.\n        **kwargs: the kwargs to pass to `datasets.Dataset.push_to_hub`.\n\n    Returns:\n        None\n    \"\"\"\n\n    from huggingface_hub import DatasetCardData, HfApi\n\n    from argilla.datasets._io.card import (\n        ArgillaDatasetCard,\n        size_categories_parser,\n    )\n\n    hf_api = HfApi(token=kwargs.get(\"token\"))\n\n    hfds = False\n    if with_records:\n        hfds = self.records(with_vectors=True, with_responses=True, with_suggestions=True).to_datasets()\n        hfds.push_to_hub(repo_id, **kwargs)\n    else:\n        hf_api.create_repo(repo_id=repo_id, repo_type=\"dataset\", exist_ok=kwargs.get(\"exist_ok\") or True)\n\n    with TemporaryDirectory() as tmpdirname:\n        config_dir = os.path.join(tmpdirname)\n\n        self.to_disk(path=config_dir, with_records=False)\n\n        if generate_card:\n            sample_argilla_record = next(iter(self.records(with_suggestions=True, with_responses=True)))\n            sample_huggingface_record = self._get_sample_hf_record(hfds) if with_records else None\n            dataset_size = len(hfds) if with_records else 0\n            card = ArgillaDatasetCard.from_template(\n                card_data=DatasetCardData(\n                    size_categories=size_categories_parser(dataset_size),\n                    tags=[\"rlfh\", \"argilla\", \"human-feedback\"],\n                ),\n                repo_id=repo_id,\n                argilla_fields=self.settings.fields,\n                argilla_questions=self.settings.questions,\n                argilla_guidelines=self.settings.guidelines or None,\n                argilla_vectors_settings=self.settings.vectors or None,\n                argilla_metadata_properties=self.settings.metadata,\n                argilla_record=sample_argilla_record.to_dict(),\n                huggingface_record=sample_huggingface_record,\n            )\n            card.save(filepath=os.path.join(tmpdirname, \"README.md\"))\n\n        hf_api.upload_folder(\n            folder_path=tmpdirname,\n            repo_id=repo_id,\n            repo_type=\"dataset\",\n        )\n
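A short usage sketch (the repo id and token are hypothetical; extra kwargs such as token are forwarded to the Hugging Face API, as shown in the source above):

dataset.to_hub(\n    repo_id=\"my-org/my-dataset\",\n    with_records=True,\n    token=\"hf_...\",\n)\n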
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._hub.HubImportExportMixin.from_hub","title":"from_hub(repo_id, *, name=None, workspace=None, client=None, with_records=True, settings=None, split=None, subset=None, **kwargs) classmethod","text":"

Loads a Dataset from the Hugging Face Hub.

Parameters:

Name Type Description Default repo_id str

the ID of the Hugging Face Hub repo to load the Dataset from.

required name str

The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.

None workspace Union[Workspace, str]

The workspace to import the dataset to. Defaults to None and the default workspace is used.

None client Optional[Argilla]

the client to use to load the Dataset. If not provided, the default client will be used.

None with_records bool

whether to load the records from the Hugging Face dataset. Defaults to True.

True settings Optional[Settings]

the settings to use to load the Dataset. If not provided, the settings will be loaded from the Hugging Face dataset.

None split Optional[str]

the split to load from the Hugging Face dataset. If not provided, the first split will be loaded.

None **kwargs Any

the kwargs to pass to datasets.load_dataset.

{}

Returns:

Type Description Dataset

A Dataset loaded from the Hugging Face Hub.

Source code in src/argilla/datasets/_io/_hub.py
@classmethod\ndef from_hub(\n    cls: Type[\"Dataset\"],\n    repo_id: str,\n    *,\n    name: Optional[str] = None,\n    workspace: Optional[Union[\"Workspace\", str]] = None,\n    client: Optional[\"Argilla\"] = None,\n    with_records: bool = True,\n    settings: Optional[\"Settings\"] = None,\n    split: Optional[str] = None,\n    subset: Optional[str] = None,\n    **kwargs: Any,\n) -> \"Dataset\":\n    \"\"\"Loads a `Dataset` from the Hugging Face Hub.\n\n    Parameters:\n        repo_id: the ID of the Hugging Face Hub repo to load the `Dataset` from.\n        name (str, optional): The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.\n        workspace (Union[Workspace, str], optional): The workspace to import the dataset to. Defaults to None and default workspace is used.\n        client: the client to use to load the `Dataset`. If not provided, the default client will be used.\n        with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n        settings: the settings to use to load the `Dataset`. If not provided, the settings will be loaded from the Hugging Face dataset.\n        split: the split to load from the Hugging Face dataset. If not provided, the first split will be loaded.\n        **kwargs: the kwargs to pass to `datasets.Dataset.load_from_hub`.\n\n    Returns:\n        A `Dataset` loaded from the Hugging Face Hub.\n    \"\"\"\n    from datasets import load_dataset\n    from huggingface_hub import snapshot_download\n    from argilla import Dataset\n\n    if name is None:\n        name = Dataset._sanitize_name(repo_id)\n\n    if settings is not None:\n        dataset = cls(name=name, settings=settings)\n        dataset.create()\n    else:\n        try:\n            # download configuration files from the hub\n            folder_path = snapshot_download(\n                repo_id=repo_id,\n                repo_type=\"dataset\",\n                allow_patterns=cls._DEFAULT_CONFIGURATION_FILES,\n                token=kwargs.get(\"token\"),\n            )\n\n            dataset = cls.from_disk(\n                path=folder_path, workspace=workspace, name=name, client=client, with_records=with_records\n            )\n        except ImportDatasetError:\n            from argilla import Settings\n\n            settings = Settings.from_hub(repo_id=repo_id, subset=subset)\n            dataset = cls.from_hub(\n                repo_id=repo_id,\n                name=name,\n                workspace=workspace,\n                client=client,\n                with_records=with_records,\n                settings=settings,\n                split=split,\n                subset=subset,\n                **kwargs,\n            )\n            return dataset\n\n    if with_records:\n        try:\n            hf_dataset = load_dataset(\n                path=repo_id,\n                split=split,\n                name=subset,\n                **kwargs,\n            )  # type: ignore\n            hf_dataset = cls._get_dataset_split(hf_dataset=hf_dataset, split=split, **kwargs)\n            cls._log_dataset_records(hf_dataset=hf_dataset, dataset=dataset)\n        except EmptyDatasetError:\n            warnings.warn(\n                message=\"Trying to load a dataset `with_records=True` but dataset does not contain any records.\",\n                category=UserWarning,\n            )\n\n    return dataset\n
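A short usage sketch (the repo id is hypothetical):

dataset = rg.Dataset.from_hub(\n    repo_id=\"my-org/my-dataset\",\n    split=\"train\",\n    with_records=True,\n)\n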
"},{"location":"reference/argilla/records/metadata/","title":"metadata","text":"

Metadata in Argilla is a dictionary that can be attached to a record. It is used to store additional information about the record that is not part of the record's fields or responses. For example, the source of the record, the date it was created, or any other information that is relevant to the record. Metadata can be added to a record directly or as values within a dictionary.

"},{"location":"reference/argilla/records/metadata/#usage-examples","title":"Usage Examples","text":"

To use metadata within a dataset, you must define metadata properties in the dataset settings. The metadata parameter of Settings accepts a list of metadata properties whose values can be attached to records. The following example demonstrates how to add metadata to a dataset and how to access metadata from a record object:

import argilla as rg\n\ndataset = rg.Dataset(\n    name=\"dataset_with_metadata\",\n    settings=rg.Settings(\n        fields=[rg.TextField(name=\"text\")],\n        questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n        metadata=[\n            rg.TermsMetadataProperty(name=\"category\", options=[\"A\", \"B\", \"C\"]),\n        ],\n    ),\n)\ndataset.create()\n

Then, you can add records to the dataset with metadata that corresponds to the metadata property defined in the dataset settings:

dataset.records.log(\n    [\n        {\"text\": \"text\", \"label\": \"positive\", \"category\": \"A\"},\n        {\"text\": \"text\", \"label\": \"negative\", \"category\": \"B\"},\n    ]\n)\n
"},{"location":"reference/argilla/records/metadata/#format-per-metadataproperty-type","title":"Format per MetadataProperty type","text":"

Depending on the MetadataProperty type, metadata might need to be formatted in a slightly different way.

For TermsMetadataPropertyFor FloatMetadataPropertyFor IntegerMetadataProperty
rg.Record(\n    fields={\"text\": \"example\"},\n    metadata={\"category\": \"A\"}\n)\n\n# with multiple terms\n\nrg.Record(\n    fields={\"text\": \"example\"},\n    metadata={\"category\": [\"A\", \"B\"]}\n)\n
rg.Record(\n    fields={\"text\": \"example\"},\n    metadata={\"category\": 2.1}\n)\n
rg.Record(\n    fields={\"text\": \"example\"},\n    metadata={\"category\": 42}\n)\n
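
For reference, a sketch of defining matching metadata properties in the dataset settings; the float and integer properties are given distinct illustrative names (price, quantity) here:

settings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"category\", options=[\"A\", \"B\", \"C\"]),\n        rg.FloatMetadataProperty(name=\"price\"),  # accepts float values\n        rg.IntegerMetadataProperty(name=\"quantity\"),  # accepts integer values\n    ],\n)\n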
"},{"location":"reference/argilla/records/records/","title":"rg.Record","text":"

The Record object is used to represent a single record in Argilla. It contains fields, suggestions, responses, metadata, and vectors.

"},{"location":"reference/argilla/records/records/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/records/records/#creating-a-record","title":"Creating a Record","text":"

To create records, you can use the Record class and pass it to the Dataset.records.log method. The Record class requires a fields parameter, which is a dictionary of field names and values. The field names must match the field names in the dataset's Settings object to be accepted.

dataset.records.log(\n    records=[\n        rg.Record(\n            fields={\"text\": \"Hello World, how are you?\"},\n        ),\n    ]\n) # (1)\n
  1. The Argilla dataset contains a field named text matching the key here.

To create records with image fields, pass the image to the record object as either a remote URL, a local path to an image file, or a PIL object. The field names must be defined as an rg.ImageField in the dataset's Settings object to be accepted. Images will be stored in the Argilla database and returned as rescaled PIL objects.

dataset.records.log(\n    records=[\n        rg.Record(\n            fields={\"image\": \"https://example.com/image.jpg\"}, # (1)\n        ),\n    ]\n)\n
  1. The image can be referenced as either a remote URL, a local file path, or a PIL object.

Note

The image will be stored in the Argilla database and can impact the dataset's storage usage. Images should be less than 5 MB in size, and datasets should contain fewer than 10,000 images.

"},{"location":"reference/argilla/records/records/#accessing-record-attributes","title":"Accessing Record Attributes","text":"

The Record object has suggestions, responses, metadata, and vectors attributes that can be accessed directly whilst iterating over records in a dataset.

for record in dataset.records(\n    with_suggestions=True,\n    with_responses=True,\n    with_metadata=True,\n    with_vectors=True\n    ):\n    print(record.suggestions)\n    print(record.responses)\n    print(record.metadata)\n    print(record.vectors)\n

Record properties can also be updated whilst iterating over records in a dataset.

for record in dataset.records(with_metadata=True):\n    record.metadata = {\"department\": \"toys\"}\n

For changes to take effect, the user must call the update method on the Dataset object, or pass the updated records to Dataset.records.log. All core record attributes can be updated in this way. Check their respective documentation for more information: Suggestions, Responses, Metadata, Vectors.
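
For example, a minimal sketch of that flow, assuming the dataset's settings define a department terms metadata property:

updated_records = []\nfor record in dataset.records(with_metadata=True):\n    record.metadata = {\"department\": \"toys\"}  # modify the record in memory\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)  # persist the changes on the server\n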

"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record","title":"Record","text":"

Bases: Resource

The class for interacting with Argilla Records. A Record is a single sample in a dataset. Records receive feedback in the form of responses and suggestions. Records contain fields, metadata, and vectors.

Attributes:

id (Union[str, UUID]): The id of the record.
fields (RecordFields): The fields of the record.
metadata (RecordMetadata): The metadata of the record.
vectors (RecordVectors): The vectors of the record.
responses (RecordResponses): The responses of the record.
suggestions (RecordSuggestions): The suggestions of the record.
dataset (Dataset): The dataset to which the record belongs.
_server_id (UUID): An id for the record generated by the Argilla server.

Source code in src/argilla/records/_resource.py
class Record(Resource):\n    \"\"\"The class for interacting with Argilla Records. A `Record` is a single sample\n    in a dataset. Records receives feedback in the form of responses and suggestions.\n    Records contain fields, metadata, and vectors.\n\n    Attributes:\n        id (Union[str, UUID]): The id of the record.\n        fields (RecordFields): The fields of the record.\n        metadata (RecordMetadata): The metadata of the record.\n        vectors (RecordVectors): The vectors of the record.\n        responses (RecordResponses): The responses of the record.\n        suggestions (RecordSuggestions): The suggestions of the record.\n        dataset (Dataset): The dataset to which the record belongs.\n        _server_id (UUID): An id for the record generated by the Argilla server.\n    \"\"\"\n\n    _model: RecordModel\n\n    def __init__(\n        self,\n        id: Optional[Union[UUID, str]] = None,\n        fields: Optional[Dict[str, FieldValue]] = None,\n        metadata: Optional[Dict[str, MetadataValue]] = None,\n        vectors: Optional[Dict[str, VectorValue]] = None,\n        responses: Optional[List[Response]] = None,\n        suggestions: Optional[List[Suggestion]] = None,\n        _server_id: Optional[UUID] = None,\n        _dataset: Optional[\"Dataset\"] = None,\n    ):\n        \"\"\"Initializes a Record with fields, metadata, vectors, responses, suggestions, external_id, and id.\n        Records are typically defined as flat dictionary objects with fields, metadata, vectors, responses, and suggestions\n        and passed to Dataset.DatasetRecords.add() as a list of dictionaries.\n\n        Args:\n            id: An id for the record. If not provided, a UUID will be generated.\n            fields: A dictionary of fields for the record.\n            metadata: A dictionary of metadata for the record.\n            vectors: A dictionary of vectors for the record.\n            responses: A list of Response objects for the record.\n            suggestions: A list of Suggestion objects for the record.\n            _server_id: An id for the record. 
(Read-only and set by the server)\n            _dataset: The dataset object to which the record belongs.\n        \"\"\"\n\n        if fields is None and metadata is None and vectors is None and responses is None and suggestions is None:\n            raise ValueError(\"At least one of fields, metadata, vectors, responses, or suggestions must be provided.\")\n        if fields is None and id is None:\n            raise ValueError(\"If fields are not provided, an id must be provided.\")\n        if fields == {} and id is None:\n            raise ValueError(\"If fields are an empty dictionary, an id must be provided.\")\n\n        self._dataset = _dataset\n        self._model = RecordModel(external_id=id, id=_server_id)\n        self.__fields = RecordFields(fields=fields, record=self)\n        self.__vectors = RecordVectors(vectors=vectors)\n        self.__metadata = RecordMetadata(metadata=metadata)\n        self.__responses = RecordResponses(responses=responses, record=self)\n        self.__suggestions = RecordSuggestions(suggestions=suggestions, record=self)\n\n    def __repr__(self) -> str:\n        return (\n            f\"Record(id={self.id},status={self.status},fields={self.fields},metadata={self.metadata},\"\n            f\"suggestions={self.suggestions},responses={self.responses})\"\n        )\n\n    ############################\n    # Properties\n    ############################\n\n    @property\n    def id(self) -> str:\n        return self._model.external_id\n\n    @id.setter\n    def id(self, value: str) -> None:\n        self._model.external_id = value\n\n    @property\n    def dataset(self) -> \"Dataset\":\n        return self._dataset\n\n    @dataset.setter\n    def dataset(self, value: \"Dataset\") -> None:\n        self._dataset = value\n\n    @property\n    def fields(self) -> \"RecordFields\":\n        return self.__fields\n\n    @property\n    def responses(self) -> \"RecordResponses\":\n        return self.__responses\n\n    @property\n    def suggestions(self) -> \"RecordSuggestions\":\n        return self.__suggestions\n\n    @property\n    def metadata(self) -> \"RecordMetadata\":\n        return self.__metadata\n\n    @property\n    def vectors(self) -> \"RecordVectors\":\n        return self.__vectors\n\n    @property\n    def status(self) -> str:\n        return self._model.status\n\n    @property\n    def _server_id(self) -> Optional[UUID]:\n        return self._model.id\n\n    ############################\n    # Public methods\n    ############################\n\n    def get(self) -> \"Record\":\n        \"\"\"Retrieves the record from the server.\"\"\"\n        model = self._client.api.records.get(self._server_id)\n        instance = self.from_model(model, dataset=self.dataset)\n        self.__dict__ = instance.__dict__\n\n        return self\n\n    def api_model(self) -> RecordModel:\n        return RecordModel(\n            id=self._model.id,\n            external_id=self._model.external_id,\n            fields=self.fields.to_dict(),\n            metadata=self.metadata.api_models(),\n            vectors=self.vectors.api_models(),\n            responses=self.responses.api_models(),\n            suggestions=self.suggestions.api_models(),\n            status=self.status,\n        )\n\n    def serialize(self) -> Dict[str, Any]:\n        \"\"\"Serializes the Record to a dictionary for interaction with the API\"\"\"\n        serialized_model = self._model.model_dump()\n        serialized_suggestions = [suggestion.serialize() for suggestion in self.__suggestions]\n   
     serialized_responses = [response.serialize() for response in self.__responses]\n        serialized_model[\"responses\"] = serialized_responses\n        serialized_model[\"suggestions\"] = serialized_suggestions\n\n        return serialized_model\n\n    def to_dict(self) -> Dict[str, Dict]:\n        \"\"\"Converts a Record object to a dictionary for export.\n        Returns:\n            A dictionary representing the record where the keys are \"fields\",\n            \"metadata\", \"suggestions\", and \"responses\". Each field and question is\n            represented as a key-value pair in the dictionary of the respective key. i.e.\n            `{\"fields\": {\"prompt\": \"...\", \"response\": \"...\"}, \"responses\": {\"rating\": \"...\"},\n        \"\"\"\n        id = str(self.id) if self.id else None\n        server_id = str(self._model.id) if self._model.id else None\n        status = self.status\n        fields = self.fields.to_dict()\n        metadata = self.metadata.to_dict()\n        suggestions = self.suggestions.to_dict()\n        responses = self.responses.to_dict()\n        vectors = self.vectors.to_dict()\n\n        # TODO: Review model attributes when to_dict and serialize methods are unified\n        return {\n            \"id\": id,\n            \"fields\": fields,\n            \"metadata\": metadata,\n            \"suggestions\": suggestions,\n            \"responses\": responses,\n            \"vectors\": vectors,\n            \"status\": status,\n            \"_server_id\": server_id,\n        }\n\n    @classmethod\n    def from_dict(cls, data: Dict[str, Dict], dataset: Optional[\"Dataset\"] = None) -> \"Record\":\n        \"\"\"Converts a dictionary to a Record object.\n        Args:\n            data: A dictionary representing the record.\n            dataset: The dataset object to which the record belongs.\n        Returns:\n            A Record object.\n        \"\"\"\n        fields = data.get(\"fields\", {})\n        metadata = data.get(\"metadata\", {})\n        suggestions = data.get(\"suggestions\", {})\n        responses = data.get(\"responses\", {})\n        vectors = data.get(\"vectors\", {})\n        record_id = data.get(\"id\", None)\n        _server_id = data.get(\"_server_id\", None)\n\n        suggestions = [Suggestion(question_name=question_name, **value) for question_name, value in suggestions.items()]\n        responses = [\n            Response(question_name=question_name, **value)\n            for question_name, _responses in responses.items()\n            for value in _responses\n        ]\n\n        return cls(\n            id=record_id,\n            fields=fields,\n            suggestions=suggestions,\n            responses=responses,\n            vectors=vectors,\n            metadata=metadata,\n            _dataset=dataset,\n            _server_id=_server_id,\n        )\n\n    @classmethod\n    def from_model(cls, model: RecordModel, dataset: \"Dataset\") -> \"Record\":\n        \"\"\"Converts a RecordModel object to a Record object.\n        Args:\n            model: A RecordModel object.\n            dataset: The dataset object to which the record belongs.\n        Returns:\n            A Record object.\n        \"\"\"\n        instance = cls(\n            id=model.external_id,\n            fields=model.fields,\n            metadata={meta.name: meta.value for meta in model.metadata},\n            vectors={vector.name: vector.vector_values for vector in model.vectors},\n            _dataset=dataset,\n            responses=[],\n            
suggestions=[],\n        )\n\n        # set private attributes\n        instance._dataset = dataset\n        instance._model = model\n\n        # Responses and suggestions are computed separately based on the record model\n        instance.responses.from_models(model.responses)\n        instance.suggestions.from_models(model.suggestions)\n\n        return instance\n\n    @property\n    def _client(self) -> Optional[\"Argilla\"]:\n        if self._dataset:\n            return self.dataset._client\n\n    @property\n    def _api(self) -> Optional[\"RecordsAPI\"]:\n        if self._client:\n            return self._client.api.records\n
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.__init__","title":"__init__(id=None, fields=None, metadata=None, vectors=None, responses=None, suggestions=None, _server_id=None, _dataset=None)","text":"

Initializes a Record with fields, metadata, vectors, responses, suggestions, external_id, and id. Records are typically defined as flat dictionary objects with fields, metadata, vectors, responses, and suggestions and passed to Dataset.DatasetRecords.add() as a list of dictionaries.

Parameters:

id (Optional[Union[UUID, str]], default None): An id for the record. If not provided, a UUID will be generated.
fields (Optional[Dict[str, FieldValue]], default None): A dictionary of fields for the record.
metadata (Optional[Dict[str, MetadataValue]], default None): A dictionary of metadata for the record.
vectors (Optional[Dict[str, VectorValue]], default None): A dictionary of vectors for the record.
responses (Optional[List[Response]], default None): A list of Response objects for the record.
suggestions (Optional[List[Suggestion]], default None): A list of Suggestion objects for the record.
_server_id (Optional[UUID], default None): An id for the record. (Read-only and set by the server)
_dataset (Optional[Dataset], default None): The dataset object to which the record belongs.

Source code in src/argilla/records/_resource.py
def __init__(\n    self,\n    id: Optional[Union[UUID, str]] = None,\n    fields: Optional[Dict[str, FieldValue]] = None,\n    metadata: Optional[Dict[str, MetadataValue]] = None,\n    vectors: Optional[Dict[str, VectorValue]] = None,\n    responses: Optional[List[Response]] = None,\n    suggestions: Optional[List[Suggestion]] = None,\n    _server_id: Optional[UUID] = None,\n    _dataset: Optional[\"Dataset\"] = None,\n):\n    \"\"\"Initializes a Record with fields, metadata, vectors, responses, suggestions, external_id, and id.\n    Records are typically defined as flat dictionary objects with fields, metadata, vectors, responses, and suggestions\n    and passed to Dataset.DatasetRecords.add() as a list of dictionaries.\n\n    Args:\n        id: An id for the record. If not provided, a UUID will be generated.\n        fields: A dictionary of fields for the record.\n        metadata: A dictionary of metadata for the record.\n        vectors: A dictionary of vectors for the record.\n        responses: A list of Response objects for the record.\n        suggestions: A list of Suggestion objects for the record.\n        _server_id: An id for the record. (Read-only and set by the server)\n        _dataset: The dataset object to which the record belongs.\n    \"\"\"\n\n    if fields is None and metadata is None and vectors is None and responses is None and suggestions is None:\n        raise ValueError(\"At least one of fields, metadata, vectors, responses, or suggestions must be provided.\")\n    if fields is None and id is None:\n        raise ValueError(\"If fields are not provided, an id must be provided.\")\n    if fields == {} and id is None:\n        raise ValueError(\"If fields are an empty dictionary, an id must be provided.\")\n\n    self._dataset = _dataset\n    self._model = RecordModel(external_id=id, id=_server_id)\n    self.__fields = RecordFields(fields=fields, record=self)\n    self.__vectors = RecordVectors(vectors=vectors)\n    self.__metadata = RecordMetadata(metadata=metadata)\n    self.__responses = RecordResponses(responses=responses, record=self)\n    self.__suggestions = RecordSuggestions(suggestions=suggestions, record=self)\n
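
For illustration, a sketch constructing a Record with several of these parameters (the values are hypothetical):

record = rg.Record(\n    id=\"record-001\",  # hypothetical external id\n    fields={\"text\": \"Hello World, how are you?\"},\n    metadata={\"category\": \"A\"},\n    suggestions=[rg.Suggestion(\"label\", \"positive\", score=0.9)],\n)\n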
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.get","title":"get()","text":"

Retrieves the record from the server.

Source code in src/argilla/records/_resource.py
def get(self) -> \"Record\":\n    \"\"\"Retrieves the record from the server.\"\"\"\n    model = self._client.api.records.get(self._server_id)\n    instance = self.from_model(model, dataset=self.dataset)\n    self.__dict__ = instance.__dict__\n\n    return self\n
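
A short usage sketch, assuming a record that was previously logged and therefore has a server id:

record = record.get()  # re-fetches the record state from the Argilla server\n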
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.serialize","title":"serialize()","text":"

Serializes the Record to a dictionary for interaction with the API

Source code in src/argilla/records/_resource.py
def serialize(self) -> Dict[str, Any]:\n    \"\"\"Serializes the Record to a dictionary for interaction with the API\"\"\"\n    serialized_model = self._model.model_dump()\n    serialized_suggestions = [suggestion.serialize() for suggestion in self.__suggestions]\n    serialized_responses = [response.serialize() for response in self.__responses]\n    serialized_model[\"responses\"] = serialized_responses\n    serialized_model[\"suggestions\"] = serialized_suggestions\n\n    return serialized_model\n
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.to_dict","title":"to_dict()","text":"

Converts a Record object to a dictionary for export. Returns: A dictionary representing the record where the keys are \"fields\", \"metadata\", \"suggestions\", and \"responses\". Each field and question is represented as a key-value pair in the dictionary of the respective key, i.e. `{\"fields\": {\"prompt\": \"...\", \"response\": \"...\"}, \"responses\": {\"rating\": \"...\"}}`.

Source code in src/argilla/records/_resource.py
def to_dict(self) -> Dict[str, Dict]:\n    \"\"\"Converts a Record object to a dictionary for export.\n    Returns:\n        A dictionary representing the record where the keys are \"fields\",\n        \"metadata\", \"suggestions\", and \"responses\". Each field and question is\n        represented as a key-value pair in the dictionary of the respective key. i.e.\n        `{\"fields\": {\"prompt\": \"...\", \"response\": \"...\"}, \"responses\": {\"rating\": \"...\"},\n    \"\"\"\n    id = str(self.id) if self.id else None\n    server_id = str(self._model.id) if self._model.id else None\n    status = self.status\n    fields = self.fields.to_dict()\n    metadata = self.metadata.to_dict()\n    suggestions = self.suggestions.to_dict()\n    responses = self.responses.to_dict()\n    vectors = self.vectors.to_dict()\n\n    # TODO: Review model attributes when to_dict and serialize methods are unified\n    return {\n        \"id\": id,\n        \"fields\": fields,\n        \"metadata\": metadata,\n        \"suggestions\": suggestions,\n        \"responses\": responses,\n        \"vectors\": vectors,\n        \"status\": status,\n        \"_server_id\": server_id,\n    }\n
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.from_dict","title":"from_dict(data, dataset=None) classmethod","text":"

Converts a dictionary to a Record object.

Args:
data: A dictionary representing the record.
dataset: The dataset object to which the record belongs.

Returns:
A Record object.

Source code in src/argilla/records/_resource.py
@classmethod\ndef from_dict(cls, data: Dict[str, Dict], dataset: Optional[\"Dataset\"] = None) -> \"Record\":\n    \"\"\"Converts a dictionary to a Record object.\n    Args:\n        data: A dictionary representing the record.\n        dataset: The dataset object to which the record belongs.\n    Returns:\n        A Record object.\n    \"\"\"\n    fields = data.get(\"fields\", {})\n    metadata = data.get(\"metadata\", {})\n    suggestions = data.get(\"suggestions\", {})\n    responses = data.get(\"responses\", {})\n    vectors = data.get(\"vectors\", {})\n    record_id = data.get(\"id\", None)\n    _server_id = data.get(\"_server_id\", None)\n\n    suggestions = [Suggestion(question_name=question_name, **value) for question_name, value in suggestions.items()]\n    responses = [\n        Response(question_name=question_name, **value)\n        for question_name, _responses in responses.items()\n        for value in _responses\n    ]\n\n    return cls(\n        id=record_id,\n        fields=fields,\n        suggestions=suggestions,\n        responses=responses,\n        vectors=vectors,\n        metadata=metadata,\n        _dataset=dataset,\n        _server_id=_server_id,\n    )\n
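
As a round-trip sketch, to_dict and from_dict can be combined to export a record and rebuild it (assuming an existing record object):

record_dict = record.to_dict()  # plain-dict export of the record\nsame_record = rg.Record.from_dict(record_dict)  # rebuild a Record from the dict\n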
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.from_model","title":"from_model(model, dataset) classmethod","text":"

Converts a RecordModel object to a Record object.

Args:
model: A RecordModel object.
dataset: The dataset object to which the record belongs.

Returns:
A Record object.

Source code in src/argilla/records/_resource.py
@classmethod\ndef from_model(cls, model: RecordModel, dataset: \"Dataset\") -> \"Record\":\n    \"\"\"Converts a RecordModel object to a Record object.\n    Args:\n        model: A RecordModel object.\n        dataset: The dataset object to which the record belongs.\n    Returns:\n        A Record object.\n    \"\"\"\n    instance = cls(\n        id=model.external_id,\n        fields=model.fields,\n        metadata={meta.name: meta.value for meta in model.metadata},\n        vectors={vector.name: vector.vector_values for vector in model.vectors},\n        _dataset=dataset,\n        responses=[],\n        suggestions=[],\n    )\n\n    # set private attributes\n    instance._dataset = dataset\n    instance._model = model\n\n    # Responses and suggestions are computed separately based on the record model\n    instance.responses.from_models(model.responses)\n    instance.suggestions.from_models(model.suggestions)\n\n    return instance\n
"},{"location":"reference/argilla/records/responses/","title":"rg.Response","text":"

Class for interacting with Argilla Responses of records. Responses are answers to questions by a user. Therefore, a record question can have multiple responses, one for each user that has answered the question. A Response is typically created by a user in the UI or consumed from a data source as a label, unlike a Suggestion which is typically created by a model prediction.

"},{"location":"reference/argilla/records/responses/#usage-examples","title":"Usage Examples","text":"

Responses can be added to an instantiated Record directly or as a dictionary. The following examples demonstrate how to add responses to a record object and how to access responses from a record object:

Instantiate the Record and related Response objects:

dataset.records.log(\n    [\n        rg.Record(\n            fields={\"text\": \"Hello World, how are you?\"},\n            responses=[rg.Response(\"label\", \"negative\", user_id=user.id)],\n            id=str(uuid.uuid4()),\n        )\n    ]\n)\n

Or, add a response from a dictionary where the key is the question name and the value is the response:

dataset.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"label.response\": \"negative\",\n        },\n    ]\n)\n

Responses can be accessed from a Record via the record's responses attribute, indexed by question name. So if a question is named label, the responses can be accessed as record.responses[\"label\"]. The following example demonstrates how to access responses from a record object:

# iterate over the records and responses\n\nfor record in dataset.records:\n    for response in record.responses[\"label\"]: # (1)\n        print(response.value)\n        print(response.user_id)\n\n# validate that the record has a response\n\nfor record in dataset.records:\n    if record.responses[\"label\"]:\n        for response in record.responses[\"label\"]:\n            print(response.value)\n            print(response.user_id)\n    else:\n        record.responses.add(\n            rg.Response(\"label\", \"positive\", user_id=user.id)\n        ) # (2)\n
  1. Access the responses for the question named label for each record like a dictionary containing a list of Response objects.
  2. Add a response to the record if it does not already have one.

"},{"location":"reference/argilla/records/responses/#format-per-question-type","title":"Format per Question type","text":"

Depending on the Question type, responses might need to be formatted in a slightly different way.

For LabelQuestionFor MultiLabelQuestionFor RankingQuestionFor RatingQuestionFor SpanQuestionFor TextQuestion
rg.Response(\n    question_name=\"label\",\n    value=\"positive\",\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"multi-label\",\n    value=[\"positive\", \"negative\"],\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"rank\",\n    value=[\"1\", \"3\", \"2\"],\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"rating\",\n    value=4,\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"span\",\n    value=[{\"start\": 0, \"end\": 9, \"label\": \"MISC\"}],\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"text\",\n    value=\"value\",\n    user_id=user.id,\n    status=\"draft\"\n)\n
"},{"location":"reference/argilla/records/responses/#src.argilla.responses.Response","title":"Response","text":"

Class for interacting with Argilla Responses of records. Responses are answers to questions by a user. Therefore, a record question can have multiple responses, one for each user that has answered the question. A Response is typically created by a user in the UI or consumed from a data source as a label, unlike a Suggestion which is typically created by a model prediction.

Source code in src/argilla/responses.py
class Response:\n    \"\"\"Class for interacting with Argilla Responses of records. Responses are answers to questions by a user.\n    Therefore, a record question can have multiple responses, one for each user that has answered the question.\n    A `Response` is typically created by a user in the UI or consumed from a data source as a label,\n    unlike a `Suggestion` which is typically created by a model prediction.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        question_name: str,\n        value: Any,\n        user_id: UUID,\n        status: Optional[Union[ResponseStatus, str]] = None,\n        _record: Optional[\"Record\"] = None,\n    ) -> None:\n        \"\"\"Initializes a `Response` for a `Record` with a user_id and value\n\n        Attributes:\n            question_name (str): The name of the question that the suggestion is for.\n            value (str): The value of the response\n            user_id (UUID): The id of the user that submits the response\n            status (Union[ResponseStatus, str]): The status of the response as \"draft\", \"submitted\", \"discarded\".\n        \"\"\"\n\n        if question_name is None:\n            raise ValueError(\"question_name is required\")\n        if value is None:\n            raise ValueError(\"value is required\")\n        if user_id is None:\n            raise ValueError(\"user_id is required\")\n\n        if isinstance(status, str):\n            status = ResponseStatus(status)\n\n        self._record = _record\n        self.question_name = question_name\n        self.value = value\n        self.user_id = user_id\n        self.status = status\n\n    @property\n    def record(self) -> \"Record\":\n        \"\"\"Returns the record associated with the response\"\"\"\n        return self._record\n\n    @record.setter\n    def record(self, record: \"Record\") -> None:\n        \"\"\"Sets the record associated with the response\"\"\"\n        self._record = record\n\n    def serialize(self) -> dict[str, Any]:\n        \"\"\"Serializes the Response to a dictionary. This is principally used for sending the response to the API, \\\n            but can be used for data wrangling or manual export.\n\n        Returns:\n            dict[str, Any]: The serialized response as a dictionary with keys `question_name`, `value`, and `user_id`.\n\n        Examples:\n\n        ```python\n        response = rg.Response(\"label\", \"negative\", user_id=user.id)\n        response.serialize()\n        ```\n        \"\"\"\n        return {\n            \"question_name\": self.question_name,\n            \"value\": self.value,\n            \"user_id\": self.user_id,\n            \"status\": self.status,\n        }\n
"},{"location":"reference/argilla/records/responses/#src.argilla.responses.Response.record","title":"record: Record property writable","text":"

Returns the record associated with the response

"},{"location":"reference/argilla/records/responses/#src.argilla.responses.Response.__init__","title":"__init__(question_name, value, user_id, status=None, _record=None)","text":"

Initializes a Response for a Record with a user_id and value

Attributes:

question_name (str): The name of the question that the response is for.
value (str): The value of the response.
user_id (UUID): The id of the user that submitted the response.
status (Union[ResponseStatus, str]): The status of the response as \"draft\", \"submitted\", or \"discarded\".

Source code in src/argilla/responses.py
def __init__(\n    self,\n    question_name: str,\n    value: Any,\n    user_id: UUID,\n    status: Optional[Union[ResponseStatus, str]] = None,\n    _record: Optional[\"Record\"] = None,\n) -> None:\n    \"\"\"Initializes a `Response` for a `Record` with a user_id and value\n\n    Attributes:\n        question_name (str): The name of the question that the suggestion is for.\n        value (str): The value of the response\n        user_id (UUID): The id of the user that submits the response\n        status (Union[ResponseStatus, str]): The status of the response as \"draft\", \"submitted\", \"discarded\".\n    \"\"\"\n\n    if question_name is None:\n        raise ValueError(\"question_name is required\")\n    if value is None:\n        raise ValueError(\"value is required\")\n    if user_id is None:\n        raise ValueError(\"user_id is required\")\n\n    if isinstance(status, str):\n        status = ResponseStatus(status)\n\n    self._record = _record\n    self.question_name = question_name\n    self.value = value\n    self.user_id = user_id\n    self.status = status\n
"},{"location":"reference/argilla/records/responses/#src.argilla.responses.Response.serialize","title":"serialize()","text":"

Serializes the Response to a dictionary. This is principally used for sending the response to the API, but can be used for data wrangling or manual export.

Returns:

dict[str, Any]: The serialized response as a dictionary with keys question_name, value, and user_id.

Examples:

response = rg.Response(\"label\", \"negative\", user_id=user.id)\nresponse.serialize()\n
Source code in src/argilla/responses.py
def serialize(self) -> dict[str, Any]:\n    \"\"\"Serializes the Response to a dictionary. This is principally used for sending the response to the API, \\\n        but can be used for data wrangling or manual export.\n\n    Returns:\n        dict[str, Any]: The serialized response as a dictionary with keys `question_name`, `value`, and `user_id`.\n\n    Examples:\n\n    ```python\n    response = rg.Response(\"label\", \"negative\", user_id=user.id)\n    response.serialize()\n    ```\n    \"\"\"\n    return {\n        \"question_name\": self.question_name,\n        \"value\": self.value,\n        \"user_id\": self.user_id,\n        \"status\": self.status,\n    }\n
"},{"location":"reference/argilla/records/suggestions/","title":"rg.Suggestion","text":"

Class for interacting with Argilla Suggestions of records. Suggestions are typically created by a model prediction, unlike a Response which is typically created by a user in the UI or consumed from a data source as a label.

"},{"location":"reference/argilla/records/suggestions/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/records/suggestions/#adding-records-with-suggestions","title":"Adding records with suggestions","text":"

Suggestions can be added to a record directly or via a dictionary structure. The following examples demonstrate how to add suggestions to a record object and how to access suggestions from a record object:

Add a suggestion from a dictionary where the key is the question name and the value is the suggestion:

dataset.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"label\": \"negative\", # this will be used as a suggestion\n        },\n    ]\n)\n

If your data contains scores for suggestions, you can add them as well via the mapping parameter. The following example demonstrates how to add a suggestion with a score to a record object:

dataset.records.log(\n    [\n        {\n            \"prompt\": \"Hello World, how are you?\",\n            \"label\": \"negative\",  # this will be used as a suggestion\n            \"score\": 0.9,  # this will be used as the suggestion score\n            \"model\": \"model_name\",  # this will be used as the suggestion agent\n        },\n    ],\n    mapping={\n        \"score\": \"label.suggestion.score\",\n        \"model\": \"label.suggestion.agent\",\n    },  # `label` is the question name in the dataset settings\n)\n

Or, instantiate the Record and related Suggestion objects directly, like this:

dataset.records.log(\n    [\n        rg.Record(\n            fields={\"text\": \"Hello World, how are you?\"},\n            suggestions=[rg.Suggestion(\"label\", \"negative\", score=0.9, agent=\"model_name\")],\n        )\n    ]\n)\n
"},{"location":"reference/argilla/records/suggestions/#iterating-over-records-with-suggestions","title":"Iterating over records with suggestions","text":"

Just like responses, suggestions can be accessed from a Record via the record's suggestions attribute, indexed by question name. So if a question is named label, the suggestion can be accessed as record.suggestions[\"label\"]. The following example demonstrates how to access suggestions from a record object:

for record in dataset.records(with_suggestions=True):\n    print(record.suggestions[\"label\"].value)\n

We can also add suggestions to records as we iterate over them using the add method:

for record in dataset.records(with_suggestions=True):\n    if not record.suggestions[\"label\"]: # (1)\n        record.suggestions.add(\n            rg.Suggestion(\"label\", \"positive\", score=0.9, agent=\"model_name\")\n        ) # (2)\n
  1. Validate that the record has a suggestion
  2. Add a suggestion to the record if it does not already have one
"},{"location":"reference/argilla/records/suggestions/#format-per-question-type","title":"Format per Question type","text":"

Depending on the Question type, suggestions might need to be formatted in a slightly different way.

For LabelQuestionFor MultiLabelQuestionFor RankingQuestionFor RatingQuestionFor SpanQuestionFor TextQuestion
rg.Suggestion(\n    question_name=\"label\",\n    value=\"positive\",\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"multi-label\",\n    value=[\"positive\", \"negative\"],\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"rank\",\n    value=[\"1\", \"3\", \"2\"],\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"rating\",\n    value=4,\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"span\",\n    value=[{\"start\": 0, \"end\": 9, \"label\": \"MISC\"}],\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"text\",\n    value=\"value\",\n    score=0.9,\n    agent=\"model_name\"\n)\n
"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion","title":"Suggestion","text":"

Bases: Resource

Class for interacting with Argilla Suggestions. Suggestions are typically model predictions for records. Suggestions are rendered in the user interfaces as 'hints' or 'suggestions' for the user to review and accept or reject.

Attributes:

question_name (str): The name of the question that the suggestion is for.
value (str): The value of the suggestion.
score (float): The score of the suggestion. For example, the probability of the model prediction.
agent (str): The agent that created the suggestion. For example, the model name.
type (str): The type of suggestion, either 'model' or 'human'.

Source code in src/argilla/suggestions.py
class Suggestion(Resource):\n    \"\"\"Class for interacting with Argilla Suggestions. Suggestions are typically model predictions for records.\n    Suggestions are rendered in the user interfaces as 'hints' or 'suggestions' for the user to review and accept or reject.\n\n    Attributes:\n        question_name (str): The name of the question that the suggestion is for.\n        value (str): The value of the suggestion\n        score (float): The score of the suggestion. For example, the probability of the model prediction.\n        agent (str): The agent that created the suggestion. For example, the model name.\n        type (str): The type of suggestion, either 'model' or 'human'.\n    \"\"\"\n\n    _model: SuggestionModel\n\n    def __init__(\n        self,\n        question_name: str,\n        value: Any,\n        score: Union[float, List[float], None] = None,\n        agent: Optional[str] = None,\n        type: Optional[Literal[\"model\", \"human\"]] = None,\n        _record: Optional[\"Record\"] = None,\n    ) -> None:\n        super().__init__()\n\n        if question_name is None:\n            raise ValueError(\"question_name is required\")\n        if value is None:\n            raise ValueError(\"value is required\")\n\n        self._record = _record\n        self._model = SuggestionModel(\n            question_name=question_name,\n            value=value,\n            type=type,\n            score=score,\n            agent=agent,\n        )\n\n    ##############################\n    # Properties\n    ##############################\n\n    @property\n    def value(self) -> Any:\n        \"\"\"The value of the suggestion.\"\"\"\n        return self._model.value\n\n    @property\n    def question_name(self) -> Optional[str]:\n        \"\"\"The name of the question that the suggestion is for.\"\"\"\n        return self._model.question_name\n\n    @question_name.setter\n    def question_name(self, value: str) -> None:\n        self._model.question_name = value\n\n    @property\n    def type(self) -> Optional[Literal[\"model\", \"human\"]]:\n        \"\"\"The type of suggestion, either 'model' or 'human'.\"\"\"\n        return self._model.type\n\n    @property\n    def score(self) -> Optional[Union[float, List[float]]]:\n        \"\"\"The score of the suggestion.\"\"\"\n        return self._model.score\n\n    @score.setter\n    def score(self, value: float) -> None:\n        self._model.score = value\n\n    @property\n    def agent(self) -> Optional[str]:\n        \"\"\"The agent that created the suggestion.\"\"\"\n        return self._model.agent\n\n    @agent.setter\n    def agent(self, value: str) -> None:\n        self._model.agent = value\n\n    @property\n    def record(self) -> Optional[\"Record\"]:\n        \"\"\"The record that the suggestion is for.\"\"\"\n        return self._record\n\n    @record.setter\n    def record(self, value: \"Record\") -> None:\n        self._record = value\n\n    @classmethod\n    def from_model(cls, model: SuggestionModel, record: \"Record\") -> \"Suggestion\":\n        question = record.dataset.settings.questions[model.question_id]\n        model.question_name = question.name\n        model.value = cls.__from_model_value(model.value, question)\n\n        instance = cls(question.name, model.value, _record=record)\n        instance._model = model\n\n        return instance\n\n    def api_model(self) -> SuggestionModel:\n        if self.record is None or self.record.dataset is None:\n            return self._model\n\n        question = 
self.record.dataset.settings.questions[self.question_name]\n        if question:\n            return SuggestionModel(\n                value=self.__to_model_value(self.value, question),\n                question_name=None if not question else question.name,\n                question_id=None if not question else question.id,\n                type=self._model.type,\n                score=self._model.score,\n                agent=self._model.agent,\n                id=self._model.id,\n            )\n        else:\n            raise RecordSuggestionsError(\n                f\"Record suggestion is invalid because question with name={self.question_name} does not exist in the dataset ({self.record.dataset.name}). Available questions are: {list(self.record.dataset.settings.questions._properties_by_name.keys())}\"\n            )\n\n    @classmethod\n    def __to_model_value(cls, value: Any, question: \"QuestionType\") -> Any:\n        if isinstance(question, RankingQuestion):\n            return cls.__ranking_to_model_value(value)\n        return value\n\n    @classmethod\n    def __from_model_value(cls, value: Any, question: \"QuestionType\") -> Any:\n        if isinstance(question, RankingQuestion):\n            return cls.__ranking_from_model_value(value)\n        return value\n\n    @classmethod\n    def __ranking_from_model_value(cls, value: List[Dict[str, Any]]) -> List[str]:\n        return [v[\"value\"] for v in value]\n\n    @classmethod\n    def __ranking_to_model_value(cls, value: List[str]) -> List[Dict[str, str]]:\n        return [{\"value\": str(v)} for v in value]\n
"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.value","title":"value: Any property","text":"

The value of the suggestion.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.question_name","title":"question_name: Optional[str] property writable","text":"

The name of the question that the suggestion is for.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.type","title":"type: Optional[Literal['model', 'human']] property","text":"

The type of suggestion, either 'model' or 'human'.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.score","title":"score: Optional[Union[float, List[float]]] property writable","text":"

The score of the suggestion.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.agent","title":"agent: Optional[str] property writable","text":"

The agent that created the suggestion.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.record","title":"record: Optional[Record] property writable","text":"

The record that the suggestion is for.

"},{"location":"reference/argilla/records/vectors/","title":"rg.Vector","text":"

A vector is a numerical representation of a Record field or attribute, usually the record's text. Vectors can be used to search for similar records via the UI or SDK. Vectors can be added to a record directly or as a dictionary with a key that matches the rg.VectorField name.

"},{"location":"reference/argilla/records/vectors/#usage-examples","title":"Usage Examples","text":"

To use vectors within a dataset, you must define vector fields in the dataset settings. The vectors parameter of Settings accepts a list of vector fields that can be attached to records. The following example demonstrates how to add vectors to a dataset and how to access vectors from a record object:

import argilla as rg\n\ndataset = rg.Dataset(\n    name=\"dataset_with_vectors\",\n    settings=rg.Settings(\n        fields=[rg.TextField(name=\"text\")],\n        questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n        vectors=[\n            rg.VectorField(name=\"vector_name\"),\n        ],\n    ),\n)\ndataset.create()\n

Then, you can add records to the dataset with vectors that correspond to the vector field defined in the dataset settings:

dataset.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"vector_name\": [0.1, 0.2, 0.3]\n        }\n    ]\n)\n

Vectors can be passed using a mapping, where the key is the key in the data source and the value is the name in the dataset's setting's rg.VectorField object. For example, the following code adds a record with a vector using a mapping:

dataset.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"x\": [0.1, 0.2, 0.3]\n        }\n    ],\n    mapping={\"x\": \"vector_name\"}\n)\n

Or, vectors can be instantiated and added to a record directly, like this:

dataset.records.log(\n    [\n        rg.Record(\n            fields={\"text\": \"Hello World, how are you?\"},\n            vectors=[rg.Vector(\"vector_name\", [0.1, 0.2, 0.3])],\n        )\n    ]\n)\n
"},{"location":"reference/argilla/records/vectors/#src.argilla.vectors.Vector","title":"Vector","text":"

Bases: Resource

Class for interacting with Argilla Vectors. Vectors are typically used to represent embeddings or features of records. The Vector class is used to deliver vectors to the Argilla server.

Attributes:

name (str): The name of the vector.
values (list[float]): The values of the vector.

Source code in src/argilla/vectors.py
class Vector(Resource):\n    \"\"\" Class for interacting with Argilla Vectors. Vectors are typically used to represent \\\n        embeddings or features of records. The `Vector` class is used to deliver vectors to the Argilla server.\n\n    Attributes:\n        name (str): The name of the vector.\n        values (list[float]): The values of the vector.\n    \"\"\"\n\n    _model: VectorModel\n\n    def __init__(\n        self,\n        name: str,\n        values: list[float],\n    ) -> None:\n        \"\"\"Initializes a Vector with a name and values that can be used to search in the Argilla ui.\n\n        Parameters:\n            name (str): Name of the vector\n            values (list[float]): List of float values\n\n        \"\"\"\n        self._model = VectorModel(\n            name=name,\n            vector_values=values,\n        )\n\n    def __repr__(self) -> str:\n        return repr(f\"{self.__class__.__name__}({self._model})\")\n\n    ##############################\n    # Properties\n    ##############################\n\n    @property\n    def name(self) -> str:\n        \"\"\"Name of the vector that corresponds to the name of the vector in the dataset's `Settings`\"\"\"\n        return self._model.name\n\n    @property\n    def values(self) -> list[float]:\n        \"\"\"List of float values that represent the vector.\"\"\"\n        return self._model.vector_values\n\n    ##############################\n    # Methods\n    ##############################\n\n    @classmethod\n    def from_model(cls, model: VectorModel) -> \"Vector\":\n        return cls(\n            name=model.name,\n            values=model.vector_values,\n        )\n\n    def serialize(self) -> dict[str, Any]:\n        dumped_model = self._model.model_dump()\n        name = dumped_model.pop(\"name\")\n        values = dumped_model.pop(\"vector_values\")\n        return {name: values}\n
"},{"location":"reference/argilla/records/vectors/#src.argilla.vectors.Vector.name","title":"name: str property","text":"

Name of the vector that corresponds to the name of the vector in the dataset's Settings

"},{"location":"reference/argilla/records/vectors/#src.argilla.vectors.Vector.values","title":"values: list[float] property","text":"

List of float values that represent the vector.
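
A small sketch of these properties (the name and values are illustrative):

vector = rg.Vector(name=\"embedding\", values=[0.1, 0.2, 0.3])\nprint(vector.name)    # \"embedding\"\nprint(vector.values)  # [0.1, 0.2, 0.3]\n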

"},{"location":"reference/argilla/records/vectors/#src.argilla.vectors.Vector.__init__","title":"__init__(name, values)","text":"

Initializes a Vector with a name and values that can be used to search in the Argilla UI.

Parameters:

name (str, required): Name of the vector.
values (list[float], required): List of float values.

Source code in src/argilla/vectors.py
def __init__(\n    self,\n    name: str,\n    values: list[float],\n) -> None:\n    \"\"\"Initializes a Vector with a name and values that can be used to search in the Argilla ui.\n\n    Parameters:\n        name (str): Name of the vector\n        values (list[float]): List of float values\n\n    \"\"\"\n    self._model = VectorModel(\n        name=name,\n        vector_values=values,\n    )\n
"},{"location":"reference/argilla/settings/fields/","title":"Fields","text":"

Fields in Argilla define the content of a record that will be reviewed by a user.

"},{"location":"reference/argilla/settings/fields/#usage-examples","title":"Usage Examples","text":"

To define a field, instantiate the different field classes and pass them to the fields parameter of the Settings class.

text_field = rg.TextField(name=\"text\")\nmarkdown_field = rg.TextField(name=\"text\", use_markdown=True)\nimage_field = rg.ImageField(name=\"image\")\n

The fields parameter of the Settings class can accept a list of fields, like this:

settings = rg.Settings(\n    fields=[\n        text_field,\n        markdown_field,\n        image_field,\n    ],\n    questions=[\n        rg.TextQuestion(name=\"response\"),\n    ],\n)\n\ndata = rg.Dataset(\n    name=\"my_dataset\",\n    settings=settings,\n)\n

To add records with values for fields, refer to the rg.Dataset.records documentation.

"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.TextField","title":"TextField","text":"

Bases: AbstractField

Text field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_field.py
class TextField(AbstractField):\n    \"\"\"Text field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        use_markdown: Optional[bool] = False,\n        required: bool = True,\n        description: Optional[str] = None,\n        client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"Text field for use in Argilla `Dataset` `Settings`\n        Parameters:\n            name (str): The name of the field\n            title (Optional[str], optional): The title of the field. Defaults to None.\n            use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to False.\n            required (bool): Whether the field is required. Defaults to True.\n            description (Optional[str], optional): The description of the field. Defaults to None.\n\n        \"\"\"\n\n        super().__init__(\n            name=name,\n            title=title,\n            required=required,\n            description=description,\n            settings=TextFieldSettings(use_markdown=use_markdown),\n            _client=client,\n        )\n\n    @property\n    def use_markdown(self) -> Optional[bool]:\n        return self._model.settings.use_markdown\n\n    @use_markdown.setter\n    def use_markdown(self, value: bool) -> None:\n        self._model.settings.use_markdown = value\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.TextField.__init__","title":"__init__(name, title=None, use_markdown=False, required=True, description=None, client=None)","text":"

Text field for use in Argilla Dataset Settings.

Parameters:

name (str): The name of the field.
title (Optional[str], optional): The title of the field. Defaults to None.
use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to False.
required (bool): Whether the field is required. Defaults to True.
description (Optional[str], optional): The description of the field. Defaults to None.

Source code in src/argilla/settings/_field.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    use_markdown: Optional[bool] = False,\n    required: bool = True,\n    description: Optional[str] = None,\n    client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"Text field for use in Argilla `Dataset` `Settings`\n    Parameters:\n        name (str): The name of the field\n        title (Optional[str], optional): The title of the field. Defaults to None.\n        use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to False.\n        required (bool): Whether the field is required. Defaults to True.\n        description (Optional[str], optional): The description of the field. Defaults to None.\n\n    \"\"\"\n\n    super().__init__(\n        name=name,\n        title=title,\n        required=required,\n        description=description,\n        settings=TextFieldSettings(use_markdown=use_markdown),\n        _client=client,\n    )\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.ImageField","title":"ImageField","text":"

Bases: AbstractField

Image field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_field.py
class ImageField(AbstractField):\n    \"\"\"Image field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        required: Optional[bool] = True,\n        description: Optional[str] = None,\n        _client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"\n        Text field for use in Argilla `Dataset` `Settings`\n\n        Parameters:\n            name (str): The name of the field\n            title (Optional[str], optional): The title of the field. Defaults to None.\n            required (Optional[bool], optional): Whether the field is required. Defaults to True.\n            description (Optional[str], optional): The description of the field. Defaults to None.\n        \"\"\"\n\n        super().__init__(\n            name=name,\n            title=title,\n            required=required,\n            description=description,\n            settings=ImageFieldSettings(),\n            _client=_client,\n        )\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.ImageField.__init__","title":"__init__(name, title=None, required=True, description=None, _client=None)","text":"

Image field for use in Argilla Dataset Settings

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the field | required |
| title | Optional[str] | The title of the field. Defaults to None. | None |
| required | Optional[bool] | Whether the field is required. Defaults to True. | True |
| description | Optional[str] | The description of the field. Defaults to None. | None |

Source code in src/argilla/settings/_field.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    required: Optional[bool] = True,\n    description: Optional[str] = None,\n    _client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"\n    Image field for use in Argilla `Dataset` `Settings`\n\n    Parameters:\n        name (str): The name of the field\n        title (Optional[str], optional): The title of the field. Defaults to None.\n        required (Optional[bool], optional): Whether the field is required. Defaults to True.\n        description (Optional[str], optional): The description of the field. Defaults to None.\n    \"\"\"\n\n    super().__init__(\n        name=name,\n        title=title,\n        required=required,\n        description=description,\n        settings=ImageFieldSettings(),\n        _client=_client,\n    )\n
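For illustration, a minimal sketch of defining an image field; the field name is an arbitrary example, and records are assumed to supply the image as a URL or data URI:

```python
import argilla as rg

# An image field; each record provides the image as a URL or data URI.
image_field = rg.ImageField(
    name="image",
    title="Image",
)
```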
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.ChatField","title":"ChatField","text":"

Bases: AbstractField

Chat field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_field.py
class ChatField(AbstractField):\n    \"\"\"Chat field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        use_markdown: Optional[bool] = True,\n        required: bool = True,\n        description: Optional[str] = None,\n        _client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"\n        Chat field for use in Argilla `Dataset` `Settings`\n\n        Parameters:\n            name (str): The name of the field\n            title (Optional[str], optional): The title of the field. Defaults to None.\n            use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to True.\n            required (bool): Whether the field is required. Defaults to True.\n            description (Optional[str], optional): The description of the field. Defaults to None.\n        \"\"\"\n\n        super().__init__(\n            name=name,\n            title=title,\n            required=required,\n            description=description,\n            settings=ChatFieldSettings(use_markdown=use_markdown),\n            _client=_client,\n        )\n\n    @property\n    def use_markdown(self) -> Optional[bool]:\n        return self._model.settings.use_markdown\n\n    @use_markdown.setter\n    def use_markdown(self, value: bool) -> None:\n        self._model.settings.use_markdown = value\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.ChatField.__init__","title":"__init__(name, title=None, use_markdown=True, required=True, description=None, _client=None)","text":"

Chat field for use in Argilla Dataset Settings

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the field | required |
| title | Optional[str] | The title of the field. Defaults to None. | None |
| use_markdown | Optional[bool] | Whether to use markdown. Defaults to True. | True |
| required | bool | Whether the field is required. Defaults to True. | True |
| description | Optional[str] | The description of the field. Defaults to None. | None |

Source code in src/argilla/settings/_field.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    use_markdown: Optional[bool] = True,\n    required: bool = True,\n    description: Optional[str] = None,\n    _client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"\n    Chat field for use in Argilla `Dataset` `Settings`\n\n    Parameters:\n        name (str): The name of the field\n        title (Optional[str], optional): The title of the field. Defaults to None.\n        use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to True.\n        required (bool): Whether the field is required. Defaults to True.\n        description (Optional[str], optional): The description of the field. Defaults to None.\n    \"\"\"\n\n    super().__init__(\n        name=name,\n        title=title,\n        required=required,\n        description=description,\n        settings=ChatFieldSettings(use_markdown=use_markdown),\n        _client=_client,\n    )\n
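For illustration, a minimal sketch of defining a chat field; the field name is an arbitrary example, and records are assumed to supply the conversation as a list of role/content messages:

```python
import argilla as rg

# A chat field; markdown rendering is enabled by default.
# Records are assumed to supply values shaped like:
# [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]
chat_field = rg.ChatField(
    name="conversation",
)
```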
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.CustomField","title":"CustomField","text":"

Bases: AbstractField

Custom field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_field.py
class CustomField(AbstractField):\n    \"\"\"Custom field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        template: Optional[str] = \"\",\n        advanced_mode: Optional[bool] = False,\n        required: bool = True,\n        description: Optional[str] = None,\n        _client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"\n        Custom field for use in Argilla `Dataset` `Settings` for working with custom HTML and CSS templates.\n        By default argilla will use a brackets syntax engine for the templates, which converts\n        `{{ field.key }}` to the values of record's field's object.\n\n        Parameters:\n            name (str): The name of the field\n            title (Optional[str], optional): The title of the field. Defaults to None.\n            template (str): The template of the field (HTML and CSS)\n            advanced_mode (Optional[bool], optional): Whether to use advanced mode. Defaults to False.\n                Deactivate the brackets syntax engine and use custom javascript to render the field.\n            required (bool): Whether the field is required. Defaults to True.\n            description (Optional[str], optional): The description of the field. Defaults to None.\n        \"\"\"\n        template = self._load_template(template)\n        super().__init__(\n            name=name,\n            title=title,\n            required=required,\n            description=description,\n            settings=CustomFieldSettings(template=template, advanced_mode=advanced_mode),\n            _client=_client,\n        )\n\n    @property\n    def template(self) -> Optional[str]:\n        return self._model.settings.template\n\n    @template.setter\n    def template(self, value: str) -> None:\n        self._model.settings.template = self._load_template(value)\n\n    @property\n    def advanced_mode(self) -> Optional[bool]:\n        return self._model.settings.advanced_mode\n\n    @advanced_mode.setter\n    def advanced_mode(self, value: bool) -> None:\n        self._model.settings.advanced_mode = value\n\n    def validate(self):\n        if self.template is None or self.template.strip() == \"\":\n            raise SettingsError(\"A valid template is required for CustomField\")\n\n    @classmethod\n    def _load_template(cls, template: str) -> str:\n        if template.endswith(\".html\") and os.path.exists(template):\n            with open(template, \"r\") as f:\n                return f.read()\n        if template.startswith(\"http\") or template.startswith(\"https\"):\n            return requests.get(template).text\n        if isinstance(template, str):\n            return template\n        raise ArgillaError(\n            \"Invalid template. Please provide 1: a valid path or URL to a HTML file. 2: a valid HTML string.\"\n        )\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.CustomField.__init__","title":"__init__(name, title=None, template='', advanced_mode=False, required=True, description=None, _client=None)","text":"

Custom field for use in Argilla Dataset Settings for working with custom HTML and CSS templates. By default, Argilla uses a brackets syntax engine for templates, which converts {{ field.key }} into the values of the record's field object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the field | required |
| title | Optional[str] | The title of the field. Defaults to None. | None |
| template | str | The template of the field (HTML and CSS) | '' |
| advanced_mode | Optional[bool] | Whether to use advanced mode. Defaults to False. Deactivates the brackets syntax engine so that custom JavaScript can render the field. | False |
| required | bool | Whether the field is required. Defaults to True. | True |
| description | Optional[str] | The description of the field. Defaults to None. | None |

Source code in src/argilla/settings/_field.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    template: Optional[str] = \"\",\n    advanced_mode: Optional[bool] = False,\n    required: bool = True,\n    description: Optional[str] = None,\n    _client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"\n    Custom field for use in Argilla `Dataset` `Settings` for working with custom HTML and CSS templates.\n    By default argilla will use a brackets syntax engine for the templates, which converts\n    `{{ field.key }}` to the values of record's field's object.\n\n    Parameters:\n        name (str): The name of the field\n        title (Optional[str], optional): The title of the field. Defaults to None.\n        template (str): The template of the field (HTML and CSS)\n        advanced_mode (Optional[bool], optional): Whether to use advanced mode. Defaults to False.\n            Deactivate the brackets syntax engine and use custom javascript to render the field.\n        required (bool): Whether the field is required. Defaults to True.\n        description (Optional[str], optional): The description of the field. Defaults to None.\n    \"\"\"\n    template = self._load_template(template)\n    super().__init__(\n        name=name,\n        title=title,\n        required=required,\n        description=description,\n        settings=CustomFieldSettings(template=template, advanced_mode=advanced_mode),\n        _client=_client,\n    )\n
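For illustration, a minimal sketch of a custom field that relies on the default brackets syntax engine; the template and field names are arbitrary examples:

```python
import argilla as rg

# The brackets syntax engine replaces {{ record.fields.text }} with the
# value of the record's "text" field when the template is rendered.
custom_field = rg.CustomField(
    name="rendered_text",
    template="<div>{{ record.fields.text }}</div>",
)
```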
"},{"location":"reference/argilla/settings/metadata_property/","title":"Metadata Properties","text":"

Metadata properties are used to define metadata fields in a dataset. Metadata fields are used to store additional information about the records in the dataset. For example, the category of a record, the price of a product, or any other information that is relevant to the record.

"},{"location":"reference/argilla/settings/metadata_property/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/settings/metadata_property/#defining-metadata-property-for-a-dataset","title":"Defining Metadata Property for a dataset","text":"

We define metadata properties via type-specific classes. The following example demonstrates how to define metadata properties as float, integer, or terms metadata properties and pass them to the Settings.

TermsMetadataProperty is used to define a metadata field with a list of options: for example, a color field with the options red, blue, and green. FloatMetadataProperty and IntegerMetadataProperty are used to define metadata fields with numeric values: for example, a price field with a minimum value of 0.0 and a maximum value of 100.0.

metadata_field = rg.TermsMetadataProperty(\n    name=\"color\",\n    options=[\"red\", \"blue\", \"green\"],\n    title=\"Color\",\n)\n\nfloat_metadata_field = rg.FloatMetadataProperty(\n    name=\"price\",\n    min=0.0,\n    max=100.0,\n    title=\"Price\",\n)\n\nint_metadata_field = rg.IntegerMetadataProperty(\n    name=\"quantity\",\n    min=0,\n    max=100,\n    title=\"Quantity\",\n)\n\nsettings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    questions=[\n        rg.TextQuestion(name=\"response\"),\n    ],\n    metadata=[\n        metadata_field,\n        float_metadata_field,\n        int_metadata_field,\n    ],\n)\n\ndataset = rg.Dataset(\n    name=\"my_dataset\",\n    settings=settings,\n)\n

To add records with metadata, refer to the rg.Metadata class documentation.
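As a brief sketch of what that looks like, assuming the dataset defined above has been created on the server, a record's metadata keys match the metadata property names:

```python
import argilla as rg

# Assumes `dataset` is the dataset created above, with the "color",
# "price", and "quantity" metadata properties defined in its settings.
dataset.records.log(
    [
        rg.Record(
            fields={"text": "A bright red jacket."},
            metadata={"color": "red", "price": 49.99, "quantity": 10},
        )
    ]
)
```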

"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.FloatMetadataProperty","title":"FloatMetadataProperty","text":"

Bases: MetadataPropertyBase

Source code in src/argilla/settings/_metadata.py
class FloatMetadataProperty(MetadataPropertyBase):\n    def __init__(\n        self,\n        name: str,\n        min: Optional[float] = None,\n        max: Optional[float] = None,\n        title: Optional[str] = None,\n        visible_for_annotators: Optional[bool] = True,\n        client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"Create a metadata field with float settings.\n\n        Parameters:\n            name (str): The name of the metadata field\n            min (Optional[float]): The minimum valid value. If none is provided, it will be computed from the values provided in the records.\n            max (Optional[float]): The maximum valid value. If none is provided, it will be computed from the values provided in the records.\n            title (Optional[str]): The title of the metadata to be shown in the UI\n            visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n        Raises:\n            MetadataError: If an error occurs while defining metadata settings.\n        \"\"\"\n\n        super().__init__(client=client)\n\n        try:\n            settings = FloatMetadataPropertySettings(min=min, max=max, type=MetadataPropertyType.float)\n        except ValueError as e:\n            raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n        self._model = MetadataFieldModel(\n            name=name,\n            type=MetadataPropertyType.float,\n            title=title,\n            settings=settings,\n            visible_for_annotators=visible_for_annotators,\n        )\n\n    @property\n    def min(self) -> Optional[int]:\n        return self._model.settings.min\n\n    @min.setter\n    def min(self, value: Optional[int]) -> None:\n        self._model.settings.min = value\n\n    @property\n    def max(self) -> Optional[int]:\n        return self._model.settings.max\n\n    @max.setter\n    def max(self, value: Optional[int]) -> None:\n        self._model.settings.max = value\n\n    @classmethod\n    def from_model(cls, model: MetadataFieldModel) -> \"FloatMetadataProperty\":\n        instance = FloatMetadataProperty(name=model.name)\n        instance._model = model\n\n        return instance\n
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.FloatMetadataProperty.__init__","title":"__init__(name, min=None, max=None, title=None, visible_for_annotators=True, client=None)","text":"

Create a metadata field with float settings.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the metadata field | required |
| min | Optional[float] | The minimum valid value. If none is provided, it will be computed from the values provided in the records. | None |
| max | Optional[float] | The maximum valid value. If none is provided, it will be computed from the values provided in the records. | None |
| title | Optional[str] | The title of the metadata to be shown in the UI | None |
| visible_for_annotators | Optional[bool] | Whether the metadata field is visible for annotators. | True |

Raises:

| Type | Description |
| --- | --- |
| MetadataError | If an error occurs while defining metadata settings. |

Source code in src/argilla/settings/_metadata.py
def __init__(\n    self,\n    name: str,\n    min: Optional[float] = None,\n    max: Optional[float] = None,\n    title: Optional[str] = None,\n    visible_for_annotators: Optional[bool] = True,\n    client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"Create a metadata field with float settings.\n\n    Parameters:\n        name (str): The name of the metadata field\n        min (Optional[float]): The minimum valid value. If none is provided, it will be computed from the values provided in the records.\n        max (Optional[float]): The maximum valid value. If none is provided, it will be computed from the values provided in the records.\n        title (Optional[str]): The title of the metadata to be shown in the UI\n        visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n    Raises:\n        MetadataError: If an error occurs while defining metadata settings.\n    \"\"\"\n\n    super().__init__(client=client)\n\n    try:\n        settings = FloatMetadataPropertySettings(min=min, max=max, type=MetadataPropertyType.float)\n    except ValueError as e:\n        raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n    self._model = MetadataFieldModel(\n        name=name,\n        type=MetadataPropertyType.float,\n        title=title,\n        settings=settings,\n        visible_for_annotators=visible_for_annotators,\n    )\n
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.IntegerMetadataProperty","title":"IntegerMetadataProperty","text":"

Bases: MetadataPropertyBase

Source code in src/argilla/settings/_metadata.py
class IntegerMetadataProperty(MetadataPropertyBase):\n    def __init__(\n        self,\n        name: str,\n        min: Optional[int] = None,\n        max: Optional[int] = None,\n        title: Optional[str] = None,\n        visible_for_annotators: Optional[bool] = True,\n        client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"Create a metadata field with integer settings.\n\n        Parameters:\n            name (str): The name of the metadata field\n            min (Optional[int]): The minimum valid value. If none is provided, it will be computed from the values provided in the records.\n            max (Optional[int]): The maximum  valid value. If none is provided, it will be computed from the values provided in the records.\n            title (Optional[str]): The title of the metadata to be shown in the UI\n            visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n        Raises:\n            MetadataError: If an error occurs while defining metadata settings.\n        \"\"\"\n        super().__init__(client=client)\n\n        try:\n            settings = IntegerMetadataPropertySettings(min=min, max=max, type=MetadataPropertyType.integer)\n        except ValueError as e:\n            raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n        self._model = MetadataFieldModel(\n            name=name,\n            type=MetadataPropertyType.integer,\n            title=title,\n            settings=settings,\n            visible_for_annotators=visible_for_annotators,\n        )\n\n    @property\n    def min(self) -> Optional[int]:\n        return self._model.settings.min\n\n    @min.setter\n    def min(self, value: Optional[int]) -> None:\n        self._model.settings.min = value\n\n    @property\n    def max(self) -> Optional[int]:\n        return self._model.settings.max\n\n    @max.setter\n    def max(self, value: Optional[int]) -> None:\n        self._model.settings.max = value\n\n    @classmethod\n    def from_model(cls, model: MetadataFieldModel) -> \"IntegerMetadataProperty\":\n        instance = IntegerMetadataProperty(name=model.name)\n        instance._model = model\n\n        return instance\n
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.IntegerMetadataProperty.__init__","title":"__init__(name, min=None, max=None, title=None, visible_for_annotators=True, client=None)","text":"

Create a metadata field with integer settings.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the metadata field | required |
| min | Optional[int] | The minimum valid value. If none is provided, it will be computed from the values provided in the records. | None |
| max | Optional[int] | The maximum valid value. If none is provided, it will be computed from the values provided in the records. | None |
| title | Optional[str] | The title of the metadata to be shown in the UI | None |
| visible_for_annotators | Optional[bool] | Whether the metadata field is visible for annotators. | True |

Raises:

| Type | Description |
| --- | --- |
| MetadataError | If an error occurs while defining metadata settings. |

Source code in src/argilla/settings/_metadata.py
def __init__(\n    self,\n    name: str,\n    min: Optional[int] = None,\n    max: Optional[int] = None,\n    title: Optional[str] = None,\n    visible_for_annotators: Optional[bool] = True,\n    client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"Create a metadata field with integer settings.\n\n    Parameters:\n        name (str): The name of the metadata field\n        min (Optional[int]): The minimum valid value. If none is provided, it will be computed from the values provided in the records.\n        max (Optional[int]): The maximum  valid value. If none is provided, it will be computed from the values provided in the records.\n        title (Optional[str]): The title of the metadata to be shown in the UI\n        visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n    Raises:\n        MetadataError: If an error occurs while defining metadata settings.\n    \"\"\"\n    super().__init__(client=client)\n\n    try:\n        settings = IntegerMetadataPropertySettings(min=min, max=max, type=MetadataPropertyType.integer)\n    except ValueError as e:\n        raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n    self._model = MetadataFieldModel(\n        name=name,\n        type=MetadataPropertyType.integer,\n        title=title,\n        settings=settings,\n        visible_for_annotators=visible_for_annotators,\n    )\n
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.TermsMetadataProperty","title":"TermsMetadataProperty","text":"

Bases: MetadataPropertyBase

Source code in src/argilla/settings/_metadata.py
class TermsMetadataProperty(MetadataPropertyBase):\n    def __init__(\n        self,\n        name: str,\n        options: Optional[List[str]] = None,\n        title: Optional[str] = None,\n        visible_for_annotators: Optional[bool] = True,\n        client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"Create a metadata field with terms settings.\n\n        Parameters:\n            name (str): The name of the metadata field\n            options (Optional[List[str]]): The list of options\n            title (Optional[str]): The title of the metadata to be shown in the UI\n            visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n        Raises:\n            MetadataError: If an error occurs while defining metadata settings\n        \"\"\"\n        super().__init__(client=client)\n\n        try:\n            settings = TermsMetadataPropertySettings(values=options, type=MetadataPropertyType.terms)\n        except ValueError as e:\n            raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n        self._model = MetadataFieldModel(\n            name=name,\n            type=MetadataPropertyType.terms,\n            title=title,\n            settings=settings,\n            visible_for_annotators=visible_for_annotators,\n        )\n\n    @property\n    def options(self) -> Optional[List[str]]:\n        return self._model.settings.values\n\n    @options.setter\n    def options(self, value: list[str]) -> None:\n        self._model.settings.values = value\n\n    @classmethod\n    def from_model(cls, model: MetadataFieldModel) -> \"TermsMetadataProperty\":\n        instance = TermsMetadataProperty(name=model.name)\n        instance._model = model\n\n        return instance\n
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.TermsMetadataProperty.__init__","title":"__init__(name, options=None, title=None, visible_for_annotators=True, client=None)","text":"

Create a metadata field with terms settings.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the metadata field | required |
| options | Optional[List[str]] | The list of options | None |
| title | Optional[str] | The title of the metadata to be shown in the UI | None |
| visible_for_annotators | Optional[bool] | Whether the metadata field is visible for annotators. | True |

Raises:

| Type | Description |
| --- | --- |
| MetadataError | If an error occurs while defining metadata settings. |

Source code in src/argilla/settings/_metadata.py
def __init__(\n    self,\n    name: str,\n    options: Optional[List[str]] = None,\n    title: Optional[str] = None,\n    visible_for_annotators: Optional[bool] = True,\n    client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"Create a metadata field with terms settings.\n\n    Parameters:\n        name (str): The name of the metadata field\n        options (Optional[List[str]]): The list of options\n        title (Optional[str]): The title of the metadata to be shown in the UI\n        visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n    Raises:\n        MetadataError: If an error occurs while defining metadata settings\n    \"\"\"\n    super().__init__(client=client)\n\n    try:\n        settings = TermsMetadataPropertySettings(values=options, type=MetadataPropertyType.terms)\n    except ValueError as e:\n        raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n    self._model = MetadataFieldModel(\n        name=name,\n        type=MetadataPropertyType.terms,\n        title=title,\n        settings=settings,\n        visible_for_annotators=visible_for_annotators,\n    )\n
"},{"location":"reference/argilla/settings/questions/","title":"Questions","text":"

Argilla uses questions to gather feedback. Questions are answered by users or models.

"},{"location":"reference/argilla/settings/questions/#usage-examples","title":"Usage Examples","text":"

To define a label question, for example, instantiate the LabelQuestion class and pass it to the Settings class.

label_question = rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])\n\nsettings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    questions=[\n        label_question,\n    ],\n)\n

Questions can be freely combined based on the type of feedback you want to collect. For example, you can combine a label question with a text question to collect both a label and a text response.

label_question = rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])\ntext_question = rg.TextQuestion(name=\"response\")\n\nsettings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    questions=[\n        label_question,\n        text_question,\n    ],\n)\n\ndataset = rg.Dataset(\n    name=\"my_dataset\",\n    settings=settings,\n)\n

To add records with responses to questions, refer to the rg.Response class documentation.
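As a brief sketch, assuming the dataset defined above exists on the server and user_id identifies the responding user, a response is attached to a record like this:

```python
import argilla as rg

# Assumes `dataset` uses the settings above and `user_id` is a valid user id.
record = rg.Record(
    fields={"text": "The new update is fantastic."},
    responses=[
        rg.Response(question_name="label", value="positive", user_id=user_id),
    ],
)
dataset.records.log([record])
```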

"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.LabelQuestion","title":"LabelQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class LabelQuestion(QuestionPropertyBase):\n    _model: LabelQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        labels: Union[List[str], Dict[str, str]],\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n        visible_labels: Optional[int] = None,\n    ) -> None:\n        \"\"\" Define a new label question for `Settings` of a `Dataset`. A label \\\n            question is a question where the user can select one label from \\\n            a list of available labels.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a\n                dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n            title (Optional[str]): The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n            visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n                Setting it to None show all options.\n        \"\"\"\n        self._model = LabelQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=LabelQuestionSettings(\n                options=self._render_values_as_options(labels), visible_options=visible_labels\n            ),\n        )\n\n    @classmethod\n    def from_model(cls, model: LabelQuestionModel) -> \"LabelQuestion\":\n        instance = cls(name=model.name, labels=cls._render_options_as_values(model.settings.options))\n        instance._model = model\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"LabelQuestion\":\n        model = LabelQuestionModel(**data)\n        return cls.from_model(model=model)\n\n    ##############################\n    # Public properties\n    ##############################\n\n    @property\n    def labels(self) -> List[str]:\n        return self._render_options_as_labels(self._model.settings.options)\n\n    @labels.setter\n    def labels(self, labels: List[str]) -> None:\n        self._model.settings.options = self._render_values_as_options(labels)\n\n    @property\n    def visible_labels(self) -> Optional[int]:\n        return self._model.settings.visible_options\n\n    @visible_labels.setter\n    def visible_labels(self, visible_labels: Optional[int]) -> None:\n        self._model.settings.visible_options = visible_labels\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.LabelQuestion.__init__","title":"__init__(name, labels, title=None, description=None, required=True, visible_labels=None)","text":"

Define a new label question for Settings of a Dataset. A label question is a question where the user can select one label from a list of available labels.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the question to be used as a reference. | required |
| labels | Union[List[str], Dict[str, str]] | The list of available labels for the question, or a dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI. | required |
| title | Optional[str] | The title of the question to be shown in the UI. | None |
| description | Optional[str] | The description of the question to be shown in the UI. | None |
| required | bool | If the question is required for a record to be valid. At least one question must be required. | True |
| visible_labels | Optional[int] | The number of visible labels for the question to be shown in the UI. Setting it to None shows all options. | None |

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    labels: Union[List[str], Dict[str, str]],\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n    visible_labels: Optional[int] = None,\n) -> None:\n    \"\"\" Define a new label question for `Settings` of a `Dataset`. A label \\\n        question is a question where the user can select one label from \\\n        a list of available labels.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a\n            dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n        title (Optional[str]): The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n        visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n            Setting it to None show all options.\n    \"\"\"\n    self._model = LabelQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=LabelQuestionSettings(\n            options=self._render_values_as_options(labels), visible_options=visible_labels\n        ),\n    )\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.MultiLabelQuestion","title":"MultiLabelQuestion","text":"

Bases: LabelQuestion

Source code in src/argilla/settings/_question.py
class MultiLabelQuestion(LabelQuestion):\n    _model: MultiLabelQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        labels: Union[List[str], Dict[str, str]],\n        visible_labels: Optional[int] = None,\n        labels_order: Literal[\"natural\", \"suggestion\"] = \"natural\",\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n    ) -> None:\n        \"\"\"Create a new multi-label question for `Settings` of a `Dataset`. A \\\n            multi-label question is a question where the user can select multiple \\\n            labels from a list of available labels.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a \\\n                dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n            visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n                Setting it to None show all options.\n            labels_order (Literal[\"natural\", \"suggestion\"]): The order of the labels in the UI. \\\n                Can be either \"natural\" (order in which they were specified) or \"suggestion\" (order prioritizing those associated with a suggestion). \\\n                The score of the suggestion will be taken into account for ordering if available.\n            title (Optional[str]: The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n        \"\"\"\n        self._model = MultiLabelQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=MultiLabelQuestionSettings(\n                options=self._render_values_as_options(labels),\n                visible_options=visible_labels,\n                options_order=labels_order,\n            ),\n        )\n\n    @classmethod\n    def from_model(cls, model: MultiLabelQuestionModel) -> \"MultiLabelQuestion\":\n        instance = cls(\n            name=model.name,\n            labels=cls._render_options_as_values(model.settings.options),\n            labels_order=model.settings.options_order,\n        )\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"MultiLabelQuestion\":\n        model = MultiLabelQuestionModel(**data)\n        return cls.from_model(model=model)\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.MultiLabelQuestion.__init__","title":"__init__(name, labels, visible_labels=None, labels_order='natural', title=None, description=None, required=True)","text":"

Create a new multi-label question for Settings of a Dataset. A multi-label question is a question where the user can select multiple labels from a list of available labels.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the question to be used as a reference. | required |
| labels | Union[List[str], Dict[str, str]] | The list of available labels for the question, or a dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI. | required |
| visible_labels | Optional[int] | The number of visible labels for the question to be shown in the UI. Setting it to None shows all options. | None |
| labels_order | Literal['natural', 'suggestion'] | The order of the labels in the UI. Can be either "natural" (order in which they were specified) or "suggestion" (order prioritizing those associated with a suggestion). The score of the suggestion will be taken into account for ordering if available. | 'natural' |
| title | Optional[str] | The title of the question to be shown in the UI. | None |
| description | Optional[str] | The description of the question to be shown in the UI. | None |
| required | bool | If the question is required for a record to be valid. At least one question must be required. | True |

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    labels: Union[List[str], Dict[str, str]],\n    visible_labels: Optional[int] = None,\n    labels_order: Literal[\"natural\", \"suggestion\"] = \"natural\",\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n) -> None:\n    \"\"\"Create a new multi-label question for `Settings` of a `Dataset`. A \\\n        multi-label question is a question where the user can select multiple \\\n        labels from a list of available labels.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a \\\n            dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n        visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n            Setting it to None show all options.\n        labels_order (Literal[\"natural\", \"suggestion\"]): The order of the labels in the UI. \\\n            Can be either \"natural\" (order in which they were specified) or \"suggestion\" (order prioritizing those associated with a suggestion). \\\n            The score of the suggestion will be taken into account for ordering if available.\n        title (Optional[str]: The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n    \"\"\"\n    self._model = MultiLabelQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=MultiLabelQuestionSettings(\n            options=self._render_values_as_options(labels),\n            visible_options=visible_labels,\n            options_order=labels_order,\n        ),\n    )\n
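For illustration, a minimal sketch of a multi-label question; the name and labels are arbitrary examples:

```python
import argilla as rg

# Annotators may select several of these topic labels per record.
multi_label_question = rg.MultiLabelQuestion(
    name="topics",
    labels=["sports", "politics", "technology"],
    labels_order="natural",
)
```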
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.RankingQuestion","title":"RankingQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class RankingQuestion(QuestionPropertyBase):\n    _model: RankingQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        values: Union[List[str], Dict[str, str]],\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n    ) -> None:\n        \"\"\"Create a new ranking question for `Settings` of a `Dataset`. A ranking question \\\n            is a question where the user can rank a list of options.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            values (Union[List[str], Dict[str, str]]): The list of options to be ranked, or a \\\n                dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n            title (Optional[str]:) The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n        \"\"\"\n        self._model = RankingQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=RankingQuestionSettings(options=self._render_values_as_options(values)),\n        )\n\n    @classmethod\n    def from_model(cls, model: RankingQuestionModel) -> \"RankingQuestion\":\n        instance = cls(name=model.name, values=cls._render_options_as_values(model.settings.options))\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"RankingQuestion\":\n        model = RankingQuestionModel(**data)\n        return cls.from_model(model=model)\n\n    @property\n    def values(self) -> List[str]:\n        return self._render_options_as_labels(self._model.settings.options)\n\n    @values.setter\n    def values(self, values: List[int]) -> None:\n        self._model.settings.options = self._render_values_as_options(values)\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.RankingQuestion.__init__","title":"__init__(name, values, title=None, description=None, required=True)","text":"

Create a new ranking question for Settings of a Dataset. A ranking question is a question where the user can rank a list of options.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the question to be used as a reference. | required |
| values | Union[List[str], Dict[str, str]] | The list of options to be ranked, or a dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI. | required |
| title | Optional[str] | The title of the question to be shown in the UI. | None |
| description | Optional[str] | The description of the question to be shown in the UI. | None |
| required | bool | If the question is required for a record to be valid. At least one question must be required. | True |

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    values: Union[List[str], Dict[str, str]],\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n) -> None:\n    \"\"\"Create a new ranking question for `Settings` of a `Dataset`. A ranking question \\\n        is a question where the user can rank a list of options.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        values (Union[List[str], Dict[str, str]]): The list of options to be ranked, or a \\\n            dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n        title (Optional[str]:) The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n    \"\"\"\n    self._model = RankingQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=RankingQuestionSettings(options=self._render_values_as_options(values)),\n    )\n
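For illustration, a minimal sketch of a ranking question using the dictionary form of values; the names are arbitrary examples:

```python
import argilla as rg

# Annotators rank the candidate answers; the keys are the stored values,
# the dictionary values are the names displayed in the UI.
ranking_question = rg.RankingQuestion(
    name="preference",
    values={"answer_a": "Answer A", "answer_b": "Answer B"},
)
```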
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.TextQuestion","title":"TextQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class TextQuestion(QuestionPropertyBase):\n    _model: TextQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n        use_markdown: bool = False,\n    ) -> None:\n        \"\"\"Create a new text question for `Settings` of a `Dataset`. A text question \\\n            is a question where the user can input text.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            title (Optional[str]): The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n            use_markdown (Optional[bool]): Whether to render the markdown in the UI. When True, you will be able \\\n                to use all the Markdown features for text formatting, including LaTex formulas and embedding multimedia content and PDFs.\n        \"\"\"\n        self._model = TextQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=TextQuestionSettings(use_markdown=use_markdown),\n        )\n\n    @classmethod\n    def from_model(cls, model: TextQuestionModel) -> \"TextQuestion\":\n        instance = cls(name=model.name)\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"TextQuestion\":\n        model = TextQuestionModel(**data)\n        return cls.from_model(model=model)\n\n    @property\n    def use_markdown(self) -> bool:\n        return self._model.settings.use_markdown\n\n    @use_markdown.setter\n    def use_markdown(self, use_markdown: bool) -> None:\n        self._model.settings.use_markdown = use_markdown\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.TextQuestion.__init__","title":"__init__(name, title=None, description=None, required=True, use_markdown=False)","text":"

Create a new text question for Settings of a Dataset. A text question is a question where the user can input text.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the question to be used as a reference. | required |
| title | Optional[str] | The title of the question to be shown in the UI. | None |
| description | Optional[str] | The description of the question to be shown in the UI. | None |
| required | bool | If the question is required for a record to be valid. At least one question must be required. | True |
| use_markdown | Optional[bool] | Whether to render the markdown in the UI. When True, you will be able to use all the Markdown features for text formatting, including LaTeX formulas and embedding multimedia content and PDFs. | False |

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n    use_markdown: bool = False,\n) -> None:\n    \"\"\"Create a new text question for `Settings` of a `Dataset`. A text question \\\n        is a question where the user can input text.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        title (Optional[str]): The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n        use_markdown (Optional[bool]): Whether to render the markdown in the UI. When True, you will be able \\\n            to use all the Markdown features for text formatting, including LaTex formulas and embedding multimedia content and PDFs.\n    \"\"\"\n    self._model = TextQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=TextQuestionSettings(use_markdown=use_markdown),\n    )\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.RatingQuestion","title":"RatingQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class RatingQuestion(QuestionPropertyBase):\n    _model: RatingQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        values: List[int],\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n    ) -> None:\n        \"\"\"Create a new rating question for `Settings` of a `Dataset`. A rating question \\\n            is a question where the user can select a value from a sequential list of options.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            values (List[int]): The list of selectable values. It should be defined in the range [0, 10].\n            title (Optional[str]:) The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n        \"\"\"\n        self._model = RatingQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            values=values,\n            settings=RatingQuestionSettings(options=self._render_values_as_options(values)),\n        )\n\n    @classmethod\n    def from_model(cls, model: RatingQuestionModel) -> \"RatingQuestion\":\n        instance = cls(name=model.name, values=cls._render_options_as_values(model.settings.options))\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"RatingQuestion\":\n        model = RatingQuestionModel(**data)\n        return cls.from_model(model=model)\n\n    @property\n    def values(self) -> List[int]:\n        return self._render_options_as_labels(self._model.settings.options)\n\n    @values.setter\n    def values(self, values: List[int]) -> None:\n        self._model.values = self._render_values_as_options(values)\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.RatingQuestion.__init__","title":"__init__(name, values, title=None, description=None, required=True)","text":"

Create a new rating question for Settings of a Dataset. A rating question is a question where the user can select a value from a sequential list of options.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | The name of the question to be used as a reference. | required |
| values | List[int] | The list of selectable values. It should be defined in the range [0, 10]. | required |
| title | Optional[str] | The title of the question to be shown in the UI. | None |
| description | Optional[str] | The description of the question to be shown in the UI. | None |
| required | bool | If the question is required for a record to be valid. At least one question must be required. | True |

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    values: List[int],\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n) -> None:\n    \"\"\"Create a new rating question for `Settings` of a `Dataset`. A rating question \\\n        is a question where the user can select a value from a sequential list of options.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        values (List[int]): The list of selectable values. It should be defined in the range [0, 10].\n        title (Optional[str]:) The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n    \"\"\"\n    self._model = RatingQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        values=values,\n        settings=RatingQuestionSettings(options=self._render_values_as_options(values)),\n    )\n
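For illustration, a minimal sketch of a five-point rating question; the name is an arbitrary example, and the values must lie within [0, 10]:

```python
import argilla as rg

# A sequential five-point quality scale.
rating_question = rg.RatingQuestion(
    name="quality",
    values=[1, 2, 3, 4, 5],
)
```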
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.SpanQuestion","title":"SpanQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class SpanQuestion(QuestionPropertyBase):\n    _model: SpanQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        field: str,\n        labels: Union[List[str], Dict[str, str]],\n        allow_overlapping: bool = False,\n        visible_labels: Optional[int] = None,\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n    ):\n        \"\"\" Create a new span question for `Settings` of a `Dataset`. A span question \\\n            is a question where the user can select a section of text within a text field \\\n            and assign it a label.\n\n            Parameters:\n                name (str): The name of the question to be used as a reference.\n                field (str): The name of the text field where the span question will be applied.\n                labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a \\\n                    dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n                allow_overlapping (bool): This value specifies whether overlapped spans are allowed or not.\n                visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n                    Setting it to None show all options.\n                title (Optional[str]:) The title of the question to be shown in the UI.\n                description (Optional[str]): The description of the question to be shown in the UI.\n                required (bool): If the question is required for a record to be valid. At least one question must be required.\n            \"\"\"\n        self._model = SpanQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=SpanQuestionSettings(\n                field=field,\n                allow_overlapping=allow_overlapping,\n                visible_options=visible_labels,\n                options=self._render_values_as_options(labels),\n            ),\n        )\n\n    @property\n    def name(self):\n        return self._model.name\n\n    @property\n    def field(self):\n        return self._model.settings.field\n\n    @field.setter\n    def field(self, field: str):\n        self._model.settings.field = field\n\n    @property\n    def allow_overlapping(self):\n        return self._model.settings.allow_overlapping\n\n    @allow_overlapping.setter\n    def allow_overlapping(self, allow_overlapping: bool):\n        self._model.settings.allow_overlapping = allow_overlapping\n\n    @property\n    def visible_labels(self) -> Optional[int]:\n        return self._model.settings.visible_options\n\n    @visible_labels.setter\n    def visible_labels(self, visible_labels: Optional[int]) -> None:\n        self._model.settings.visible_options = visible_labels\n\n    @property\n    def labels(self) -> List[str]:\n        return self._render_options_as_labels(self._model.settings.options)\n\n    @labels.setter\n    def labels(self, labels: List[str]) -> None:\n        self._model.settings.options = self._render_values_as_options(labels)\n\n    @classmethod\n    def from_model(cls, model: SpanQuestionModel) -> \"SpanQuestion\":\n        instance = cls(\n            name=model.name,\n            field=model.settings.field,\n            labels=cls._render_options_as_values(model.settings.options),\n        )\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"SpanQuestion\":\n        model = SpanQuestionModel(**data)\n        return cls.from_model(model=model)\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.SpanQuestion.__init__","title":"__init__(name, field, labels, allow_overlapping=False, visible_labels=None, title=None, description=None, required=True)","text":"

Create a new span question for Settings of a Dataset. A span question is a question where the user can select a section of text within a text field and assign it a label.

Parameters:

  • name (str, required): The name of the question to be used as a reference.
  • field (str, required): The name of the text field where the span question will be applied.
  • labels (Union[List[str], Dict[str, str]], required): The list of available labels for the question, or a dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.
  • allow_overlapping (bool, default: False): Whether overlapping spans are allowed.
  • visible_labels (Optional[int], default: None): The number of visible labels for the question to be shown in the UI. Setting it to None shows all options.
  • title (Optional[str], default: None): The title of the question to be shown in the UI.
  • description (Optional[str], default: None): The description of the question to be shown in the UI.
  • required (bool, default: True): Whether the question is required for a record to be valid. At least one question must be required.

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    field: str,\n    labels: Union[List[str], Dict[str, str]],\n    allow_overlapping: bool = False,\n    visible_labels: Optional[int] = None,\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n):\n    \"\"\" Create a new span question for `Settings` of a `Dataset`. A span question \\\n        is a question where the user can select a section of text within a text field \\\n        and assign it a label.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            field (str): The name of the text field where the span question will be applied.\n            labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a \\\n                dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n            allow_overlapping (bool): This value specifies whether overlapped spans are allowed or not.\n            visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n                Setting it to None show all options.\n            title (Optional[str]:) The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n        \"\"\"\n    self._model = SpanQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=SpanQuestionSettings(\n            field=field,\n            allow_overlapping=allow_overlapping,\n            visible_options=visible_labels,\n            options=self._render_values_as_options(labels),\n        ),\n    )\n
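For orientation, a minimal usage sketch is shown below; the question name, field name, and labels are illustrative assumptions, not part of the reference:

span_question = rg.SpanQuestion(\n    name=\"entities\",  # illustrative question name\n    field=\"text\",  # the text field the spans are selected from\n    labels=[\"PERSON\", \"ORG\", \"LOC\"],  # illustrative labels\n    allow_overlapping=False,\n)\n\nsettings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[span_question],\n)\n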
"},{"location":"reference/argilla/settings/settings/","title":"rg.Settings","text":"

rg.Settings is used to define the settings of an Argilla Dataset. The settings can be used to configure the behavior of the dataset, such as the fields, questions, guidelines, metadata, and vectors. The Settings class is passed to the Dataset class and used to create the dataset on the server. Once created, the settings of a dataset cannot be changed.

"},{"location":"reference/argilla/settings/settings/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/settings/settings/#creating-a-new-dataset-with-settings","title":"Creating a new dataset with settings","text":"

To create a new dataset with settings, instantiate the Settings class and pass it to the Dataset class.

import argilla as rg\n\nsettings = rg.Settings(\n    guidelines=\"Select the sentiment of the prompt.\",\n    fields=[rg.TextField(name=\"prompt\", use_markdown=True)],\n    questions=[rg.LabelQuestion(name=\"sentiment\", labels=[\"positive\", \"negative\"])],\n)\n\ndataset = rg.Dataset(name=\"sentiment_analysis\", settings=settings)\n\n# Create the dataset on the server\ndataset.create()\n

To define the settings for fields, questions, metadata, vectors, or distribution, refer to the rg.TextField, rg.LabelQuestion, rg.TermsMetadataProperty, rg.VectorField, and rg.TaskDistribution class documentation.
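As a quick orientation, here is a hedged sketch combining several of these setting types in one Settings object; the metadata property name, vector dimensions, and distribution value are illustrative assumptions:

settings = rg.Settings(\n    guidelines=\"Select the sentiment of the prompt.\",\n    fields=[rg.TextField(name=\"prompt\", use_markdown=True)],\n    questions=[rg.LabelQuestion(name=\"sentiment\", labels=[\"positive\", \"negative\"])],\n    metadata=[rg.TermsMetadataProperty(name=\"source\")],  # illustrative metadata property\n    vectors=[rg.VectorField(name=\"prompt_embedding\", dimensions=384)],  # illustrative vector field\n    distribution=rg.TaskDistribution(min_submitted=2),  # require two submitted responses per record\n)\n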

"},{"location":"reference/argilla/settings/settings/#creating-settings-using-built-in-templates","title":"Creating settings using built in templates","text":"

Argilla provides built-in templates for creating settings for common dataset types. To use a template, use the class methods of the Settings class. There are three built-in templates available for classification, ranking, and rating tasks. Template settings also include default guidelines and mappings.

"},{"location":"reference/argilla/settings/settings/#classification-task","title":"Classification Task","text":"

You can define a classification task using the rg.Settings.for_classification class method. This will create a dataset with a text field and a label question. You can select the field type using the field_type parameter, choosing between text and image; see the sketch after the example below.

settings = rg.Settings.for_classification(labels=[\"positive\", \"negative\"])\n

This will return a Settings object with the following settings:

settings = Settings(\n    guidelines=\"Select a label for the document.\",\n    fields=[rg.TextField(name=\"text\")],  # or an image field, depending on field_type\n    questions=[LabelQuestion(name=\"label\", labels=labels)],\n    mapping={\"input\": \"text\", \"output\": \"label\", \"document\": \"text\"},\n)\n
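If your inputs are images rather than text, a sketch of the same call, assuming field_type accepts "image" as described above:

settings = rg.Settings.for_classification(\n    labels=[\"positive\", \"negative\"],\n    field_type=\"image\",  # assumed value, per the field_type description above\n)\n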
"},{"location":"reference/argilla/settings/settings/#ranking-task","title":"Ranking Task","text":"

You can define a ranking task using the rg.Settings.for_ranking class method. This will create a dataset with a text field and a ranking question.

settings = rg.Settings.for_ranking()\n

This will return a Settings object with the following settings:

settings = Settings(\n    guidelines=\"Rank the responses.\",\n    fields=[\n        rg.TextField(name=\"instruction\"),\n        rg.TextField(name=\"response1\"),\n        rg.TextField(name=\"response2\"),\n    ],\n    questions=[RankingQuestion(name=\"ranking\", values=[\"response1\", \"response2\"])],\n    mapping={\n        \"input\": \"instruction\",\n        \"prompt\": \"instruction\",\n        \"chosen\": \"response1\",\n        \"rejected\": \"response2\",\n    },\n)\n
"},{"location":"reference/argilla/settings/settings/#rating-task","title":"Rating Task","text":"

You can define a rating task using the rg.Settings.for_rating class method. This will create a dataset with a text field and a rating question.

settings = rg.Settings.for_rating()\n

This will return a Settings object with the following settings:

settings = Settings(\n    guidelines=\"Rate the response.\",\n    fields=[\n        rg.TextField(name=\"instruction\"),\n        rg.TextField(name=\"response\"),\n    ],\n    questions=[RatingQuestion(name=\"rating\", values=[1, 2, 3, 4, 5])],\n    mapping={\n        \"input\": \"instruction\",\n        \"prompt\": \"instruction\",\n        \"output\": \"response\",\n        \"score\": \"rating\",\n    },\n)\n
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings","title":"Settings","text":"

Bases: DefaultSettingsMixin, Resource

Settings class for Argilla Datasets.

This class is used to define the representation of a Dataset within the UI.

Source code in src/argilla/settings/_resource.py
class Settings(DefaultSettingsMixin, Resource):\n    \"\"\"\n    Settings class for Argilla Datasets.\n\n    This class is used to define the representation of a Dataset within the UI.\n    \"\"\"\n\n    def __init__(\n        self,\n        fields: Optional[List[Field]] = None,\n        questions: Optional[List[QuestionType]] = None,\n        vectors: Optional[List[VectorField]] = None,\n        metadata: Optional[List[MetadataType]] = None,\n        guidelines: Optional[str] = None,\n        allow_extra_metadata: bool = False,\n        distribution: Optional[TaskDistribution] = None,\n        mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n        _dataset: Optional[\"Dataset\"] = None,\n    ) -> None:\n        \"\"\"\n        Args:\n            fields (List[Field]): A list of Field objects that represent the fields in the Dataset.\n            questions (List[Union[LabelQuestion, MultiLabelQuestion, RankingQuestion, TextQuestion, RatingQuestion]]):\n                A list of Question objects that represent the questions in the Dataset.\n            vectors (List[VectorField]): A list of VectorField objects that represent the vectors in the Dataset.\n            metadata (List[MetadataField]): A list of MetadataField objects that represent the metadata in the Dataset.\n            guidelines (str): A string containing the guidelines for the Dataset.\n            allow_extra_metadata (bool): A boolean that determines whether or not extra metadata is allowed in the\n                Dataset. Defaults to False.\n            distribution (TaskDistribution): The annotation task distribution configuration.\n                Default to DEFAULT_TASK_DISTRIBUTION\n            mapping (Dict[str, Union[str, Sequence[str]]]): A dictionary that maps incoming data names to Argilla dataset attributes in DatasetRecords.\n        \"\"\"\n        super().__init__(client=_dataset._client if _dataset else None)\n\n        self._dataset = _dataset\n        self._distribution = distribution\n        self._mapping = mapping\n        self.__guidelines = self.__process_guidelines(guidelines)\n        self.__allow_extra_metadata = allow_extra_metadata\n\n        self.__questions = QuestionsProperties(self, questions)\n        self.__fields = SettingsProperties(self, fields)\n        self.__vectors = SettingsProperties(self, vectors)\n        self.__metadata = SettingsProperties(self, metadata)\n\n    #####################\n    # Properties        #\n    #####################\n\n    @property\n    def fields(self) -> \"SettingsProperties\":\n        return self.__fields\n\n    @fields.setter\n    def fields(self, fields: List[Field]):\n        self.__fields = SettingsProperties(self, fields)\n\n    @property\n    def questions(self) -> \"SettingsProperties\":\n        return self.__questions\n\n    @questions.setter\n    def questions(self, questions: List[QuestionType]):\n        self.__questions = QuestionsProperties(self, questions)\n\n    @property\n    def vectors(self) -> \"SettingsProperties\":\n        return self.__vectors\n\n    @vectors.setter\n    def vectors(self, vectors: List[VectorField]):\n        self.__vectors = SettingsProperties(self, vectors)\n\n    @property\n    def metadata(self) -> \"SettingsProperties\":\n        return self.__metadata\n\n    @metadata.setter\n    def metadata(self, metadata: List[MetadataType]):\n        self.__metadata = SettingsProperties(self, metadata)\n\n    @property\n    def guidelines(self) -> str:\n        return self.__guidelines\n\n    
@guidelines.setter\n    def guidelines(self, guidelines: str):\n        self.__guidelines = self.__process_guidelines(guidelines)\n\n    @property\n    def allow_extra_metadata(self) -> bool:\n        return self.__allow_extra_metadata\n\n    @allow_extra_metadata.setter\n    def allow_extra_metadata(self, value: bool):\n        self.__allow_extra_metadata = value\n\n    @property\n    def distribution(self) -> TaskDistribution:\n        return self._distribution or TaskDistribution.default()\n\n    @distribution.setter\n    def distribution(self, value: TaskDistribution) -> None:\n        self._distribution = value\n\n    @property\n    def mapping(self) -> Dict[str, Union[str, Sequence[str]]]:\n        return self._mapping\n\n    @mapping.setter\n    def mapping(self, value: Dict[str, Union[str, Sequence[str]]]):\n        self._mapping = value\n\n    @property\n    def dataset(self) -> \"Dataset\":\n        return self._dataset\n\n    @dataset.setter\n    def dataset(self, dataset: \"Dataset\"):\n        self._dataset = dataset\n        self._client = dataset._client\n\n    @cached_property\n    def schema(self) -> dict:\n        schema_dict = {}\n\n        for field in self.fields:\n            schema_dict[field.name] = field\n\n        for question in self.questions:\n            schema_dict[question.name] = question\n\n        for vector in self.vectors:\n            schema_dict[vector.name] = vector\n\n        for metadata in self.metadata:\n            schema_dict[metadata.name] = metadata\n\n        return schema_dict\n\n    @cached_property\n    def schema_by_id(self) -> Dict[UUID, Union[Field, QuestionType, MetadataType, VectorField]]:\n        return {v.id: v for v in self.schema.values()}\n\n    def validate(self) -> None:\n        self._validate_empty_settings()\n        self._validate_duplicate_names()\n\n        for field in self.fields:\n            field.validate()\n\n    #####################\n    #  Public methods   #\n    #####################\n\n    def get(self) -> \"Settings\":\n        self.fields = self._fetch_fields()\n        self.questions = self._fetch_questions()\n        self.vectors = self._fetch_vectors()\n        self.metadata = self._fetch_metadata()\n        self.__fetch_dataset_related_attributes()\n\n        self._update_last_api_call()\n        return self\n\n    def create(self) -> \"Settings\":\n        self.validate()\n\n        self._update_dataset_related_attributes()\n        self.__fields.create()\n        self.__questions.create()\n        self.__vectors.create()\n        self.__metadata.create()\n\n        self._update_last_api_call()\n        return self\n\n    def update(self) -> \"Resource\":\n        self.validate()\n\n        self._update_dataset_related_attributes()\n        self.__fields.update()\n        self.__vectors.update()\n        self.__metadata.update()\n        # self.questions.update()\n\n        self._update_last_api_call()\n        return self\n\n    def serialize(self):\n        try:\n            return {\n                \"guidelines\": self.guidelines,\n                \"questions\": self.__questions.serialize(),\n                \"fields\": self.__fields.serialize(),\n                \"vectors\": self.vectors.serialize(),\n                \"metadata\": self.metadata.serialize(),\n                \"allow_extra_metadata\": self.allow_extra_metadata,\n                \"distribution\": self.distribution.to_dict(),\n                \"mapping\": self.mapping,\n            }\n        except Exception as e:\n            raise 
ArgillaSerializeError(f\"Failed to serialize the settings. {e.__class__.__name__}\") from e\n\n    def to_json(self, path: Union[Path, str]) -> None:\n        \"\"\"Save the settings to a file on disk\n\n        Parameters:\n            path (str): The path to save the settings to\n        \"\"\"\n        if not isinstance(path, Path):\n            path = Path(path)\n        if path.exists():\n            raise FileExistsError(f\"File {path} already exists\")\n        with open(path, \"w\") as file:\n            json.dump(self.serialize(), file)\n\n    @classmethod\n    def from_json(cls, path: Union[Path, str]) -> \"Settings\":\n        \"\"\"Load the settings from a file on disk\"\"\"\n\n        with open(path, \"r\") as file:\n            settings_dict = json.load(file)\n            return cls._from_dict(settings_dict)\n\n    @classmethod\n    def from_hub(\n        cls,\n        repo_id: str,\n        subset: Optional[str] = None,\n        feature_mapping: Optional[Dict[str, Literal[\"question\", \"field\", \"metadata\"]]] = None,\n        **kwargs,\n    ) -> \"Settings\":\n        \"\"\"Load the settings from the Hub\n\n        Parameters:\n            repo_id (str): The ID of the repository to load the settings from on the Hub.\n            subset (Optional[str]): The subset of the repository to load the settings from.\n            feature_mapping (Dict[str, Literal[\"question\", \"field\", \"metadata\"]]): A dictionary that maps incoming column names to Argilla attributes.\n        \"\"\"\n\n        settings = build_settings_from_repo_id(repo_id=repo_id, feature_mapping=feature_mapping, subset=subset)\n        return settings\n\n    def __eq__(self, other: \"Settings\") -> bool:\n        return self.serialize() == other.serialize()  # TODO: Create proper __eq__ methods for fields and questions\n\n    #####################\n    #  Repr Methods     #\n    #####################\n\n    def __repr__(self) -> str:\n        return (\n            f\"Settings(guidelines={self.guidelines}, allow_extra_metadata={self.allow_extra_metadata}, \"\n            f\"distribution={self.distribution}, \"\n            f\"fields={self.fields}, questions={self.questions}, vectors={self.vectors}, metadata={self.metadata})\"\n        )\n\n    #####################\n    #  Private methods  #\n    #####################\n\n    @classmethod\n    def _from_dict(cls, settings_dict: dict) -> \"Settings\":\n        fields = settings_dict.get(\"fields\", [])\n        vectors = settings_dict.get(\"vectors\", [])\n        metadata = settings_dict.get(\"metadata\", [])\n        guidelines = settings_dict.get(\"guidelines\")\n        distribution = settings_dict.get(\"distribution\")\n        allow_extra_metadata = settings_dict.get(\"allow_extra_metadata\")\n        mapping = settings_dict.get(\"mapping\")\n\n        questions = [question_from_dict(question) for question in settings_dict.get(\"questions\", [])]\n        fields = [_field_from_dict(field) for field in fields]\n        vectors = [VectorField.from_dict(vector) for vector in vectors]\n        metadata = [MetadataField.from_dict(metadata) for metadata in metadata]\n\n        if distribution:\n            distribution = TaskDistribution.from_dict(distribution)\n\n        if mapping:\n            mapping = cls._validate_mapping(mapping)\n\n        return cls(\n            questions=questions,\n            fields=fields,\n            vectors=vectors,\n            metadata=metadata,\n            guidelines=guidelines,\n            
allow_extra_metadata=allow_extra_metadata,\n            distribution=distribution,\n            mapping=mapping,\n        )\n\n    def _copy(self) -> \"Settings\":\n        instance = self.__class__._from_dict(self.serialize())\n        return instance\n\n    def _fetch_fields(self) -> List[Field]:\n        models = self._client.api.fields.list(dataset_id=self._dataset.id)\n        return [_field_from_model(model) for model in models]\n\n    def _fetch_questions(self) -> List[QuestionType]:\n        models = self._client.api.questions.list(dataset_id=self._dataset.id)\n        return [question_from_model(model) for model in models]\n\n    def _fetch_vectors(self) -> List[VectorField]:\n        models = self.dataset._client.api.vectors.list(self.dataset.id)\n        return [VectorField.from_model(model) for model in models]\n\n    def _fetch_metadata(self) -> List[MetadataType]:\n        models = self._client.api.metadata.list(dataset_id=self._dataset.id)\n        return [MetadataField.from_model(model) for model in models]\n\n    def __fetch_dataset_related_attributes(self):\n        # This flow may be a bit weird, but it's the only way to update the dataset related attributes\n        # Everything is point that we should have several settings-related endpoints in the API to handle this.\n        # POST /api/v1/datasets/{dataset_id}/settings\n        # {\n        #   \"guidelines\": ....,\n        #   \"allow_extra_metadata\": ....,\n        # }\n        # But this is not implemented yet, so we need to update the dataset model directly\n        dataset_model = self._client.api.datasets.get(self._dataset.id)\n\n        self.guidelines = dataset_model.guidelines\n        self.allow_extra_metadata = dataset_model.allow_extra_metadata\n\n        if dataset_model.distribution:\n            self.distribution = TaskDistribution.from_model(dataset_model.distribution)\n\n    def _update_dataset_related_attributes(self):\n        # This flow may be a bit weird, but it's the only way to update the dataset related attributes\n        # Everything is point that we should have several settings-related endpoints in the API to handle this.\n        # POST /api/v1/datasets/{dataset_id}/settings\n        # {\n        #   \"guidelines\": ....,\n        #   \"allow_extra_metadata\": ....,\n        # }\n        # But this is not implemented yet, so we need to update the dataset model directly\n        dataset_model = DatasetModel(\n            id=self._dataset.id,\n            name=self._dataset.name,\n            guidelines=self.guidelines,\n            allow_extra_metadata=self.allow_extra_metadata,\n            distribution=self.distribution._api_model(),\n        )\n        self._client.api.datasets.update(dataset_model)\n\n    def _validate_empty_settings(self):\n        if not all([self.fields, self.questions]):\n            message = \"Fields and questions are required\"\n            raise SettingsError(message=message)\n\n    def _validate_duplicate_names(self) -> None:\n        dataset_properties_by_name = {}\n\n        for properties in [self.fields, self.questions, self.vectors, self.metadata]:\n            for property in properties:\n                if property.name in dataset_properties_by_name:\n                    raise SettingsError(\n                        f\"names of dataset settings must be unique, \"\n                        f\"but the name {property.name!r} is used by {type(property).__name__!r} and {type(dataset_properties_by_name[property.name]).__name__!r} \"\n                    )\n  
              dataset_properties_by_name[property.name] = property\n\n    @classmethod\n    def _validate_mapping(cls, mapping: Dict[str, Union[str, Sequence[str]]]) -> dict:\n        validate_mapping = {}\n        for key, value in mapping.items():\n            if isinstance(value, str):\n                validate_mapping[key] = value\n            elif isinstance(value, list) or isinstance(value, tuple):\n                validate_mapping[key] = tuple(value)\n            else:\n                raise SettingsError(f\"Invalid mapping value for key {key!r}: {value}\")\n\n        return validate_mapping\n\n    @classmethod\n    def _sanitize_settings_name(cls, name: str) -> str:\n        \"\"\"Sanitize the name for the settings\"\"\"\n\n        for char in [\" \", \":\", \".\", \"&\", \"?\", \"!\"]:\n            name = name.replace(char, \"_\")\n\n        return name.lower()\n\n    def __process_guidelines(self, guidelines):\n        if guidelines is None:\n            return guidelines\n\n        if not isinstance(guidelines, str):\n            raise SettingsError(\"Guidelines must be a string or a path to a file\")\n\n        if os.path.exists(guidelines):\n            with open(guidelines, \"r\") as file:\n                return file.read()\n\n        return guidelines\n\n    @classmethod\n    def _is_valid_name(cls, name: str) -> bool:\n        \"\"\"Check if the name is valid\"\"\"\n        return bool(re.match(r\"^(?=.*[a-z0-9])[a-z0-9_-]+$\", name))\n
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings.__init__","title":"__init__(fields=None, questions=None, vectors=None, metadata=None, guidelines=None, allow_extra_metadata=False, distribution=None, mapping=None, _dataset=None)","text":"

Parameters:

  • fields (List[Field], default: None): A list of Field objects that represent the fields in the Dataset.
  • questions (List[Union[LabelQuestion, MultiLabelQuestion, RankingQuestion, TextQuestion, RatingQuestion]], default: None): A list of Question objects that represent the questions in the Dataset.
  • vectors (List[VectorField], default: None): A list of VectorField objects that represent the vectors in the Dataset.
  • metadata (List[MetadataField], default: None): A list of MetadataField objects that represent the metadata in the Dataset.
  • guidelines (str, default: None): A string containing the guidelines for the Dataset.
  • allow_extra_metadata (bool, default: False): A boolean that determines whether or not extra metadata is allowed in the Dataset.
  • distribution (TaskDistribution, default: None): The annotation task distribution configuration. Defaults to DEFAULT_TASK_DISTRIBUTION.
  • mapping (Dict[str, Union[str, Sequence[str]]], default: None): A dictionary that maps incoming data names to Argilla dataset attributes in DatasetRecords.

Source code in src/argilla/settings/_resource.py
def __init__(\n    self,\n    fields: Optional[List[Field]] = None,\n    questions: Optional[List[QuestionType]] = None,\n    vectors: Optional[List[VectorField]] = None,\n    metadata: Optional[List[MetadataType]] = None,\n    guidelines: Optional[str] = None,\n    allow_extra_metadata: bool = False,\n    distribution: Optional[TaskDistribution] = None,\n    mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n    _dataset: Optional[\"Dataset\"] = None,\n) -> None:\n    \"\"\"\n    Args:\n        fields (List[Field]): A list of Field objects that represent the fields in the Dataset.\n        questions (List[Union[LabelQuestion, MultiLabelQuestion, RankingQuestion, TextQuestion, RatingQuestion]]):\n            A list of Question objects that represent the questions in the Dataset.\n        vectors (List[VectorField]): A list of VectorField objects that represent the vectors in the Dataset.\n        metadata (List[MetadataField]): A list of MetadataField objects that represent the metadata in the Dataset.\n        guidelines (str): A string containing the guidelines for the Dataset.\n        allow_extra_metadata (bool): A boolean that determines whether or not extra metadata is allowed in the\n            Dataset. Defaults to False.\n        distribution (TaskDistribution): The annotation task distribution configuration.\n            Default to DEFAULT_TASK_DISTRIBUTION\n        mapping (Dict[str, Union[str, Sequence[str]]]): A dictionary that maps incoming data names to Argilla dataset attributes in DatasetRecords.\n    \"\"\"\n    super().__init__(client=_dataset._client if _dataset else None)\n\n    self._dataset = _dataset\n    self._distribution = distribution\n    self._mapping = mapping\n    self.__guidelines = self.__process_guidelines(guidelines)\n    self.__allow_extra_metadata = allow_extra_metadata\n\n    self.__questions = QuestionsProperties(self, questions)\n    self.__fields = SettingsProperties(self, fields)\n    self.__vectors = SettingsProperties(self, vectors)\n    self.__metadata = SettingsProperties(self, metadata)\n
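As a sketch of the mapping parameter described above, mapping incoming column names to dataset attributes; the incoming names "body" and "annotation" are illustrative assumptions:

settings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n    mapping={\"body\": \"text\", \"annotation\": \"label\"},  # incoming name -> dataset attribute\n)\n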
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings.to_json","title":"to_json(path)","text":"

Save the settings to a file on disk

Parameters:

  • path (str, required): The path to save the settings to.

Source code in src/argilla/settings/_resource.py
def to_json(self, path: Union[Path, str]) -> None:\n    \"\"\"Save the settings to a file on disk\n\n    Parameters:\n        path (str): The path to save the settings to\n    \"\"\"\n    if not isinstance(path, Path):\n        path = Path(path)\n    if path.exists():\n        raise FileExistsError(f\"File {path} already exists\")\n    with open(path, \"w\") as file:\n        json.dump(self.serialize(), file)\n
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings.from_json","title":"from_json(path) classmethod","text":"

Load the settings from a file on disk

Source code in src/argilla/settings/_resource.py
@classmethod\ndef from_json(cls, path: Union[Path, str]) -> \"Settings\":\n    \"\"\"Load the settings from a file on disk\"\"\"\n\n    with open(path, \"r\") as file:\n        settings_dict = json.load(file)\n        return cls._from_dict(settings_dict)\n
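A minimal round-trip sketch using the two methods above; the file name is an illustrative assumption:

settings.to_json(\"settings.json\")  # raises FileExistsError if the file already exists\nloaded_settings = rg.Settings.from_json(\"settings.json\")\n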
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings.from_hub","title":"from_hub(repo_id, subset=None, feature_mapping=None, **kwargs) classmethod","text":"

Load the settings from the Hub

Parameters:

  • repo_id (str, required): The ID of the repository to load the settings from on the Hub.
  • subset (Optional[str], default: None): The subset of the repository to load the settings from.
  • feature_mapping (Dict[str, Literal['question', 'field', 'metadata']], default: None): A dictionary that maps incoming column names to Argilla attributes.

Source code in src/argilla/settings/_resource.py
@classmethod\ndef from_hub(\n    cls,\n    repo_id: str,\n    subset: Optional[str] = None,\n    feature_mapping: Optional[Dict[str, Literal[\"question\", \"field\", \"metadata\"]]] = None,\n    **kwargs,\n) -> \"Settings\":\n    \"\"\"Load the settings from the Hub\n\n    Parameters:\n        repo_id (str): The ID of the repository to load the settings from on the Hub.\n        subset (Optional[str]): The subset of the repository to load the settings from.\n        feature_mapping (Dict[str, Literal[\"question\", \"field\", \"metadata\"]]): A dictionary that maps incoming column names to Argilla attributes.\n    \"\"\"\n\n    settings = build_settings_from_repo_id(repo_id=repo_id, feature_mapping=feature_mapping, subset=subset)\n    return settings\n
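A short usage sketch, assuming a public Hub repository id and that its label column should become a question:

settings = rg.Settings.from_hub(\n    repo_id=\"stanfordnlp/imdb\",  # illustrative repository id\n    feature_mapping={\"label\": \"question\"},  # map the label column to a question\n)\n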
"},{"location":"reference/argilla/settings/task_distribution/","title":"Distribution","text":"

Distribution settings define the criteria Argilla uses to automatically manage records in the dataset, based on the expected number of submitted responses per record.

"},{"location":"reference/argilla/settings/task_distribution/#usage-examples","title":"Usage Examples","text":"

The default minimum submitted responses per record is 1. If you wish to increase this value, you can define it through the TaskDistribution class and pass it to the Settings class.

settings = rg.Settings(\n    guidelines=\"These are some guidelines.\",\n    fields=[\n        rg.TextField(\n            name=\"text\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"label\",\n            labels=[\"label_1\", \"label_2\", \"label_3\"]\n        ),\n    ],\n    distribution=rg.TaskDistribution(min_submitted=3)\n)\n\ndataset = rg.Dataset(\n    name=\"my_dataset\",\n    settings=settings\n)\n
"},{"location":"reference/argilla/settings/task_distribution/#src.argilla.settings._task_distribution.OverlapTaskDistribution","title":"OverlapTaskDistribution","text":"

The task distribution settings class.

This task distribution defines a number of submitted responses required to complete a record.

Parameters:

  • min_submitted (int, required): The minimum number of submitted responses required to complete the record.

Source code in src/argilla/settings/_task_distribution.py
class OverlapTaskDistribution:\n    \"\"\"The task distribution settings class.\n\n    This task distribution defines a number of submitted responses required to complete a record.\n\n    Parameters:\n        min_submitted (int): The number of min. submitted responses to complete the record\n    \"\"\"\n\n    strategy: Literal[\"overlap\"] = \"overlap\"\n\n    def __init__(self, min_submitted: int):\n        self._model = OverlapTaskDistributionModel(min_submitted=min_submitted, strategy=self.strategy)\n\n    def __repr__(self) -> str:\n        return f\"OverlapTaskDistribution(min_submitted={self.min_submitted})\"\n\n    def __eq__(self, other) -> bool:\n        if not isinstance(other, self.__class__):\n            return False\n\n        return self._model == other._model\n\n    @classmethod\n    def default(cls) -> \"OverlapTaskDistribution\":\n        return cls(min_submitted=1)\n\n    @property\n    def min_submitted(self):\n        return self._model.min_submitted\n\n    @min_submitted.setter\n    def min_submitted(self, value: int):\n        self._model.min_submitted = value\n\n    @classmethod\n    def from_model(cls, model: OverlapTaskDistributionModel) -> \"OverlapTaskDistribution\":\n        return cls(min_submitted=model.min_submitted)\n\n    @classmethod\n    def from_dict(cls, dict: Dict[str, Any]) -> \"OverlapTaskDistribution\":\n        return cls.from_model(OverlapTaskDistributionModel.model_validate(dict))\n\n    def to_dict(self):\n        return self._model.model_dump()\n\n    def _api_model(self) -> OverlapTaskDistributionModel:\n        return self._model\n
"},{"location":"reference/argilla/settings/vectors/","title":"Vectors","text":"

Vector fields in Argilla are used to define the vector form of a record that will be reviewed by a user.

"},{"location":"reference/argilla/settings/vectors/#usage-examples","title":"Usage Examples","text":"

To define a vector field, instantiate the VectorField class with a name and dimensions, then pass it to the vectors parameter of the Settings class.

settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    vectors=[\n        rg.VectorField(\n            name=\"my_vector\",\n            dimensions=768,\n            title=\"Document Embedding\",\n        ),\n    ],\n)\n

To add records with vectors, refer to the rg.Vector class documentation.
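To illustrate, a hedged sketch of logging a record that carries a vector matching the field defined above; the dataset variable, field text, and vector values are assumptions:

record = rg.Record(\n    fields={\"text\": \"Hello world, how are you?\"},\n    vectors=[rg.Vector(name=\"my_vector\", values=[0.1] * 768)],  # length must match the configured dimensions\n)\ndataset.records.log([record])\n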

"},{"location":"reference/argilla/settings/vectors/#src.argilla.settings._vector.VectorField","title":"VectorField","text":"

Bases: Resource

Vector field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_vector.py
class VectorField(Resource):\n    \"\"\"Vector field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    _model: VectorFieldModel\n    _api: VectorsAPI\n    _dataset: Optional[\"Dataset\"]\n\n    def __init__(\n        self,\n        name: str,\n        dimensions: int,\n        title: Optional[str] = None,\n        _client: Optional[\"Argilla\"] = None,\n    ) -> None:\n        \"\"\"Vector field for use in Argilla `Dataset` `Settings`\n\n        Parameters:\n            name (str): The name of the vector field\n            dimensions (int): The number of dimensions in the vector\n            title (Optional[str]): The title of the vector to be shown in the UI.\n        \"\"\"\n        client = _client or Argilla._get_default()\n        super().__init__(api=client.api.vectors, client=client)\n        self._model = VectorFieldModel(name=name, title=title, dimensions=dimensions)\n        self._dataset = None\n\n    @property\n    def name(self) -> str:\n        return self._model.name\n\n    @name.setter\n    def name(self, value: str) -> None:\n        self._model.name = value\n\n    @property\n    def title(self) -> Optional[str]:\n        return self._model.title\n\n    @title.setter\n    def title(self, value: Optional[str]) -> None:\n        self._model.title = value\n\n    @property\n    def dimensions(self) -> int:\n        return self._model.dimensions\n\n    @dimensions.setter\n    def dimensions(self, value: int) -> None:\n        self._model.dimensions = value\n\n    @property\n    def dataset(self) -> \"Dataset\":\n        return self._dataset\n\n    @dataset.setter\n    def dataset(self, value: \"Dataset\") -> None:\n        self._dataset = value\n        self._model.dataset_id = self._dataset.id\n        self._with_client(self._dataset._client)\n\n    def __repr__(self) -> str:\n        return f\"{self.__class__.__name__}(name={self.name}, title={self.title}, dimensions={self.dimensions})\"\n\n    @classmethod\n    def from_model(cls, model: VectorFieldModel) -> \"VectorField\":\n        instance = cls(name=model.name, dimensions=model.dimensions)\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"VectorField\":\n        model = VectorFieldModel(**data)\n        return cls.from_model(model=model)\n\n    def _with_client(self, client: \"Argilla\") -> \"VectorField\":\n        # TODO: Review and simplify. Maybe only one of them is required\n        self._client = client\n        self._api = self._client.api.vectors\n\n        return self\n
"},{"location":"reference/argilla/settings/vectors/#src.argilla.settings._vector.VectorField.__init__","title":"__init__(name, dimensions, title=None, _client=None)","text":"

Vector field for use in Argilla Dataset Settings

Parameters:

  • name (str, required): The name of the vector field.
  • dimensions (int, required): The number of dimensions in the vector.
  • title (Optional[str], default: None): The title of the vector to be shown in the UI.

Source code in src/argilla/settings/_vector.py
def __init__(\n    self,\n    name: str,\n    dimensions: int,\n    title: Optional[str] = None,\n    _client: Optional[\"Argilla\"] = None,\n) -> None:\n    \"\"\"Vector field for use in Argilla `Dataset` `Settings`\n\n    Parameters:\n        name (str): The name of the vector field\n        dimensions (int): The number of dimensions in the vector\n        title (Optional[str]): The title of the vector to be shown in the UI.\n    \"\"\"\n    client = _client or Argilla._get_default()\n    super().__init__(api=client.api.vectors, client=client)\n    self._model = VectorFieldModel(name=name, title=title, dimensions=dimensions)\n    self._dataset = None\n
"},{"location":"reference/argilla-server/configuration/","title":"Server configuration","text":"

This section explains advanced operations and settings for running the Argilla Server and Argilla Python Client.

By default, the Argilla Server will look for your Elasticsearch (ES) endpoint at http://localhost:9200. You can customize this by setting the ARGILLA_ELASTICSEARCH environment variable. Have a look at the list of available environment variables to further configure the Argilla server.

From Argilla version 1.19.0 onwards, you must set up the search engine manually to work with datasets. Set the environment variable ARGILLA_SEARCH_ENGINE=opensearch or ARGILLA_SEARCH_ENGINE=elasticsearch depending on the backend you're using; the default value is elasticsearch. The minimum supported version is 8.5.0 for Elasticsearch and 2.4.0 for OpenSearch. Please review your backend and upgrade it if necessary.

Warning

For vector search in OpenSearch, filtering is applied as a post_filter step, since a bug causes queries that combine filtering with knn to fail from Argilla. See https://github.com/opensearch-project/k-NN/issues/1286

This may lead to unexpected results when combining filtering with vector search on this engine.

"},{"location":"reference/argilla-server/configuration/#launching","title":"Launching","text":""},{"location":"reference/argilla-server/configuration/#using-a-proxy","title":"Using a proxy","text":"

If you run Argilla behind a proxy that adds an extra path prefix to expose the service, you should set the ARGILLA_BASE_URL environment variable so that requests are properly routed to the server application.

For example, if your proxy exposes Argilla in the URL https://my-proxy/custom-path-for-argilla, you should launch the Argilla server with ARGILLA_BASE_URL=/custom-path-for-argilla.

NGINX and Traefik have been tested and are known to work with Argilla:

  • NGINX example
  • Traefik example
"},{"location":"reference/argilla-server/configuration/#environment-variables","title":"Environment variables","text":"

You can set the following environment variables to further configure your server and client.

"},{"location":"reference/argilla-server/configuration/#server","title":"Server","text":""},{"location":"reference/argilla-server/configuration/#fastapi","title":"FastAPI","text":"
  • ARGILLA_HOME_PATH: The directory where Argilla will store all the files needed to run. If the path doesn't exist it will be automatically created (Default: ~/.argilla).

  • ARGILLA_BASE_URL: If you want to launch the Argilla server in a specific base path other than /, you should set up this environment variable. This can be useful when running Argilla behind a proxy that adds a prefix path to route the service (Default: \"/\").

  • ARGILLA_CORS_ORIGINS: List of host patterns for CORS origin access.

  • ARGILLA_DOCS_ENABLED: If False, disables the OpenAPI docs endpoint at /api/docs.

  • HF_HUB_DISABLE_TELEMETRY: If True, disables telemetry for usage metrics. Alternatively, you can disable telemetry by setting HF_HUB_OFFLINE=1.

"},{"location":"reference/argilla-server/configuration/#authentication","title":"Authentication","text":"
  • ARGILLA_AUTH_SECRET_KEY: The secret key used to sign the API token data. You can use openssl rand -hex 32 to generate a 32 character string to use with this environment variable. By default a random value is generated, so if you are using more than one server worker (or more than one Argilla server) you will need to set the same value for all of them.
  • USERNAME: If provided, the owner username (Default: None).
  • PASSWORD: If provided, the owner password (Default: None).

If USERNAME and PASSWORD are provided, the owner user will be created with these credentials on the server startup.

"},{"location":"reference/argilla-server/configuration/#database","title":"Database","text":"
  • ARGILLA_DATABASE_URL: A URL string that contains the necessary information to connect to a database. Argilla uses SQLite by default; PostgreSQL is also officially supported (Default: sqlite:///$ARGILLA_HOME_PATH/argilla.db?check_same_thread=False).
"},{"location":"reference/argilla-server/configuration/#sqlite","title":"SQLite","text":"

The following environment variables are useful only when SQLite is used:

  • ARGILLA_DATABASE_SQLITE_TIMEOUT: How many seconds the connection should wait before raising an OperationalError when a table is locked. If another connection opens a transaction to modify a table, that table will be locked until the transaction is committed (Default: 15 seconds).
"},{"location":"reference/argilla-server/configuration/#postgresql","title":"PostgreSQL","text":"

The following environment variables are useful only when PostgreSQL is used:

  • ARGILLA_DATABASE_POSTGRESQL_POOL_SIZE: The number of connections to keep open inside the database connection pool (Default: 15).

  • ARGILLA_DATABASE_POSTGRESQL_MAX_OVERFLOW: The number of connections that can be opened above and beyond ARGILLA_DATABASE_POSTGRESQL_POOL_SIZE setting (Default: 10).

"},{"location":"reference/argilla-server/configuration/#search-engine","title":"Search engine","text":"
  • ARGILLA_ELASTICSEARCH: URL of the connection endpoint of the Elasticsearch instance (Default: http://localhost:9200).

  • ARGILLA_SEARCH_ENGINE: Search engine to use. Valid values are \"elasticsearch\" and \"opensearch\" (Default: \"elasticsearch\").

  • ARGILLA_ELASTICSEARCH_SSL_VERIFY: If \"False\", disables SSL certificate verification when connecting to the Elasticsearch backend.

  • ARGILLA_ELASTICSEARCH_CA_PATH: Path to CA cert for ES host. For example: /full/path/to/root-ca.pem (Optional)

"},{"location":"reference/argilla-server/configuration/#redis","title":"Redis","text":"

Redis is used by Argilla to store information about jobs to be processed in the background. The following environment variables can be used to configure how Argilla connects to Redis:

  • ARGILLA_REDIS_URL: A URL string that contains the necessary information to connect to a Redis instance (Default: redis://localhost:6379/0).
"},{"location":"reference/argilla-server/configuration/#datasets","title":"Datasets","text":"
  • ARGILLA_LABEL_SELECTION_OPTIONS_MAX_ITEMS: Sets the maximum number of items allowed for label and multi-label questions (Default: 500).

  • ARGILLA_SPAN_OPTIONS_MAX_ITEMS: Sets the maximum number of items allowed for span questions (Default: 500).

"},{"location":"reference/argilla-server/configuration/#hugging-face","title":"Hugging Face","text":"
  • ARGILLA_SHOW_HUGGINGFACE_SPACE_PERSISTENT_STORAGE_WARNING: When Argilla is running on Hugging Face Spaces, you can use this environment variable to disable the warning message shown when persistent storage is disabled for the space (Default: true).
"},{"location":"reference/argilla-server/configuration/#docker-images-only","title":"Docker images only","text":"
  • REINDEX_DATASETS: If true or 1, the datasets will be reindexed in the search engine. This is needed when the search configuration has changed or the data must be refreshed (Default: 0).

  • USERNAME: If provided, the owner username. This can be combined with HF OAuth to define the Argilla server owner (Default: \"\").

  • PASSWORD: If provided, the owner password. If USERNAME and PASSWORD are provided, the owner user will be created with these credentials on the server startup (Default: \"\").

  • WORKSPACE: If provided, the workspace name. If USERNAME, PASSWORD and WORKSPACE are provided, a default workspace will be created with this name (Default: \"\").

  • API_KEY: The default user API key to use. If API_KEY is not provided, a new random API key will be generated (Default: \"\").

  • UVICORN_APP: [Advanced] The name of the FastAPI app to run. This is useful when you want to extend the FastAPI app with additional routes or middleware. The default value is argilla_server:app.

"},{"location":"reference/argilla-server/configuration/#rest-api-docs","title":"REST API docs","text":"

FastAPI also provides beautiful REST API docs that you can check at http://localhost:6900/api/v1/docs.

"},{"location":"reference/argilla-server/telemetry/","title":"Server Telemetry","text":"

Argilla uses telemetry to report anonymous usage and error information. As open-source software, this type of information is important for improving the product and understanding how it is used. This is done through the Hugging Face Hub library telemetry implementation.

"},{"location":"reference/argilla-server/telemetry/#how-to-opt-out","title":"How to opt-out","text":"

You can opt out of telemetry reporting using the ENV variable HF_HUB_DISABLE_TELEMETRY before launching the server. Setting this variable to 1 will completely disable telemetry reporting.

If you are a Linux/macOS user, you should run:

export HF_HUB_DISABLE_TELEMETRY=1\n

If you are a Windows user, you should run:

set HF_HUB_DISABLE_TELEMETRY=1\n

To opt in again, you can set the variable to 0.

"},{"location":"reference/argilla-server/telemetry/#why-reporting-telemetry","title":"Why reporting telemetry","text":"

Anonymous telemetry information enables us to continuously improve the product and detect recurring problems to better serve all users. We collect aggregated information about general usage and errors. We do NOT collect any information on users' data records, datasets, or metadata information.

"},{"location":"reference/argilla-server/telemetry/#sensitive-data","title":"Sensitive data","text":"

We do not collect any piece of information related to the source data you store in Argilla. We don't identify individual users. Your data does not leave your server at any time:

  • No dataset record is collected.
  • No dataset names or metadata are collected.
"},{"location":"reference/argilla-server/telemetry/#information-reported","title":"Information reported","text":"

The following usage and error information is reported:

  • The code of the raised error
  • The user-agent and accept-language http headers
  • Task name and number of records for bulk operations
  • An anonymous generated user uuid
  • An anonymous generated server uuid
  • The Argilla version running the server
  • The Python version, e.g. 3.8.13
  • The system/OS name, such as Linux, Darwin, Windows
  • The system's release version, e.g. Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:22 PDT 2022; root:xnu-8020
  • The machine type, e.g. AMD64
  • The underlying platform spec with as much useful information as possible (e.g. macOS-10.16-x86_64-i386-64bit)
  • The type of deployment: huggingface_space or server
  • The dockerized deployment flag: True or False

For transparency, you can inspect the source code where this is performed here.

If you have any doubts, don't hesitate to join our Discord channel or open a GitHub issue. We'd be very happy to discuss how we can improve this.

"},{"location":"tutorials/","title":"Tutorials","text":"

These are the tutorials for the Argilla SDK. They provide step-by-step instructions for common tasks.

  • Text classification

    Learn about a standard workflow for a text classification task with model fine-tuning.

    Tutorial

  • Token classification

    Learn about a standard workflow for a token classification task with model fine-tuning.

    Tutorial

  • Image classification

    Learn about a standard workflow for an image classification task with model fine-tuning.

    Tutorial

  • Image preference

    Learn about a standard workflow for multi-modal preference datasets like image generation preference.

    Tutorial

"},{"location":"tutorials/image_classification/","title":"Image classification","text":"
  • Goal: Show a standard workflow for an image classification task.
  • Dataset: MNIST, a dataset of 28x28 grayscale images that need to be classified as digits.
  • Libraries: datasets, transformers
  • Components: ImageField, LabelQuestion, Suggestion

If you already have deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

To complete this tutorial, you need to install the Argilla SDK and a few third-party libraries via pip.

!pip install argilla\n
!pip install \"transformers[torch]~=4.0\" \"accelerate~=0.34\"\n

Let's make the required imports:

import base64\nimport io\nimport re\n\nfrom IPython.display import display\nimport numpy as np\nimport torch\nfrom PIL import Image\n\nfrom datasets import load_dataset, Dataset, load_metric\nfrom transformers import (\n    AutoImageProcessor,\n    AutoModelForImageClassification,\n    pipeline,\n    Trainer,\n    TrainingArguments\n)\n\nimport argilla as rg\n

You also need to connect to the Argilla server using the api_url and api_key.

# Replace api_url with your url if using Docker\n# Replace api_key with your API key under \"My Settings\" in the UI\n# Uncomment the last line and set your HF_TOKEN if your space is private\nclient = rg.Argilla(\n    api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n    api_key=\"[your-api-key]\",\n    # headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n

Now, we will need to configure the dataset. In the settings, we can specify the guidelines, fields, and questions. If needed, you can also add metadata and vectors. However, for our use case, we just need a field for the image column and a label question for the label column.

Note

Check this how-to guide to know more about configuring and creating a dataset.

labels = [str(x) for x in range(10)]\n\nsettings = rg.Settings(\n    guidelines=\"The goal of this task is to classify a given image of a handwritten digit into one of 10 classes representing integer values from 0 to 9, inclusively.\",\n    fields=[\n        rg.ImageField(\n            name=\"image\",\n            title=\"An image of a handwritten digit.\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"image_label\",\n            title=\"What digit do you see on the image?\",\n            labels=labels,\n        )\n    ]\n)\n

Let's create the dataset with the name and the defined settings:

dataset = rg.Dataset(\n    name=\"image_classification_dataset\",\n    settings=settings,\n)\ndataset.create()\n

Although we have created the dataset, it still lacks the information to be annotated (you can check this in the UI). We will use the ylecun/mnist dataset from the Hugging Face Hub. Specifically, we will use 100 examples. Because we are dealing with a potentially large image dataset, we will set streaming=True to avoid loading the entire dataset into memory and iterate over the data to load it lazily.

Tip

When working with Hugging Face datasets, you can set Image(decode=False) so that you get public image URLs instead of decoded images, but this depends on how the dataset is stored; a sketch follows.
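A minimal sketch of this, assuming the dataset stores images in a form that exposes URLs or paths when left undecoded:

from datasets import load_dataset, Image\n\nhf_dataset = load_dataset(\"ylecun/mnist\", streaming=True)\n# Keep the encoded image info (e.g. a public URL) instead of decoding to a PIL object\nhf_dataset = hf_dataset.cast_column(\"image\", Image(decode=False))\n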

n_rows = 100\n\nhf_dataset = load_dataset(\"ylecun/mnist\", streaming=True)\ndataset_rows = [row for _,row in zip(range(n_rows), hf_dataset[\"train\"])]\nhf_dataset = Dataset.from_list(dataset_rows)\n\nhf_dataset\n
\nDataset({\n    features: ['image', 'label'],\n    num_rows: 100\n})\n

Let's have a look at the first image in the dataset.

hf_dataset[0]\n
\n{'image': <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28>,\n 'label': 5}\n

We will easily add them to the dataset using log, without needing a mapping since the names already match the Argilla resources. Additionally, since the images are already in PIL format and defined as Image in the Hugging Face dataset's features, we can log them directly. We will also include an id column in each record, allowing us to easily trace back to the external data source.

hf_dataset = hf_dataset.add_column(\"id\", range(len(hf_dataset)))\ndataset.records.log(records=hf_dataset)\n

The next step is to add suggestions to the dataset. This will make things easier and faster for the annotation team. Suggestions will appear as preselected options, so annotators will only need to correct them. In our case, we will generate them using a zero-shot CLIP model. However, you can use a framework or technique of your choice.

We will start by loading the model using a transformers pipeline.

checkpoint = \"openai/clip-vit-large-patch14\"\ndetector = pipeline(model=checkpoint, task=\"zero-shot-image-classification\")\n

Now, let's try to make a model prediction and see if it makes sense.

predictions = detector(hf_dataset[1][\"image\"], candidate_labels=labels)\npredictions, display(hf_dataset[1][\"image\"])\n
\n([{'score': 0.5236628651618958, 'label': '0'},\n  {'score': 0.11496700346469879, 'label': '7'},\n  {'score': 0.08030630648136139, 'label': '8'},\n  {'score': 0.07141078263521194, 'label': '9'},\n  {'score': 0.05868939310312271, 'label': '6'},\n  {'score': 0.05507850646972656, 'label': '5'},\n  {'score': 0.0341767854988575, 'label': '1'},\n  {'score': 0.027202051132917404, 'label': '4'},\n  {'score': 0.018533246591687202, 'label': '3'},\n  {'score': 0.015973029658198357, 'label': '2'}],\n None)\n

It's time to make the predictions on the dataset! We will define a function that uses the zero-shot model to infer the label based on the image. When working with large datasets, you can create a batch_predict method to speed up the process; see the sketch after the next code block.

def predict(input, labels):\n    prediction = detector(input, candidate_labels=labels)\n    prediction = prediction[0]\n    return {\"image_label\": prediction[\"label\"], \"score\": prediction[\"score\"]}\n
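A possible batch_predict sketch, assuming the pipeline accepts a list of images and returns one ranked list of predictions per input:

def batch_predict(inputs, labels, batch_size=16):\n    # Pipelines accept lists and batch internally via batch_size\n    predictions = detector(inputs, candidate_labels=labels, batch_size=batch_size)\n    return [\n        {\"image_label\": prediction[0][\"label\"], \"score\": prediction[0][\"score\"]}\n        for prediction in predictions\n    ]\n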

To update the records, we will need to retrieve them from the server and update them with the new suggestions. The id must always be provided, as it identifies the record to update and avoids creating a new one.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"id\": sample[\"id\"],\n        **predict(sample[\"image\"], labels),\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data, mapping={\"score\": \"image_label.suggestion.score\"})\n

Voilà! We have added the suggestions to the dataset, and they will appear in the UI marked with a ✨.

Now, we can start the annotation process. Just open the dataset in the Argilla UI and start annotating the records. If the suggestions are correct, you can just click on Submit. Otherwise, you can select the correct label.

Note

Check this how-to guide to know more about annotating in the UI.

After the annotation, we will have a robust dataset to train the main model. In our case, we will fine-tune using transformers. However, you can select the one that best fits your requirements.

So, let's start by retrieving the annotated records and exporting them as a Dataset, so images will be in PIL format.

Note

Check this how-to guide to know more about filtering and querying in Argilla. Also, you can check the Hugging Face docs on fine-tuning an image classification model.

dataset = client.datasets(\"image_classification_dataset\")\n
status_filter = rg.Query(filter=rg.Filter((\"response.status\", \"==\", \"submitted\")))\n\nsubmitted = dataset.records(status_filter).to_datasets()\n

We now need to ensure our images are forwarded with the correct dimensions. Because the original MNIST dataset is greyscale and the ViT model expects RGB, we need to add a channel dimension to the images. We will do this by stacking the images along the channel axis.

def greyscale_to_rgb(img) -> Image:\n    return Image.merge('RGB', (img, img, img))\n\nsubmitted_image_rgb = [\n    {\n        \"id\": sample[\"id\"],\n        \"image\": greyscale_to_rgb(sample[\"image\"]),\n        \"label\": sample[\"image_label.responses\"][0],\n    }\n    for sample in submitted\n]\nsubmitted_image_rgb[0]\n
\n{'id': '0', 'image': <PIL.Image.Image image mode=RGB size=28x28>, 'label': '0'}\n

Next, we will load the ImageProcessor to fine-tune the model. This processor will handle image resizing and normalization so that the inputs are compatible with the model we intend to use.

checkpoint = \"google/vit-base-patch16-224-in21k\"\nprocessor = AutoImageProcessor.from_pretrained(checkpoint)\n\nsubmitted_image_rgb_processed = [\n    {\n        \"pixel_values\": processor(sample[\"image\"], return_tensors='pt')[\"pixel_values\"],\n        \"label\": sample[\"label\"],\n    }\n    for sample in submitted_image_rgb\n]\nsubmitted_image_rgb_processed[0]\n

We can now convert the images to a Hugging Face Dataset that is ready for fine-tuning.

prepared_ds = Dataset.from_list(submitted_image_rgb_processed)\nprepared_ds = prepared_ds.train_test_split(test_size=0.2)\nprepared_ds\n
\nDatasetDict({\n    train: Dataset({\n        features: ['pixel_values', 'label'],\n        num_rows: 80\n    })\n    test: Dataset({\n        features: ['pixel_values', 'label'],\n        num_rows: 20\n    })\n})\n

We then need to define our data collator, which will ensure the data is unpacked and stacked correctly for the model.

def collate_fn(batch):\n    # stack per-sample tensors, dropping the extra batch dimension added by the processor,\n    # and convert the string labels to integer ids\n    return {\n        'pixel_values': torch.stack([torch.tensor(x['pixel_values'][0]) for x in batch]),\n        'labels': torch.tensor([int(x['label']) for x in batch])\n    }\n

Next, we can define our training metrics. We will use the accuracy metric to evaluate the model's performance.

metric = load_metric(\"accuracy\", trust_remote_code=True)\ndef compute_metrics(p):\n    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)\n

We then load our model and configure the labels that we will use for training.

model = AutoModelForImageClassification.from_pretrained(\n    checkpoint,\n    num_labels=len(labels),\n    id2label={int(i): int(c) for i, c in enumerate(labels)},\n    label2id={int(c): int(i) for i, c in enumerate(labels)}\n)\nmodel.config\n

Finally, we define the training arguments and start the training process.

training_args = TrainingArguments(\n  output_dir=\"./image-classifier\",\n  per_device_train_batch_size=16,\n  eval_strategy=\"steps\",\n  num_train_epochs=1,\n  fp16=False, # True if you have a GPU with mixed precision support\n  save_steps=100,\n  eval_steps=100,\n  logging_steps=10,\n  learning_rate=2e-4,\n  save_total_limit=2,\n  remove_unused_columns=True,\n  push_to_hub=False,\n  load_best_model_at_end=True,\n)\n\ntrainer = Trainer(\n    model=model,\n    args=training_args,\n    data_collator=collate_fn,\n    compute_metrics=compute_metrics,\n    train_dataset=prepared_ds[\"train\"],\n    eval_dataset=prepared_ds[\"test\"],\n    tokenizer=processor,\n)\n\ntrain_results = trainer.train()\ntrainer.save_model()\ntrainer.log_metrics(\"train\", train_results.metrics)\ntrainer.save_metrics(\"train\", train_results.metrics)\ntrainer.save_state()\n
\n{'train_runtime': 12.5374, 'train_samples_per_second': 6.381, 'train_steps_per_second': 0.399, 'train_loss': 2.0533515930175783, 'epoch': 1.0}\n***** train metrics *****\n  epoch                    =        1.0\n  total_flos               =  5774017GF\n  train_loss               =     2.0534\n  train_runtime            = 0:00:12.53\n  train_samples_per_second =      6.381\n  train_steps_per_second   =      0.399\n\n

As the training data was of better quality, we can expect a better model. So we can update the remainder of our original dataset with the new model's suggestions.

pipe = pipeline(\"image-classification\", model=model, image_processor=processor)\n\ndef run_inference(batch):\n    predictions = pipe(batch[\"image\"])\n    batch[\"image_label\"] = [prediction[0][\"label\"] for prediction in predictions]\n    batch[\"score\"] = [prediction[0][\"score\"] for prediction in predictions]\n    return batch\n\nhf_dataset = hf_dataset.map(run_inference, batched=True)\n
data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"image_label\": str(sample[\"image_label\"]),\n        \"id\": sample[\"id\"],\n        \"score\": sample[\"score\"],\n    }\n    for sample in hf_dataset\n]\ndataset.records.log(records=updated_data, mapping={\"score\": \"image_label.suggestion.score\"})\n

In this tutorial, we present an end-to-end example of an image classification task. This serves as the base, but it can be performed iteratively and seamlessly integrated into your workflow to ensure high-quality curation of your data and improved results.

We started by configuring the dataset and adding records and suggestions from a zero-shot model. After the annotation process, we trained a new model with the annotated data and updated the remaining records with the new suggestions.

"},{"location":"tutorials/image_classification/#image-classification","title":"Image classification","text":""},{"location":"tutorials/image_classification/#getting-started","title":"Getting started","text":""},{"location":"tutorials/image_classification/#deploy-the-argilla-server","title":"Deploy the Argilla server","text":""},{"location":"tutorials/image_classification/#set-up-the-environment","title":"Set up the environment","text":""},{"location":"tutorials/image_classification/#vibe-check-the-dataset","title":"Vibe check the dataset","text":"

We will look at the dataset to understand its structure and the kind of data it contains. We do this by using the embedded Hugging Face Dataset Viewer.

"},{"location":"tutorials/image_classification/#configure-and-create-the-argilla-dataset","title":"Configure and create the Argilla dataset","text":""},{"location":"tutorials/image_classification/#add-records","title":"Add records","text":""},{"location":"tutorials/image_classification/#add-initial-model-suggestions","title":"Add initial model suggestions","text":""},{"location":"tutorials/image_classification/#evaluate-with-argilla","title":"Evaluate with Argilla","text":""},{"location":"tutorials/image_classification/#train-your-model","title":"Train your model","text":""},{"location":"tutorials/image_classification/#formatting-the-data","title":"Formatting the data","text":""},{"location":"tutorials/image_classification/#the-actual-training","title":"The actual training","text":""},{"location":"tutorials/image_classification/#conclusions","title":"Conclusions","text":""},{"location":"tutorials/image_preference/","title":"Image preference","text":"
  • Goal: Show a standard workflow for working with complex multi-modal preference datasets, such as for image-generation preference.
  • Dataset: tomg-group-umd/pixelprose, a comprehensive dataset of over 16 million synthetically generated captions, leveraging cutting-edge vision-language models (Gemini 1.0 Pro Vision) for detailed and accurate descriptions.
  • Libraries: datasets, sentence-transformers
  • Components: TextField, ImageField, TextQuestion, LabelQuestion, VectorField, FloatMetadataProperty

If you have already deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

To complete this tutorial, you need to install the Argilla SDK and a few third-party libraries via pip.

!pip install argilla\n
!pip install \"sentence-transformers~=3.0\"\n

Let's make the required imports:

import io\nimport os\nimport time\n\nimport argilla as rg\nimport requests\nfrom PIL import Image\nfrom datasets import load_dataset, Dataset\nfrom sentence_transformers import SentenceTransformer\n

You also need to connect to the Argilla server using the api_url and api_key.

# Replace api_url with your url if using Docker\n# Replace api_key with your API key under \"My Settings\" in the UI\n# Uncomment the last line and set your HF_TOKEN if your space is private\nclient = rg.Argilla(\n    api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n    api_key=\"[your-api-key]\",\n    # headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n

Now, we will need to configure the dataset. In the settings, we can specify the guidelines, fields, and questions. We will include a TextField, an ImageField corresponding to the url image column, and two additional ImageField fields representing the images we will generate based on the original_caption column from our dataset. Additionally, we will use a LabelQuestion and an optional TextQuestion, which will be used to collect the user's preference and the reason behind it. We will also be adding a VectorField to store the embeddings for the original_caption so that we can use semantic search and speed up our labeling process. Lastly, we will include two FloatMetadataProperty to store information from the toxicity and the identity_attack columns.

Note

Check this how-to guide to know more about configuring and creating a dataset.

settings = rg.Settings(\n    guidelines=\"The goal is to choose the image that best represents the caption.\",\n    fields=[\n        rg.TextField(\n            name=\"caption\",\n            title=\"An image caption belonging to the original image.\",\n        ),\n        rg.ImageField(\n            name=\"image_original\",\n            title=\"The original image, belonging to the caption.\",\n        ),\n        rg.ImageField(\n            name=\"image_1\",\n            title=\"An image that has been generated based on the caption.\",\n        ),\n        rg.ImageField(\n            name=\"image_2\",\n            title=\"An image that has been generated based on the caption.\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"preference\",\n            title=\"The chosen preference for the generation.\",\n            labels=[\"image_1\", \"image_2\"],\n        ),\n        rg.TextQuestion(\n            name=\"comment\",\n            title=\"Any additional comments.\",\n            required=False,\n        ),\n    ],\n    metadata=[\n        rg.FloatMetadataProperty(name=\"toxicity\", title=\"Toxicity score\"),\n        rg.FloatMetadataProperty(name=\"identity_attack\", title=\"Identity attack score\"),\n\n    ],\n    vectors=[\n        rg.VectorField(name=\"original_caption_vector\", dimensions=384),\n    ]\n)\n

Let's create the dataset with the name and the defined settings:

dataset = rg.Dataset(\n    name=\"image_preference_dataset\",\n    settings=settings,\n)\ndataset.create()\n
n_rows = 25\n\nhf_dataset = load_dataset(\"tomg-group-umd/pixelprose\", streaming=True)\ndataset_rows = [row for _,row in zip(range(n_rows), hf_dataset[\"train\"])]\nhf_dataset = Dataset.from_list(dataset_rows)\n\nhf_dataset\n
\nDataset({\n    features: ['uid', 'url', 'key', 'status', 'original_caption', 'vlm_model', 'vlm_caption', 'toxicity', 'severe_toxicity', 'obscene', 'identity_attack', 'insult', 'threat', 'sexual_explicit', 'watermark_class_id', 'watermark_class_score', 'aesthetic_score', 'error_message', 'width', 'height', 'original_width', 'original_height', 'exif', 'sha256', 'image_id', 'author', 'subreddit', 'score'],\n    num_rows: 25\n})\n

Let's have a look at the first entry in the dataset.

hf_dataset[0]\n
\n{'uid': '0065a9b1cb4da4696f2cd6640e00304257cafd97c0064d4c61e44760bf0fa31c',\n 'url': 'https://media.gettyimages.com/photos/plate-of-food-from-murray-bros-caddy-shack-at-the-world-golf-hall-of-picture-id916117812?s=612x612',\n 'key': '007740026',\n 'status': 'success',\n 'original_caption': 'A plate of food from Murray Bros Caddy Shack at the World Golf Hall of Fame',\n 'vlm_model': 'gemini-pro-vision',\n 'vlm_caption': ' This image displays: A plate of fried calamari with a lemon wedge and a side of green beans, served in a basket with a pink bowl of marinara sauce. The basket is sitting on a table with a checkered tablecloth. In the background is a glass of water and a plate with a burger and fries. The style of the image is a photograph.',\n 'toxicity': 0.0005555678508244455,\n 'severe_toxicity': 1.7323875454167137e-06,\n 'obscene': 3.8304504414554685e-05,\n 'identity_attack': 0.00010549413127591833,\n 'insult': 0.00014773994917050004,\n 'threat': 2.5982120860135183e-05,\n 'sexual_explicit': 2.0972733182134107e-05,\n 'watermark_class_id': 1.0,\n 'watermark_class_score': 0.733799934387207,\n 'aesthetic_score': 5.390625,\n 'error_message': None,\n 'width': 612,\n 'height': 408,\n 'original_width': 612,\n 'original_height': 408,\n 'exif': '{\"Image ImageDescription\": \"A plate of food from Murray Bros. Caddy Shack at the World Golf Hall of Fame. (Photo by: Jeffrey Greenberg/Universal Images Group via Getty Images)\", \"Image XResolution\": \"300\", \"Image YResolution\": \"300\"}',\n 'sha256': '0065a9b1cb4da4696f2cd6640e00304257cafd97c0064d4c61e44760bf0fa31c',\n 'image_id': 'null',\n 'author': 'null',\n 'subreddit': -1,\n 'score': -1}\n

As we can see, the url column does not always contain an image file extension, so we will apply some additional filtering to keep only public image URLs.

hf_dataset = hf_dataset.filter(\n    lambda x: any([x[\"url\"].endswith(extension) for extension in [\".jpg\", \".png\", \".jpeg\"]]))\n\nhf_dataset\n
\nDataset({\n    features: ['uid', 'url', 'key', 'status', 'original_caption', 'vlm_model', 'vlm_caption', 'toxicity', 'severe_toxicity', 'obscene', 'identity_attack', 'insult', 'threat', 'sexual_explicit', 'watermark_class_id', 'watermark_class_score', 'aesthetic_score', 'error_message', 'width', 'height', 'original_width', 'original_height', 'exif', 'sha256', 'image_id', 'author', 'subreddit', 'score'],\n    num_rows: 18\n})\n
API_URL = \"https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-schnell\"\nheaders = {\"Authorization\": f\"Bearer {os.getenv('HF_TOKEN')}\"}\n\ndef query(payload):\n    response = requests.post(API_URL, headers=headers, json=payload)\n    if response.status_code == 200:\n        image_bytes = response.content\n        image = Image.open(io.BytesIO(image_bytes))\n    else:\n        print(f\"Request failed with status code {response.status_code}. retrying in 10 seconds.\")\n        time.sleep(10)\n        image = query(payload)\n    return image\n\nquery({\n    \"inputs\": \"Astronaut riding a horse\"\n})\n

Cool! Now that we have verified the generation function, let's generate the PIL images for the dataset.

def generate_image(row):\n    caption = row[\"original_caption\"]\n    row[\"image_1\"] = query({\"inputs\": caption})\n    row[\"image_2\"] = query({\"inputs\": caption + \" \"}) # space to avoid caching and getting the same image\n    return row\n\nhf_dataset_with_images = hf_dataset.map(generate_image, batched=False)\n\nhf_dataset_with_images\n
\nDataset({\n    features: ['uid', 'url', 'key', 'status', 'original_caption', 'vlm_model', 'vlm_caption', 'toxicity', 'severe_toxicity', 'obscene', 'identity_attack', 'insult', 'threat', 'sexual_explicit', 'watermark_class_id', 'watermark_class_score', 'aesthetic_score', 'error_message', 'width', 'height', 'original_width', 'original_height', 'exif', 'sha256', 'image_id', 'author', 'subreddit', 'score', 'image_1', 'image_2'],\n    num_rows: 18\n})\n
model = SentenceTransformer(\"TaylorAI/bge-micro-v2\")\n\ndef encode_questions(batch):\n    vectors_as_numpy = model.encode(batch[\"original_caption\"])\n    batch[\"original_caption_vector\"] = [x.tolist() for x in vectors_as_numpy]\n    return batch\n\nhf_dataset_with_images_vectors = hf_dataset_with_images.map(encode_questions, batched=True)\n
dataset.records.log(records=hf_dataset_with_images_vectors, mapping={\n    \"key\": \"id\",\n    \"original_caption\": \"caption\",\n    \"url\": \"image_original\",\n})\n

Voil\u00e0! We have our Argilla dataset ready for annotation.

Now, we can start the annotation process. Just open the dataset in the Argilla UI and start annotating the records.

Note

Check this how-to guide to know more about annotating in the UI.

In this tutorial, we present an end-to-end example of an image preference task. This serves as the base, but it can be performed iteratively and seamlessly integrated into your workflow to ensure high-quality curation of your data and improved results.

We started by configuring the dataset and adding records with the original and generated images. After the annotation process, you can evaluate the results and potentially retrain the model to improve the quality of the generated images.

"},{"location":"tutorials/image_preference/#image-preference","title":"Image preference","text":""},{"location":"tutorials/image_preference/#getting-started","title":"Getting started","text":""},{"location":"tutorials/image_preference/#deploy-the-argilla-server","title":"Deploy the Argilla server","text":""},{"location":"tutorials/image_preference/#set-up-the-environment","title":"Set up the environment","text":""},{"location":"tutorials/image_preference/#vibe-check-the-dataset","title":"Vibe check the dataset","text":"

We will take a look at the dataset to understand its structure and the types of data it contains. We can do this using the embedded Hugging Face Dataset Viewer.

"},{"location":"tutorials/image_preference/#configure-and-create-the-argilla-dataset","title":"Configure and create the Argilla dataset","text":""},{"location":"tutorials/image_preference/#add-records","title":"Add records","text":"

Even if we have created the dataset, it still lacks the information to be annotated (you can check it in the UI). We will use the tomg-group-umd/pixelprose dataset from the Hugging Face Hub. Specifically, we will use 25 examples. Because we are dealing with a potentially large image dataset, we will set streaming=True to avoid loading the entire dataset into memory and instead iterate over the data, loading it lazily.

Tip

When working with Hugging Face datasets, you can set Image(decode=False) so that you get public image URLs instead of decoded images, though this depends on the dataset.
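
As a minimal sketch of this tip, assuming a Dataset object named ds with an image column named image (pixelprose itself already exposes plain URLs in its url column, so this is not needed here):

from datasets import Image\n\n# hypothetical example: keep the column undecoded so the underlying\n# path/URL and raw bytes are exposed instead of a decoded PIL image\nds = ds.cast_column(\"image\", Image(decode=False))\nds[0][\"image\"]  # -> {'bytes': ..., 'path': ...}\n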

"},{"location":"tutorials/image_preference/#generate-images","title":"Generate images","text":"

We'll start by generating images based on the original_caption column using the recently released black-forest-labs/FLUX.1-schnell model. For this, we will use the free but rate-limited Inference API provided by Hugging Face, but you can use any other model from the Hub or any other method. We will generate two images per example. Additionally, we will add a small retry mechanism to handle the rate limit.

Let's begin by defining and testing a generation function.

"},{"location":"tutorials/image_preference/#add-vectors","title":"Add vectors","text":"

We will use the sentence-transformers library to create vectors for the original_caption. We will use the TaylorAI/bge-micro-v2 model, which strikes a good balance between speed and performance. Note that we also need to convert the vectors to a list to store them in the Argilla dataset.

"},{"location":"tutorials/image_preference/#log-to-argilla","title":"Log to Argilla","text":"

We can easily add them to the dataset using log and a mapping, where we indicate which column from our dataset needs to be mapped to which Argilla resource when the names do not correspond. We also use the key column as the id of our records so we can easily trace each record back to the external data source.

"},{"location":"tutorials/image_preference/#evaluate-with-argilla","title":"Evaluate with Argilla","text":""},{"location":"tutorials/image_preference/#conclusions","title":"Conclusions","text":""},{"location":"tutorials/text_classification/","title":"Text classification","text":"
  • Goal: Show a standard workflow for a text classification task, including zero-shot suggestions and model fine-tuning.
  • Dataset: IMDB, a dataset of movie reviews that need to be classified as positive or negative.
  • Libraries: datasets, transformers, setfit
  • Components: TextField, LabelQuestion, Suggestion, Query, Filter

If you have already deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

To complete this tutorial, you need to install the Argilla SDK and a few third-party libraries via pip.

!pip install argilla\n
!pip install setfit==1.0.3 transformers==4.40.2\n

Let's make the required imports:

import argilla as rg\n\nfrom datasets import load_dataset, Dataset\nfrom setfit import SetFitModel, Trainer, get_templated_dataset, sample_dataset\n

You also need to connect to the Argilla server using the api_url and api_key.

# Replace api_url with your url if using Docker\n# Replace api_key with your API key under \"My Settings\" in the UI\n# Uncomment the last line and set your HF_TOKEN if your space is private\nclient = rg.Argilla(\n    api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n    api_key=\"[your-api-key]\",\n    # headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n

Now, we will need to configure the dataset. In the settings, we can specify the guidelines, fields, and questions. If needed, you can also add metadata and vectors. However, for our use case, we just need a text field and a label question, corresponding to the text and label columns.

Note

Check this how-to guide to know more about configuring and creating a dataset.

labels = [\"positive\", \"negative\"]\n\nsettings = rg.Settings(\n    guidelines=\"Classify the reviews as positive or negative.\",\n    fields=[\n        rg.TextField(\n            name=\"review\",\n            title=\"Text from the review\",\n            use_markdown=False,\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"sentiment_label\",\n            title=\"In which category does this article fit?\",\n            labels=labels,\n        )\n    ],\n)\n

Let's create the dataset with the name and the defined settings:

dataset = rg.Dataset(\n    name=\"text_classification_dataset\",\n    settings=settings,\n)\ndataset.create()\n

Even if we have created the dataset, it still lacks the information to be annotated (you can check it in the UI). We will use the imdb dataset from the Hugging Face Hub. Specifically, we will use 100 samples from the train split.

hf_dataset = load_dataset(\"imdb\", split=\"train[:100]\")\n

We will easily add them to the dataset using log and the mapping, where we indicate that the column text is the data that should be added to the field review.

dataset.records.log(records=hf_dataset, mapping={\"text\": \"review\"})\n

The next step is to add suggestions to the dataset. This will make things easier and faster for the annotation team. Suggestions will appear as preselected options, so annotators will only need to correct them. In our case, we will generate them using a zero-shot SetFit model. However, you can use a framework or technique of your choice.

We will start by defining an example training set with the required labels: positive and negative. Using get_templated_dataset will create sentences from the default template: \"This sentence is {label}.\"

zero_ds = get_templated_dataset(\n    candidate_labels=labels,\n    sample_size=8,\n)\n

Now, we will prepare a function to train the SetFit model.

Note

For further customization, you can check the SetFit documentation.

def train_model(model_name, dataset):\n    model = SetFitModel.from_pretrained(model_name)\n\n    trainer = Trainer(\n        model=model,\n        train_dataset=dataset,\n    )\n\n    trainer.train()\n\n    return model\n

Let's train the model. We will use TaylorAI/bge-micro-v2, available on the Hugging Face Hub.

model = train_model(model_name=\"TaylorAI/bge-micro-v2\", dataset=zero_ds)\n

You can save it locally or push it to the Hub, and then load it from there.

# Save and load locally\n# model.save_pretrained(\"text_classification_model\")\n# model = SetFitModel.from_pretrained(\"text_classification_model\")\n\n# Push and load in HF\n# model.push_to_hub(\"[username]/text_classification_model\")\n# model = SetFitModel.from_pretrained(\"[username]/text_classification_model\")\n

It's time to make the predictions! We will define a function that uses the predict method to get the suggested label. The model will infer the label based on the text.

def predict(model, input, labels):\n    model.labels = labels\n\n    prediction = model.predict([input])\n\n    return prediction[0]\n

To update the records, we will need to retrieve them from the server and update them with the new suggestions. The id must always be provided, as it is the identifier used to update an existing record rather than create a new one.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"sentiment_label\": predict(model, sample[\"review\"], labels),\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n

Voil\u00e0! We have added the suggestions to the dataset, and they will appear in the UI marked with a \u2728.

Now, we can start the annotation process. Just open the dataset in the Argilla UI and start annotating the records. If the suggestions are correct, you can just click on Submit. Otherwise, you can select the correct label.

Note

Check this how-to guide to know more about annotating in the UI.

After the annotation, we will have a robust dataset to train the main model. In our case, we will fine-tune using SetFit. However, you can select the framework that best fits your requirements. So, let's start by retrieving the annotated records.

Note

Check this how-to guide to know more about filtering and querying in Argilla. Also, you can check the Hugging Face docs on fine-tuning a text classification model.

dataset = client.datasets(\"text_classification_dataset\")\n
status_filter = rg.Query(filter=rg.Filter((\"response.status\", \"==\", \"submitted\")))\n\nsubmitted = dataset.records(status_filter).to_list(flatten=True)\n

As we have a single response per record, we can retrieve the selected label directly and create the training set, keeping 8 samples per label to obtain a balanced dataset for few-shot learning.

train_records = [\n    {\n        \"text\": r[\"review\"],\n        \"label\": r[\"sentiment_label.responses\"][0],\n    }\n    for r in submitted\n]\ntrain_dataset = Dataset.from_list(train_records)\ntrain_dataset = sample_dataset(train_dataset, label_column=\"label\", num_samples=8)\n

We can train the model using our previous function, but this time with a high-quality human-annotated training set.

model = train_model(model_name=\"TaylorAI/bge-micro-v2\", dataset=train_dataset)\n

As the training data was of better quality, we can expect a better model. So we can update the remaining non-annotated records with the new model's suggestions.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"sentiment_label\": predict(model, sample[\"review\"], labels),\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n

In this tutorial, we present an end-to-end example of a text classification task. This serves as the base, but it can be performed iteratively and seamlessly integrated into your workflow to ensure high-quality curation of your data and improved results.

We started by configuring the dataset, adding records, and training a zero-shot SetFit model, as an example, to add suggestions. After the annotation process, we trained a new model with the annotated data and updated the remaining records with the new suggestions.

"},{"location":"tutorials/text_classification/#text-classification","title":"Text classification","text":""},{"location":"tutorials/text_classification/#getting-started","title":"Getting started","text":""},{"location":"tutorials/text_classification/#deploy-the-argilla-server","title":"Deploy the Argilla server","text":""},{"location":"tutorials/text_classification/#set-up-the-environment","title":"Set up the environment","text":""},{"location":"tutorials/text_classification/#vibe-check-the-dataset","title":"Vibe check the dataset","text":"

We will have a look at the dataset to understand its structure and the kind of data it contains. We do this by using the embedded Hugging Face Dataset Viewer.

"},{"location":"tutorials/text_classification/#configure-and-create-the-argilla-dataset","title":"Configure and create the Argilla dataset","text":""},{"location":"tutorials/text_classification/#add-records","title":"Add records","text":""},{"location":"tutorials/text_classification/#add-initial-model-suggestions","title":"Add initial model suggestions","text":""},{"location":"tutorials/text_classification/#evaluate-with-argilla","title":"Evaluate with Argilla","text":""},{"location":"tutorials/text_classification/#train-your-model","title":"Train your model","text":""},{"location":"tutorials/text_classification/#conclusions","title":"Conclusions","text":""},{"location":"tutorials/token_classification/","title":"Token classification","text":"
  • Goal: Show a standard workflow for a token classification task, including zero-shot suggestions and model fine-tuning.
  • Dataset: ontonotes5, a large corpus comprising various genres of text that need to be classified for Named Entity Recognition.
  • Libraries: datasets, gliner, transformers, spanmarker
  • Components: TextField, SpanQuestion, Suggestion, Query, Filter

If you have already deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

To complete this tutorial, you need to install the Argilla SDK and a few third-party libraries via pip.

!pip install argilla\n
!pip install gliner==0.2.6 transformers==4.40.2 span_marker==1.5.0\n

Let's make the needed imports:

import re\n\nimport argilla as rg\n\nimport torch\nfrom datasets import load_dataset, Dataset, DatasetDict\nfrom gliner import GLiNER\nfrom span_marker import SpanMarkerModel, Trainer\nfrom transformers import TrainingArguments\n

You also need to connect to the Argilla server with the api_url and api_key.

# Replace api_url with your url if using Docker\n# Replace api_key with your API key under \"My Settings\" in the UI\n# Uncomment the last line and set your HF_TOKEN if your space is private\nclient = rg.Argilla(\n    api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n    api_key=\"[your-api-key]\",\n    # headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n

Now, we will need to configure the dataset. In the settings, we can specify the guidelines, fields, and questions. If needed, you can also add metadata and vectors. However, for our use case, we just need a text field and a span question, corresponding to the token and tags columns. We will focus on Named Entity Recognition, but this workflow can also be applied to Span Classification, which differs in that the spans are less clearly defined and often overlap.

labels = [\n    \"CARDINAL\",\n    \"DATE\",\n    \"PERSON\",\n    \"NORP\",\n    \"GPE\",\n    \"LAW\",\n    \"PERCENT\",\n    \"ORDINAL\",\n    \"MONEY\",\n    \"WORK_OF_ART\",\n    \"FAC\",\n    \"TIME\",\n    \"QUANTITY\",\n    \"PRODUCT\",\n    \"LANGUAGE\",\n    \"ORG\",\n    \"LOC\",\n    \"EVENT\",\n]\n\nsettings = rg.Settings(\n    guidelines=\"Classify individual tokens according to the specified categories, ensuring that any overlapping or nested entities are accurately captured.\",\n    fields=[\n        rg.TextField(\n            name=\"text\",\n            title=\"Text\",\n            use_markdown=False,\n        ),\n    ],\n    questions=[\n        rg.SpanQuestion(\n            name=\"span_label\",\n            field=\"text\",\n            labels=labels,\n            title=\"Classify the tokens according to the specified categories.\",\n            allow_overlapping=False,\n        )\n    ],\n)\n

Let's create the dataset with the name and the defined settings:

dataset = rg.Dataset(\n    name=\"token_classification_dataset\",\n    settings=settings,\n)\ndataset.create()\n

We have created the dataset (you can check it in the UI), but we still need to add the data for annotation. In this case, we will use the ontonotes5 dataset from the Hugging Face Hub. Specifically, we will use 2100 samples from the test split.

hf_dataset = load_dataset(\"tner/ontonotes5\", split=\"test[:2100]\")\n

We will iterate over the Hugging Face dataset, adding data to the corresponding field in the Record object for the Argilla dataset. Then, we will easily add them to the dataset using log.

records = [rg.Record(fields={\"text\": \" \".join(row[\"tokens\"])}) for row in hf_dataset]\n\ndataset.records.log(records)\n

The next step is to add suggestions to the dataset. This will make things easier and faster for the annotation team. Suggestions will appear as preselected options, so annotators will only need to correct them. In our case, we will generate them using a GLiNER model. However, you can use a framework or technique of your choice.

Note

For further information, you can check the GLiNER repository and the original paper.

We will start by loading the pre-trained GLiNER model. Specifically, we will use gliner_mediumv2, available on the Hugging Face Hub.

gliner_model = GLiNER.from_pretrained(\"urchade/gliner_mediumv2.1\")\n

Next, we will create a function to generate predictions using this general model, which can identify the specified labels without being pre-trained on them. The function will return a dictionary formatted with the necessary schema to add entities to our Argilla dataset. This schema includes the keys 'start' and 'end' to indicate the indices where the span begins and ends, as well as 'label' for the entity label.

def predict_gliner(model, text, labels, threshold):\n    entities = model.predict_entities(text, labels, threshold)\n    return [\n        {k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities\n    ]\n

To update the records, we will need to retrieve them from the server and update them with the new suggestions. The id must always be provided, as it is the identifier used to update an existing record rather than create a new one.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"span_label\": predict_gliner(\n            model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70\n        ),\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n

Voil\u00e0! We have added the suggestions to the dataset and they will appear in the UI marked with \u2728.

Now, we can start the annotation process. Just open the dataset in the Argilla UI and start annotating the records. If the suggestions are correct, you can just click on Submit. Otherwise, you can select the correct label.

Note

Check this how-to guide to know more about annotating in the UI.

After the annotation, we will have a robust dataset to train our model for entity recognition. For our case, we will train a SpanMarker model, but you can select any model of your choice. So, let's start by retrieving the annotated records.

Note

Check this how-to guide to learn more about filtering and querying in Argilla. Also, you can check the Hugging Face docs on fine-tuning a token classification model.

dataset = client.datasets(\"token_classification_dataset\")\n

In our case, we submitted 2000 annotations using the bulk view.

status_filter = rg.Query(filter=rg.Filter((\"response.status\", \"==\", \"submitted\")))\n\nsubmitted = dataset.records(status_filter).to_list(flatten=True)\n

SpanMarker accepts any dataset as long as it has the tokens and ner_tags columns. The ner_tags can be annotated using the IOB, IOB2, BIOES or BILOU labeling scheme, as well as regular unschemed labels. In our case, we have chosen to use the IOB format. Thus, we will define a function to extract the annotated NER tags according to this schema.

Note

For further information, you can check the SpanMarker documentation.

def get_iob_tag_for_token(token_start, token_end, ner_spans):\n    for span in ner_spans:\n        if token_start >= span[\"start\"] and token_end <= span[\"end\"]:\n            if token_start == span[\"start\"]:\n                return f\"B-{span['label']}\"\n            else:\n                return f\"I-{span['label']}\"\n    return \"O\"\n\n\ndef extract_ner_tags(text, responses):\n    tokens = re.split(r\"(\\s+)\", text)\n    ner_tags = []\n\n    current_position = 0\n    for token in tokens:\n        if token.strip():\n            token_start = current_position\n            token_end = current_position + len(token)\n            tag = get_iob_tag_for_token(token_start, token_end, responses)\n            ner_tags.append(tag)\n        current_position += len(token)\n\n    return ner_tags\n
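
To make the IOB scheme concrete, here is a quick illustration of extract_ner_tags with a made-up sentence and spans (not taken from the dataset):

text = \"John Smith visited New York\"\nner_spans = [\n    {\"start\": 0, \"end\": 10, \"label\": \"PERSON\"},\n    {\"start\": 19, \"end\": 27, \"label\": \"GPE\"},\n]\n# the first token of a span gets a B- tag, the rest I-, and everything else O\nextract_ner_tags(text, ner_spans)\n# ['B-PERSON', 'I-PERSON', 'O', 'B-GPE', 'I-GPE']\n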

Let's now extract them and save two lists with the tokens and NER tags, which will help us build our dataset to train the SpanMarker model.

tokens = []\nner_tags = []\nfor r in submitted:\n    tags = extract_ner_tags(r[\"text\"], r[\"span_label.responses\"][0])\n    tks = r[\"text\"].split()\n    tokens.append(tks)\n    ner_tags.append(tags)\n

In addition, we will have to provide the labels, which should be formatted as integers. So, we will retrieve the unique labels and map them to integer ids.

labels = list(set([item for sublist in ner_tags for item in sublist]))\n\nid2label = {i: label for i, label in enumerate(labels)}\nlabel2id = {label: id_ for id_, label in id2label.items()}\n\nmapped_ner_tags = [[label2id[label] for label in ner_tag] for ner_tag in ner_tags]\n

Finally, we will create a dataset with the train and validation sets.

records = [\n    {\n        \"tokens\": token,\n        \"ner_tags\": ner_tag,\n    }\n    for token, ner_tag in zip(tokens, mapped_ner_tags)\n]\nspan_dataset = DatasetDict(\n    {\n        \"train\": Dataset.from_list(records[:1500]),\n        \"validation\": Dataset.from_list(records[1500:2000]),\n    }\n)\n

Now, let's prepare to train our model. For this, it is recommended to use a GPU. You can check whether one is available as shown below.

if torch.cuda.is_available():\n    device = torch.device(\"cuda\")\n    print(f\"Using {torch.cuda.get_device_name(0)}\")\nelif torch.backends.mps.is_available():\n    device = torch.device(\"mps\")\n    print(\"Using MPS device\")\nelse:\n    device = torch.device(\"cpu\")\n    print(\"No GPU available, using CPU instead.\")\n

We will define our model and arguments. In this case, we will use bert-base-cased, available on the Hugging Face Hub, but other encoders can be applied.

Note

The training arguments are inherited from the Transformers library. You can check more information here.

encoder_id = \"bert-base-cased\"\nmodel = SpanMarkerModel.from_pretrained(\n    encoder_id,\n    labels=labels,\n    model_max_length=256,\n    entity_max_length=8,\n)\n\nargs = TrainingArguments(\n    output_dir=\"models/span-marker\",\n    learning_rate=5e-5,\n    per_device_train_batch_size=8,\n    per_device_eval_batch_size=8,\n    num_train_epochs=1,\n    weight_decay=0.01,\n    warmup_ratio=0.1,\n    fp16=False,  # Set to True if available\n    logging_first_step=True,\n    logging_steps=50,\n    evaluation_strategy=\"steps\",\n    save_strategy=\"steps\",\n    eval_steps=500,\n    save_total_limit=2,\n    dataloader_num_workers=2,\n)\n\ntrainer = Trainer(\n    model=model,\n    args=args,\n    train_dataset=span_dataset[\"train\"],\n    eval_dataset=span_dataset[\"validation\"],\n)\n

Let's train it! This time, we use a high-quality human-annotated training set, so we can expect better results.

trainer.train()\n
trainer.evaluate()\n

You can save it locally or push it to the Hub, and then load it from there.

# Save and load locally\n# model.save_pretrained(\"token_classification_model\")\n# model = SpanMarkerModel.from_pretrained(\"token_classification_model\")\n\n# Push and load in HF\n# model.push_to_hub(\"[username]/token_classification_model\")\n# model = SpanMarkerModel.from_pretrained(\"[username]/token_classification_model\")\n

It's time to make the predictions! We will define a function that uses the predict method to get the suggested label. The model will infer the label based on the text. The function will return the spans in the corresponding structure for the Argilla dataset.

def predict_spanmarker(model, text):\n    entities = model.predict(text)\n    return [\n        {\n            \"start\": ent[\"char_start_index\"],\n            \"end\": ent[\"char_end_index\"],\n            \"label\": ent[\"label\"],\n        }\n        for ent in entities\n    ]\n

As the training data was of better quality, we can expect a better model. So we can update the remaining non-annotated records with the new model's suggestions.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"span_label\": predict_spanmarker(model=model, text=sample[\"text\"]),\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n

In this tutorial, we present an end-to-end example of a token classification task. This serves as the base, but it can be performed iteratively and seamlessly integrated into your workflow to ensure high-quality curation of your data and improved results.

We started by configuring the dataset, adding records, and adding suggestions based on the GLiNER predictions. After the annotation process, we trained a SpanMarker model with the annotated data and updated the remaining records with the new suggestions.

"},{"location":"tutorials/token_classification/#token-classification","title":"Token classification","text":""},{"location":"tutorials/token_classification/#getting-started","title":"Getting started","text":""},{"location":"tutorials/token_classification/#deploy-the-argilla-server","title":"Deploy the Argilla server","text":""},{"location":"tutorials/token_classification/#set-up-the-environment","title":"Set up the environment","text":""},{"location":"tutorials/token_classification/#vibe-check-the-dataset","title":"Vibe check the dataset","text":"

We will have a look at the dataset to understand its structure and the kind of data it contains. We do this by using the embedded Hugging Face Dataset Viewer.

"},{"location":"tutorials/token_classification/#configure-and-create-the-argilla-dataset","title":"Configure and create the Argilla dataset","text":""},{"location":"tutorials/token_classification/#add-records","title":"Add records","text":""},{"location":"tutorials/token_classification/#add-initial-model-suggestions","title":"Add initial model suggestions","text":""},{"location":"tutorials/token_classification/#evaluate-with-argilla","title":"Evaluate with Argilla","text":""},{"location":"tutorials/token_classification/#train-your-model","title":"Train your model","text":""},{"location":"tutorials/token_classification/#conclusions","title":"Conclusions","text":""}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Welcome to Argilla","text":"

Argilla is a collaboration tool for AI engineers and domain experts to build high-quality datasets.

To get started:

  • Get started in 5 minutes!

    Deploy Argilla for free on the Hugging Face Hub or with Docker. Install the Python SDK with pip and create your first project (see the sketch after this list).

    Quickstart

  • How-to guides

    Get familiar with the basic workflows of Argilla. Learn how to manage Users, Workspaces, Datasets, and Records to set up your data annotation projects.

    Learn more
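
As a minimal sketch of that first project, assuming a deployed Argilla server (the URL, API key, and dataset name below are placeholders, and the field and question names are illustrative):

import argilla as rg\n\n# connect to your deployed Argilla server\nclient = rg.Argilla(api_url=\"<your-argilla-url>\", api_key=\"<your-api-key>\")\n\n# define a simple dataset with one text field and one label question\nsettings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n)\n\ndataset = rg.Dataset(name=\"my_first_dataset\", settings=settings)\ndataset.create()\n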

Or, play with the Argilla UI by signing in with your Hugging Face account:

Looking for Argilla 1.x?

Looking for documentation for Argilla 1.x? Visit the latest release.

Migrate to Argilla 2.x

Want to learn how to migrate from Argilla 1.x to 2.x? Take a look at our dedicated Migration Guide.

"},{"location":"#why-use-argilla","title":"Why use Argilla?","text":"

Argilla can be used for collecting human feedback for a wide variety of AI projects like traditional NLP (text classification, NER, etc.), LLMs (RAG, preference tuning, etc.), or multimodal models (text to image, etc.).

Argilla's programmatic approach lets you build workflows for continuous evaluation and model improvement. The goal of Argilla is to ensure your data work pays off by quickly iterating on the right data and models.

Improve your AI output quality through data quality

Compute is expensive and output quality is important. We help you focus on data, which tackles the root cause of both of these problems at once. Argilla helps you to achieve and keep high-quality standards for your data. This means you can improve the quality of your AI outputs.

Take control of your data and models

Most AI tools are black boxes. Argilla is different. We believe that you should be the owner of both your data and your models. That's why we provide you with all the tools your team needs to manage your data and models in a way that suits you best.

Improve efficiency by quickly iterating on the right data and models

Gathering data is a time-consuming process. Argilla helps by providing a tool that allows you to interact with your data in a more engaging way. This means you can quickly and easily label your data with filters, AI feedback suggestions, and semantic search, so you can focus on training your models and monitoring their performance.

"},{"location":"#what-do-people-build-with-argilla","title":"What do people build with Argilla?","text":"

Datasets and models

Argilla is a tool that can be used to achieve and keep high-quality data standards with a focus on NLP and LLMs. The community uses Argilla to create amazing open-source datasets and models, and we love contributions to open-source too.

  • cleaned UltraFeedback dataset and the Notus and Notux models, where we improved benchmark results and empirical human judgment for the Mistral and Mixtral models with cleaner data curated through human feedback.
  • distilabeled Intel Orca DPO dataset and the improved OpenHermes model, which show how we improved model performance by filtering out 50% of the original dataset through human and AI feedback.

Projects and pipelines

AI teams from companies like the Red Cross, Loris.ai and Prolific use Argilla to improve the quality and efficiency of AI projects. They shared their experiences in the AI community meetup.

  • AI for good: the Red Cross presentation showcases how their experts and AI team collaborate by classifying and redirecting requests from refugees of the Ukrainian crisis to streamline the support processes of the Red Cross.
  • Customer support: during the Loris meetup they showed how their AI team uses unsupervised and few-shot contrastive learning to help them quickly validate and gain labelled samples for a huge number of multi-label classifiers.
  • Research studies: the showcase from Prolific announced their integration with Argilla. They use it to actively distribute data collection projects among their annotating workforce. This allows them to quickly and efficiently collect high-quality data for their research studies.
"},{"location":"community/","title":"Community","text":"

We are an open-source community-driven project not only focused on building a great product but also on building a great community, where you can get support, share your experiences, and contribute to the project! We would love to hear from you and help you get started with Argilla.

  • Discord

    In our Discord channels (#argilla-distilabel-general and #argilla-distilabel-help), you can get direct support from the community.

    Discord \u2197

  • Community Meetup

    We host bi-weekly community meetups where you can listen in or present your work.

    Community Meetup \u2197

  • Changelog

    The changelog is where you can find the latest updates and changes to the Argilla project.

    Changelog \u2197

  • Roadmap

    We love to discuss our plans with the community. Feel encouraged to participate in our roadmap discussions.

    Roadmap \u2197

"},{"location":"community/changelog/","title":"Changelog","text":"

All notable changes to this project will be documented in this file.

The format is based on Keep a Changelog, and this project adheres to Semantic Versioning.

"},{"location":"community/changelog/#unreleased","title":"Unreleased","text":""},{"location":"community/changelog/#230","title":"2.3.0","text":""},{"location":"community/changelog/#added","title":"Added","text":"
  • Added support for CustomField. (#5422)
  • Added inserted_at and updated_at to Resource model as properties. (#5540)
  • Added limit argument when fetching records. (#5525)
  • Added similarity search support. (#5546)
  • Added filter support for id, _server_id, inserted_at and updated_at record attributes. (#5545)
  • Added support to read argilla credentials from colab secrets. (#5541)
"},{"location":"community/changelog/#changed","title":"Changed","text":"
  • Changed the repr method for SettingsProperties to display the details of all the properties in the Settings object. (#5380)
  • Changed error messages when creating datasets with insufficient permissions. (#5540)
"},{"location":"community/changelog/#fixed","title":"Fixed","text":"
  • Fixed serialization of ChatField when collecting records from the hub and exporting to datasets. (#5554)
"},{"location":"community/changelog/#222","title":"2.2.2","text":""},{"location":"community/changelog/#fixed_1","title":"Fixed","text":"
  • Fixed from_hub with unsupported column names. (#5524)
  • Fixed from_hub with missing dataset subset configuration value. (#5524)
"},{"location":"community/changelog/#changed_1","title":"Changed","text":"
  • Changed from_hub to only generate fields not questions for strings in dataset. (#5524)
"},{"location":"community/changelog/#221","title":"2.2.1","text":""},{"location":"community/changelog/#fixed_2","title":"Fixed","text":"
  • Fixed from_hub errors when columns names contain uppercase letters. (#5523)
  • Fixed from_hub errors when class feature values contains unlabelled values. (#5523)
  • Fixed from_hub errors when loading cached datasets. (#5523)
"},{"location":"community/changelog/#220","title":"2.2.0","text":"
  • Added new ChatField supporting chat messages. (#5376)
  • Added template settings to rg.Settings for classification, rating, and ranking questions. (#5426)
  • Added rg.Settings definition based on datasets.Features within rg.Dataset.from_hub. (#5426)
  • Added persistent record mapping to rg.Settings to be used in rg.Dataset.records.log. (#5466)
  • Added multiple error handling methods to the rg.Dataset.records.log method to warn, ignore, or raise errors. (#5466)
  • Changed dataset import and export of rg.LabelQuestion to use datasets.ClassLabel not datasets.Value. (#5474)
"},{"location":"community/changelog/#210","title":"2.1.0","text":""},{"location":"community/changelog/#added_1","title":"Added","text":"
  • Added new ImageField supporting URLs and Data URLs. (#5279)
  • Added dark mode (#5412)
  • Added settings parameter to rg.Dataset.from_hub to define the dataset settings before ingesting a dataset from the hub. (#5418)
"},{"location":"community/changelog/#201","title":"2.0.1","text":""},{"location":"community/changelog/#fixed_3","title":"Fixed","text":"
  • Fixed error when creating optional fields. (#5362)
  • Fixed error creating integer and float metadata with visible_for_annotators. (#5364)
  • Fixed error when logging records with suggestions or responses for non-existent questions. (#5396 by @maxserras)
  • Fixed error from conflicts in testing suite when running tests in parallel. (#5349)
  • Fixed error in response model when creating a response with a None value. (#5343)
"},{"location":"community/changelog/#changed_2","title":"Changed","text":"
  • Changed from_hub method to raise an error when a dataset with the same name exists. (#5258)
  • Changed log method when ingesting records with no known keys to raise a descriptive error. (#5356)
  • Changed code snippets to add new datasets (#5395)
"},{"location":"community/changelog/#added_2","title":"Added","text":"
  • Added Google Analytics to the documentation site. (#5366)
  • Added frontend skeletons to progress metrics to optimise load time and improve user experience. (#5391)
  • Added documentation in methods in API references for the Python SDK. (#5400)
"},{"location":"community/changelog/#fixed_4","title":"Fixed","text":"
  • Fixed a bug where submitting the latest record sometimes navigated to a non-existing page. (#5419)
"},{"location":"community/changelog/#200","title":"2.0.0","text":""},{"location":"community/changelog/#added_3","title":"Added","text":"
  • Added core class refactors. For an overview, see this blog post
  • Added TaskDistribution to define the distribution of records to users.
  • Added new documentation site and structure and migrated legacy documentation.
"},{"location":"community/changelog/#changed_3","title":"Changed","text":"
  • Changed FeedbackDataset to Dataset.
  • Changed rg.init into rg.Argilla class to interact with Argilla server.
"},{"location":"community/changelog/#deprecated","title":"Deprecated","text":"
  • Deprecated task specific dataset classes like TextClassification and TokenClassification. To migrate legacy datasets to rg.Dataset class, see the how-to-guide.
  • Deprecated use case extensions like listeners and ArgillaTrainer.
"},{"location":"community/changelog/#200rc1","title":"2.0.0rc1","text":"

[!NOTE] This release for 2.0.0rc1 does not contain any changelog entries because it is the first release candidate for the 2.0.0 version. The following versions will contain the changelog entries again. For a general overview of the changes in the 2.0.0 version, please refer to our blog or our new documentation.

"},{"location":"community/changelog/#1290","title":"1.29.0","text":""},{"location":"community/changelog/#added_4","title":"Added","text":"
  • Added support for rating questions to include 0 as a valid value. (#4860)
  • Added support for Python 3.12. (#4837)
  • Added search by field in the FeedbackDataset UI search. (#4746)
  • Added record metadata info in the FeedbackDataset UI. (#4851)
  • Added highlight on search results in the FeedbackDataset UI. (#4747)
"},{"location":"community/changelog/#fixed_5","title":"Fixed","text":"
  • Fixed wildcard import for the whole argilla module. (#4874)
  • Fixed issue when a record does not have related vectors. (#4856)
  • Fixed issue on character level. (#4836)
"},{"location":"community/changelog/#1280","title":"1.28.0","text":""},{"location":"community/changelog/#added_5","title":"Added","text":"
  • Added suggestion multi score attribute. (#4730)
  • Added order by suggestion first. (#4731)
  • Added multi selection entity dropdown for span annotation overlap. (#4735)
  • Added pre selection highlight for span annotation. (#4726)
  • Added banner when persistent storage is not enabled. (#4744)
  • Added support on Python SDK for new multi-label questions labels_order attribute. (#4757)
"},{"location":"community/changelog/#changed_4","title":"Changed","text":"
  • Changed the way the Hugging Face Space and user are shown on sign-in. (#4748)
"},{"location":"community/changelog/#fixed_6","title":"Fixed","text":"
  • Fixed reversed Korean characters. (#4753)
"},{"location":"community/changelog/#fixed_7","title":"Fixed","text":"
  • Fixed the wrapt library version requirement conflicting with Python 3.11. (#4693)
"},{"location":"community/changelog/#1270","title":"1.27.0","text":""},{"location":"community/changelog/#added_6","title":"Added","text":"
  • Added support for overlapping spans in the FeedbackDataset. (#4668)
  • Added allow_overlapping parameter for span questions. (#4697)
  • Added overall progress bar on Datasets table. (#4696)
  • Added German language translation. (#4688)
"},{"location":"community/changelog/#changed_5","title":"Changed","text":"
  • New UI design for suggestions. (#4682)
"},{"location":"community/changelog/#fixed_8","title":"Fixed","text":"
  • Improved performance for more than 250 labels. (#4702)
"},{"location":"community/changelog/#1261","title":"1.26.1","text":""},{"location":"community/changelog/#added_7","title":"Added","text":"
  • Added support for automatic detection of RTL languages. (#4686)
"},{"location":"community/changelog/#1260","title":"1.26.0","text":""},{"location":"community/changelog/#added_8","title":"Added","text":"
  • If you expand the labels of a single or multi label Question, the state is maintained during the entire annotation process. (#4630)
  • Added support for span questions in the Python SDK. (#4617)
  • Added support for span values in suggestions and responses. (#4623)
  • Added span questions for FeedbackDataset. (#4622)
  • Added ARGILLA_CACHE_DIR environment variable to configure the client cache directory. (#4509)
"},{"location":"community/changelog/#fixed_9","title":"Fixed","text":"
  • Fixed contextualized workspaces. (#4665)
  • Fixed prepare for training when passing RankingValueSchema instances to suggestions. (#4628)
  • Fixed parsing ranking values in suggestions from HF datasets. (#4629)
  • Fixed reading description from API response payload. (#4632)
  • Fixed pulling (n*chunk_size)+1 records when using ds.pull or iterating over the dataset. (#4662)
  • Fixed client's resolution of enum values when calling the Search and Metrics api, to support Python >=3.11 enum handling. (#4672)
"},{"location":"community/changelog/#1250","title":"1.25.0","text":"

[!NOTE] For changes in the argilla-server module, visit the argilla-server release notes

"},{"location":"community/changelog/#added_9","title":"Added","text":"
  • Reorder labels in dataset settings page for single/multi label questions (#4598)
  • Added pandas v2 support using the python SDK. (#4600)
"},{"location":"community/changelog/#removed","title":"Removed","text":"
  • Removed missing response for status filter. Use pending instead. (#4533)
"},{"location":"community/changelog/#fixed_10","title":"Fixed","text":"
  • Fixed FloatMetadataProperty: value is not a valid float (#4570)
  • Fixed redirect to user-settings instead of 404 user_settings (#4609)
"},{"location":"community/changelog/#1240","title":"1.24.0","text":"

[!NOTE] This release does not contain any new features, but it includes a major change in the argilla-server dependency. The package is using the argilla-server dependency defined here. (#4537)

"},{"location":"community/changelog/#changed_6","title":"Changed","text":"
  • The package is using the argilla-server dependency defined here. (#4537)
"},{"location":"community/changelog/#1231","title":"1.23.1","text":""},{"location":"community/changelog/#fixed_11","title":"Fixed","text":"
  • Fixed Responsive view for Feedback Datasets. (#4579)
"},{"location":"community/changelog/#1230","title":"1.23.0","text":""},{"location":"community/changelog/#added_10","title":"Added","text":"
  • Added bulk annotation by filter criteria. (#4516)
  • Automatically fetch new datasets on focus tab. (#4514)
  • API v1 responses returning Record schema now always include dataset_id as attribute. (#4482)
  • API v1 responses returning Response schema now always include record_id as attribute. (#4482)
  • API v1 responses returning Question schema now always include dataset_id attribute. (#4487)
  • API v1 responses returning Field schema now always include dataset_id attribute. (#4488)
  • API v1 responses returning MetadataProperty schema now always include dataset_id attribute. (#4489)
  • API v1 responses returning VectorSettings schema now always include dataset_id attribute. (#4490)
  • Added pdf_to_html function to the .html_utils module that converts PDFs to data URLs so they can be rendered in the Argilla UI (see the sketch after this list). (#4481)
  • Added ARGILLA_AUTH_SECRET_KEY environment variable. (#4539)
  • Added ARGILLA_AUTH_ALGORITHM environment variable. (#4539)
  • Added ARGILLA_AUTH_TOKEN_EXPIRATION environment variable. (#4539)
  • Added ARGILLA_AUTH_OAUTH_CFG environment variable. (#4546)
  • Added OAuth2 support for HuggingFace Hub. (#4546)
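A minimal sketch of how pdf_to_html could be used to render a PDF inside a record; the import path (assumed to be re-exported from argilla.client.feedback.utils) and the file path are assumptions:

```python
import argilla as rg
from argilla.client.feedback.utils import pdf_to_html  # assumed import path

# Sketch: convert a PDF to a data-URL-based HTML snippet (#4481) and place it
# in a field that is rendered with use_markdown=True.
html = pdf_to_html("reports/summary.pdf")  # illustrative path
record = rg.FeedbackRecord(fields={"document": html})
```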
"},{"location":"community/changelog/#deprecated_1","title":"Deprecated","text":"
  • Deprecated ARGILLA_LOCAL_AUTH_* environment variables. Will be removed in the release v1.25.0. (#4539)
"},{"location":"community/changelog/#changed_7","title":"Changed","text":"
  • Changed regex pattern for username attribute in UserCreate. Now uppercase letters are allowed. (#4544)
"},{"location":"community/changelog/#removed_1","title":"Removed","text":"
  • Remove sending Authorization header from python SDK requests. (#4535)
"},{"location":"community/changelog/#fixed_12","title":"Fixed","text":"
  • Fixed keyboard shortcut for label questions. (#4530)
"},{"location":"community/changelog/#1220","title":"1.22.0","text":""},{"location":"community/changelog/#added_11","title":"Added","text":"
  • Added Bulk annotation support. (#4333)
  • Restore filters from feedback dataset settings. (#4461)
  • Warning on feedback dataset settings when leaving page with unsaved changes. (#4461)
  • Added pydantic v2 support using the python SDK. (#4459)
  • Added vector_settings to the __repr__ method of the FeedbackDataset and RemoteFeedbackDataset. (#4454)
  • Added integration for sentence-transformers using SentenceTransformersExtractor to configure vector_settings in FeedbackDataset and FeedbackRecord. (#4454)
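A minimal sketch of the sentence-transformers integration; the import path, model name, and update_dataset method are assumptions based on the entry above:

```python
import argilla as rg
from argilla.client.feedback.integrations.sentencetransformers import (
    SentenceTransformersExtractor,  # assumed import path
)

# Sketch: configure vector_settings and compute vectors for a dataset (#4454).
dataset = rg.FeedbackDataset(
    fields=[rg.TextField(name="text")],
    questions=[rg.TextQuestion(name="answer")],
)
extractor = SentenceTransformersExtractor(model="all-MiniLM-L6-v2")  # illustrative model
dataset = extractor.update_dataset(dataset)  # assumed method: adds vector settings and vectors
```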
"},{"location":"community/changelog/#changed_8","title":"Changed","text":"
  • Module argilla.cli.server definitions have been moved to argilla.server.cli module. (#4472)
  • [breaking] Changed vector_settings_by_name for generic property_by_name usage, which will return None instead of raising an error. (#4454)
  • The constant definition ES_INDEX_REGEX_PATTERN in module argilla._constants is now private. (#4472)
  • nan values in metadata properties will raise a 422 error when creating/updating records. (#4300)
  • None values are now allowed in metadata properties. (#4300)
  • Refactor and add width, height, autoplay and loop attributes as optional args in to_html functions. (#4481)
"},{"location":"community/changelog/#fixed_13","title":"Fixed","text":"
  • Paginating to a new record, automatically scrolls down to selected form area. (#4333)
"},{"location":"community/changelog/#deprecated_2","title":"Deprecated","text":"
  • The missing response status for filtering records is deprecated and will be removed in the release v1.24.0. Use pending instead. (#4433)
"},{"location":"community/changelog/#removed_2","title":"Removed","text":"
  • The deprecated python -m argilla database command has been removed. (#4472)
"},{"location":"community/changelog/#1210","title":"1.21.0","text":""},{"location":"community/changelog/#added_12","title":"Added","text":"
  • Added new draft queue for annotation view (#4334)
  • Added annotation metrics module for the FeedbackDataset (argilla.client.feedback.metrics). (#4175).
  • Added strategy to handle and translate errors from the server for the 401 HTTP status code. (#4362)
  • Added integration for textdescriptives using TextDescriptivesExtractor to configure metadata_properties in FeedbackDataset and FeedbackRecord (see the sketch after this list). (#4400). Contributed by @m-newhauser
  • Added POST /api/v1/me/responses/bulk endpoint to create responses in bulk for current user. (#4380)
  • Added list support for term metadata properties. (Closes #4359)
  • Added new CLI task to reindex datasets and records into the search engine. (#4404)
  • Added httpx_extra_kwargs argument to rg.init and Argilla to allow passing extra arguments to httpx.Client used by Argilla. (#4440)
  • Added ResponseStatusFilter enum in __init__ imports of Argilla (#4118). Contributed by @Piyush-Kumar-Ghosh.
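A minimal sketch of the textdescriptives integration; the import path and update_records method are assumptions based on the entry above:

```python
import argilla as rg
from argilla.client.feedback.integrations.textdescriptives import (
    TextDescriptivesExtractor,  # assumed import path
)

# Sketch: enrich records with text metrics as metadata (#4400).
records = [rg.FeedbackRecord(fields={"text": "Argilla is a data annotation tool."})]
tde = TextDescriptivesExtractor(model="en")  # illustrative language setting
records = tde.update_records(records)        # assumed method: adds metadata to each record
```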
"},{"location":"community/changelog/#changed_9","title":"Changed","text":"
  • More productive and simpler shortcut system (#4215)
  • Move ArgillaSingleton, init and active_client to a new module singleton. (#4347)
  • Updated argilla.load functions to also work with FeedbackDatasets. (#4347)
  • [breaking] Updated argilla.delete functions to also work with FeedbackDatasets. It now raises an error if the dataset does not exist. (#4347)
  • Updated argilla.list_datasets functions to also work with FeedbackDatasets. (#4347)
"},{"location":"community/changelog/#fixed_14","title":"Fixed","text":"
  • Fixed error in TextClassificationSettings.from_dict method in which the label_schema created was a list of dict instead of a list of str. (#4347)
  • Fixed total records on pagination component (#4424)
"},{"location":"community/changelog/#removed_3","title":"Removed","text":"
  • Removed draft auto save for annotation view (#4334)
"},{"location":"community/changelog/#1200","title":"1.20.0","text":""},{"location":"community/changelog/#added_13","title":"Added","text":"
  • Added GET /api/v1/datasets/:dataset_id/records/search/suggestions/options endpoint to return suggestion available options for searching. (#4260)
  • Added metadata_properties to the __repr__ method of the FeedbackDataset and RemoteFeedbackDataset. (#4192).
  • Added get_model_kwargs, get_trainer_kwargs, get_trainer_model, get_trainer_tokenizer and get_trainer -methods to the ArgillaTrainer to improve interoperability across frameworks. (#4214).
  • Added additional formatting checks to the ArgillaTrainer to allow for better interoperability of defaults and formatting_func usage. (#4214).
  • Added a warning to the update_config-method of ArgillaTrainer to emphasize if the kwargs were updated correctly. (#4214).
  • Added argilla.client.feedback.utils module with html_utils and assignments. html_utils mainly includes video/audio/image_to_html, which convert media to data URLs so they can be rendered in the Argilla UI, and create_token_highlights to highlight tokens in a custom way; both work on TextQuestion and TextField with use_markdown=True. assignments mainly includes assign_records, to assign records according to a number of annotators and records, an overlap, and the shuffle option, and assign_workspace, to assign (and create if needed) a workspace according to the record assignment. (#4121)
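A minimal sketch of the assignments helpers described above; the exact signature of assign_records is an assumption, and the user names and records are stand-ins:

```python
from argilla.client.feedback.utils import assign_records  # assumed import path

# Sketch: split records across annotators with one level of overlap (#4121).
users = ["alice", "bob", "carol"]            # illustrative user names
records = [f"record-{i}" for i in range(9)]  # stand-ins for FeedbackRecord objects
assignments = assign_records(users=users, records=records, overlap=1, shuffle=True)
# `assignments` is expected to map each user to their share of the records.
```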
"},{"location":"community/changelog/#fixed_15","title":"Fixed","text":"
  • Fixed error in ArgillaTrainer, with numerical labels, using RatingQuestion instead of RankingQuestion (#4171)
  • Fixed error in ArgillaTrainer, now we can train for extractive_question_answering using a validation sample (#4204)
  • Fixed error in ArgillaTrainer, when training for sentence-similarity it didn't work with a list of values per record (#4211)
  • Fixed error in the unification strategy for RankingQuestion (#4295)
  • Fixed TextClassificationSettings.labels_schema order was not being preserved. Closes #3828 (#4332)
  • Fixed error when requesting non-existing API endpoints. Closes #4073 (#4325)
  • Fixed error when passing draft responses to create records endpoint. (#4354)
"},{"location":"community/changelog/#changed_10","title":"Changed","text":"
  • [breaking] The suggestions agent field now only accepts some specific characters and a limited length. (#4265)
  • [breaking] The suggestions score field now only accepts float values in the range 0 to 1. (#4266)
  • Updated POST /api/v1/dataset/:dataset_id/records/search endpoint to support optional query attribute. (#4327)
  • Updated POST /api/v1/dataset/:dataset_id/records/search endpoint to support filter and sort attributes. (#4327)
  • Updated POST /api/v1/me/datasets/:dataset_id/records/search endpoint to support optional query attribute. (#4270)
  • Updated POST /api/v1/me/datasets/:dataset_id/records/search endpoint to support filter and sort attributes. (#4270)
  • Changed the logging style while pulling and pushing FeedbackDataset to Argilla from tqdm style to rich. (#4267). Contributed by @zucchini-nlp.
  • Updated push_to_argilla to print repr of the pushed RemoteFeedbackDataset after push and changed show_progress to True by default. (#4223)
  • Changed models and tokenizer for the ArgillaTrainer to explicitly allow for changing them when needed. (#4214).
"},{"location":"community/changelog/#1190","title":"1.19.0","text":""},{"location":"community/changelog/#added_14","title":"Added","text":"
  • Added POST /api/v1/datasets/:dataset_id/records/search endpoint to search for records without user context, including responses by all users. (#4143)
  • Added POST /api/v1/datasets/:dataset_id/vectors-settings endpoint for creating vector settings for a dataset. (#3776)
  • Added GET /api/v1/datasets/:dataset_id/vectors-settings endpoint for listing the vectors settings for a dataset. (#3776)
  • Added DELETE /api/v1/vectors-settings/:vector_settings_id endpoint for deleting a vector settings. (#3776)
  • Added PATCH /api/v1/vectors-settings/:vector_settings_id endpoint for updating a vector settings. (#4092)
  • Added GET /api/v1/records/:record_id endpoint to get a specific record. (#4039)
  • Added support to include vectors for GET /api/v1/datasets/:dataset_id/records endpoint response using include query param. (#4063)
  • Added support to include vectors for GET /api/v1/me/datasets/:dataset_id/records endpoint response using include query param. (#4063)
  • Added support to include vectors for POST /api/v1/me/datasets/:dataset_id/records/search endpoint response using include query param. (#4063)
  • Added show_progress argument to the from_huggingface() method to make the progress bar for the record-parsing process optional. (#4132)
  • Added a progress bar for the record-parsing process to the from_huggingface() method using trange from tqdm. (#4132)
  • Added support to sort by inserted_at or updated_at for datasets with no metadata. (#4147)
  • Added max_records argument to the pull() method for RemoteFeedbackDataset. (#4074)
  • Added functionality to push your models to the Hugging Face hub with ArgillaTrainer.push_to_huggingface (#3976). Contributed by @Racso-3141.
  • Added filter_by argument to ArgillaTrainer to filter by response_status (#4120).
  • Added sort_by argument to ArgillaTrainer to sort by metadata (#4120).
  • Added max_records argument to ArgillaTrainer to limit the records used for training (#4120).
  • Added add_vector_settings method to local and remote FeedbackDataset. (#4055)
  • Added update_vectors_settings method to local and remote FeedbackDataset. (#4122)
  • Added delete_vectors_settings method to local and remote FeedbackDataset. (#4130)
  • Added vector_settings_by_name method to local and remote FeedbackDataset. (#4055)
  • Added find_similar_records method to local and remote FeedbackDataset (see the sketch after this list). (#4023)
  • Added ARGILLA_SEARCH_ENGINE environment variable to configure the search engine to use. (#4019)
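A minimal sketch combining several of the additions above: adding vector settings to a remote dataset and running a similarity search. Dataset, workspace, and vector names are illustrative, and indexing into remote.records is an assumption:

```python
import argilla as rg

# Sketch: vector settings plus find_similar_records (#4055, #4023).
remote = rg.FeedbackDataset.from_argilla(name="my-dataset", workspace="my-workspace")
remote.add_vector_settings(rg.VectorSettings(name="sentence-embedding", dimensions=384))
similar = remote.find_similar_records(
    vector_name="sentence-embedding",
    record=remote.records[0],  # find records similar to the first one
    max_results=5,
)
```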
"},{"location":"community/changelog/#changed_11","title":"Changed","text":"
  • [breaking] Remove support for Elasticsearch < 8.5 and OpenSearch < 2.4. (#4173)
  • [breaking] Users working with OpenSearch engines must use version >=2.4 and set ARGILLA_SEARCH_ENGINE=opensearch. (#4019 and #4111)
  • [breaking] Changed FeedbackDataset.*_by_name() methods to return None when no match is found (#4101).
  • [breaking] The limit query parameter for the GET /api/v1/datasets/:dataset_id/records endpoint now only accepts values greater than or equal to 1 and less than or equal to 1000. (#4143)
  • [breaking] The limit query parameter for the GET /api/v1/me/datasets/:dataset_id/records endpoint now only accepts values greater than or equal to 1 and less than or equal to 1000. (#4143)
  • Update GET /api/v1/datasets/:dataset_id/records endpoint to fetch records using the search engine. (#4142)
  • Update GET /api/v1/me/datasets/:dataset_id/records endpoint to fetch records using the search engine. (#4142)
  • Update POST /api/v1/datasets/:dataset_id/records endpoint to allow creating records with vectors. (#4022)
  • Update PATCH /api/v1/datasets/:dataset_id endpoint to allow updating the allow_extra_metadata attribute. (#4112)
  • Update PATCH /api/v1/datasets/:dataset_id/records endpoint to allow updating records with vectors. (#4062)
  • Update PATCH /api/v1/records/:record_id endpoint to allow updating a record with vectors. (#4062)
  • Update POST /api/v1/me/datasets/:dataset_id/records/search endpoint to allow searching records with vectors. (#4019)
  • Update BaseElasticAndOpenSearchEngine.index_records method to also index record vectors. (#4062)
  • Update FeedbackDataset.__init__ to allow passing a list of vector settings. (#4055)
  • Update FeedbackDataset.push_to_argilla to also push vector settings. (#4055)
  • Update FeedbackDatasetRecord to support the creation of records with vectors. (#4043)
  • Using cosine similarity to compute similarity between vectors. (#4124)
"},{"location":"community/changelog/#fixed_16","title":"Fixed","text":"
  • Fixed svg images out of screen with too large images (#4047)
  • Fixed creating records with responses from multiple users. Closes #3746 and #3808 (#4142)
  • Fixed deleting or updating responses as an owner for annotators. (Commit 403a66d)
  • Fixed passing user_id when getting records by id. (Commit 98c7927)
  • Fixed non-basic tags serialized when pushing a dataset to the Hugging Face Hub. Closes #4089 (#4200)
"},{"location":"community/changelog/#1180","title":"1.18.0","text":""},{"location":"community/changelog/#added_15","title":"Added","text":"
  • New GET /api/v1/datasets/:dataset_id/metadata-properties endpoint for listing dataset metadata properties. (#3813)
  • New POST /api/v1/datasets/:dataset_id/metadata-properties endpoint for creating dataset metadata properties. (#3813)
  • New PATCH /api/v1/metadata-properties/:metadata_property_id endpoint allowing the update of a specific metadata property. (#3952)
  • New DELETE /api/v1/metadata-properties/:metadata_property_id endpoint for deletion of a specific metadata property. (#3911)
  • New GET /api/v1/metadata-properties/:metadata_property_id/metrics endpoint to compute metrics for a specific metadata property. (#3856)
  • New PATCH /api/v1/records/:record_id endpoint to update a record. (#3920)
  • New PATCH /api/v1/dataset/:dataset_id/records endpoint to bulk update the records of a dataset. (#3934)
  • Missing validations to PATCH /api/v1/questions/:question_id. Now title and description are using the same validations used to create questions. (#3967)
  • Added TermsMetadataProperty, IntegerMetadataProperty and FloatMetadataProperty classes allowing to define metadata properties for a FeedbackDataset. (#3818)
  • Added metadata_filters to the filter_by method in RemoteFeedbackDataset to filter based on metadata, i.e. TermsMetadataFilter, IntegerMetadataFilter, and FloatMetadataFilter (see the sketch after this list). (#3834)
  • Added a validation layer for both metadata_properties and metadata_filters in their schemas and as part of the add_records and filter_by methods, respectively. (#3860)
  • Added sort_by query parameter to the record-listing endpoints that allows sorting the records by inserted_at, updated_at or a metadata property. (#3843)
  • Added add_metadata_property method to both FeedbackDataset and RemoteFeedbackDataset (i.e. FeedbackDataset in Argilla). (#3900)
  • Added fields inserted_at and updated_at in RemoteResponseSchema. (#3822)
  • Added support for sort_by for RemoteFeedbackDataset i.e. a FeedbackDataset uploaded to Argilla. (#3925)
  • Added metadata_properties support for both push_to_huggingface and from_huggingface. (#3947)
  • Added support for updating records (metadata) from the Python SDK. (#3946)
  • Added delete_metadata_properties method to delete metadata properties. (#3932)
  • Added update_metadata_properties method to update metadata_properties. (#3961)
  • Added automatic model card generation through ArgillaTrainer.save (#3857)
  • Added FeedbackDataset TaskTemplateMixin for pre-defined task templates. (#3969)
  • A maximum limit of 50 on the number of options a ranking question can accept. (#3975)
  • New last_activity_at field to FeedbackDataset exposing when the last activity for the associated dataset occurs. (#3992)
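A minimal sketch of metadata properties and filters in use; the dataset layout, names, and top-level exports are illustrative:

```python
import argilla as rg

# Sketch: declare metadata properties (#3818) and filter a remote dataset by them (#3834).
dataset = rg.FeedbackDataset(
    fields=[rg.TextField(name="text")],
    questions=[rg.TextQuestion(name="answer")],
    metadata_properties=[
        rg.TermsMetadataProperty(name="source", values=["news", "wiki"]),
        rg.IntegerMetadataProperty(name="n_tokens", min=0, max=512),
    ],
)
remote = dataset.push_to_argilla(name="demo", workspace="my-workspace")
filtered = remote.filter_by(
    metadata_filters=rg.TermsMetadataFilter(name="source", values=["news"])
)
```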
"},{"location":"community/changelog/#changed_12","title":"Changed","text":"
  • GET /api/v1/datasets/{dataset_id}/records, GET /api/v1/me/datasets/{dataset_id}/records and POST /api/v1/me/datasets/{dataset_id}/records/search endpoints to return the total number of records. (#3848, #3903)
  • Implemented __len__ method for filtered datasets to return the number of records matching the provided filters. (#3916)
  • Increase the default max result window for Elasticsearch created for Feedback datasets. (#3929)
  • Force elastic index refresh after records creation. (#3929)
  • Validate metadata fields for filtering and sorting in the Python SDK. (#3993)
  • Using metadata property name instead of id for indexing data in search engine index. (#3994)
"},{"location":"community/changelog/#fixed_17","title":"Fixed","text":"
  • Fixed response schemas to allow values to be None i.e. when a record is discarded the response.values are set to None. (#3926)
"},{"location":"community/changelog/#1170","title":"1.17.0","text":""},{"location":"community/changelog/#added_16","title":"Added","text":"
  • Added fields inserted_at and updated_at in RemoteResponseSchema (#3822).
  • Added automatic model card generation through ArgillaTrainer.save (#3857).
  • Added task templates to the FeedbackDataset (#3973).
"},{"location":"community/changelog/#changed_13","title":"Changed","text":"
  • Updated Dockerfile to use a multi-stage build (#3221 and #3793).
  • Updated active learning for text classification notebooks to use the most recent small-text version (#3831).
  • Changed argilla dataset name in the active learning for text classification notebooks to be consistent with the default names in the huggingface spaces (#3831).
  • FeedbackDataset API methods have been aligned to be accessible across the several implementations (#3937).
  • unify_responses is now supported for remote datasets (#3937).
"},{"location":"community/changelog/#fixed_18","title":"Fixed","text":"
  • Fix field not shown in the order defined in the dataset settings. Closes #3959 (#3984)
  • Updated active learning for text classification notebooks to pass ids of type int to TextClassificationRecord (#3831).
  • Fixed record fields validation that was preventing logging records with optional fields (i.e. required=False) when the field value was None (#3846).
  • Always set pretrained_model_name_or_path attribute as string in ArgillaTrainer (#3914).
  • The inserted_at and updated_at attributes are created using the utcnow factory to avoid unexpected race conditions on timestamp creation (#3945)
  • Fixed configure_dataset_settings when providing the workspace via the arg workspace (#3887).
  • Fixed saving of models trained with ArgillaTrainer with a peft_config parameter (#3795).
  • Fixed backwards compatibility on from_huggingface when loading a FeedbackDataset from the Hugging Face Hub that was previously dumped using another version of Argilla, starting at 1.8.0, when it was first introduced (#3829).
  • Fixed wrong __repr__ problem for TrainingTask. (#3969)
  • Fixed wrong key return error prepare_for_training_with_* for TrainingTask. (#3969)
"},{"location":"community/changelog/#deprecated_3","title":"Deprecated","text":"
  • Function rg.configure_dataset is deprecated in favour of rg.configure_dataset_settings. The former will be removed in version 1.19.0
"},{"location":"community/changelog/#1160","title":"1.16.0","text":""},{"location":"community/changelog/#added_17","title":"Added","text":"
  • Added ArgillaTrainer integration with sentence-transformers, allowing fine tuning for sentence similarity (#3739)
  • Added ArgillaTrainer integration with TrainingTask.for_question_answering (#3740)
  • Added auto-save to automatically save the current record that you are working on (#3541)
  • Added ArgillaTrainer integration with OpenAI, allowing fine tuning for chat completion (#3615)
  • Added workspaces list command to list Argilla workspaces (#3594).
  • Added datasets list command to list Argilla datasets (#3658).
  • Added users create command to create users (#3667).
  • Added whoami command to get current user (#3673).
  • Added users delete command to delete users (#3671).
  • Added users list command to list users (#3688).
  • Added workspaces delete-user command to remove a user from a workspace (#3699).
  • Added workspaces create command to create an Argilla workspace (#3676).
  • Added datasets push-to-hub command to push a FeedbackDataset from Argilla into the HuggingFace Hub (#3685).
  • Added info command to get info about the used Argilla client and server (#3707).
  • Added datasets delete command to delete a FeedbackDataset from Argilla (#3703).
  • Added created_at and updated_at properties to RemoteFeedbackDataset and FilteredRemoteFeedbackDataset (#3709).
  • Added handling of PermissionError when executing a command with a logged-in user without enough permissions (#3717).
  • Added workspaces add-user command to add a user to workspace (#3712).
  • Added workspace_id param to GET /api/v1/me/datasets endpoint (#3727).
  • Added workspace_id arg to list_datasets in the Python SDK (#3727).
  • Added argilla script that allows executing the Argilla CLI using the argilla command (#3730).
  • Added support for passing already initialized model and tokenizer instances to the ArgillaTrainer (#3751)
  • Added server_info function to check the Argilla server information (also accessible via rg.server_info) (#3772).
"},{"location":"community/changelog/#changed_14","title":"Changed","text":"
  • Moved database commands under the server group of commands (#3710)
  • server commands only included in the CLI app when server extra requirements are installed (#3710).
  • Updated PUT /api/v1/responses/{response_id} to replace values stored with received values in request (#3711).
  • Display a UserWarning when the user_id in Workspace.add_user and Workspace.delete_user is the ID of a user with the owner role, as they don't require explicit permissions (#3716).
  • Renamed tasks sub-package to cli (#3723).
  • Changed argilla database command in the CLI to now be accessed via argilla server database, to be deprecated in the upcoming release (#3754).
  • Changed visible_options (of label and multi-label selection questions) validation in the backend to check that the provided value is greater than or equal to 3 and less than or equal to the number of provided options (#3773).
"},{"location":"community/changelog/#fixed_19","title":"Fixed","text":"
  • Fixed removing user modifications in the text component on clear answers (#3775)
  • Fixed Highlight raw text field in dataset feedback task (#3731)
  • Fixed Field title too long (#3734)
  • Fixed error messages when deleting a DatasetForTextClassification (#3652)
  • Fixed Pending queue pagination problems during data annotation (#3677)
  • Fixed visible_labels default value to be 20 only when visible_labels is not provided and len(labels) > 20; otherwise it will be either the provided visible_labels value or None, for LabelQuestion and MultiLabelQuestion (#3702).
  • Fixed DatasetCard generation when RemoteFeedbackDataset contains suggestions (#3718).
  • Added missing draft status in ResponseSchema, as there can now be responses with draft status when annotating via the UI (#3749).
  • Fixed searches when queried words are distributed across the record fields (#3759).
  • Fixed Python 3.11 compatibility issue with /api/datasets endpoints due to the TaskType enum replacement in the endpoint URL (#3769).
  • Fixed RankingValueSchema and FeedbackRankingValueModel schemas to allow rank=None when status=draft (#3781).
"},{"location":"community/changelog/#1151","title":"1.15.1","text":""},{"location":"community/changelog/#fixed_20","title":"Fixed","text":"
  • Fixed the Text component text sanitization behavior, just for markdown, to prevent the text from disappearing (#3738)
  • Fixed the Text component so that you now need to press Escape to exit the text area (#3733)
  • Fixed SearchEngine was creating the same number of primary shards and replica shards for each FeedbackDataset (#3736).
"},{"location":"community/changelog/#1150","title":"1.15.0","text":""},{"location":"community/changelog/#added_18","title":"Added","text":"
  • Added the ability to update guidelines and dataset settings for Feedback Datasets directly in the UI (#3489)
  • Added ArgillaTrainer integration with TRL, allowing for easy supervised finetuning, reward modeling, direct preference optimization and proximal policy optimization (#3467)
  • Added formatting_func to ArgillaTrainer for FeedbackDataset datasets to add custom formatting for the data (#3599).
  • Added login function in argilla.client.login to log in to an Argilla server and store the credentials locally (#3582).
  • Added login command to log in to an Argilla server (#3600).
  • Added logout command to log out from an Argilla server (#3605).
  • Added DELETE /api/v1/suggestions/{suggestion_id} endpoint to delete a suggestion given its ID (#3617).
  • Added DELETE /api/v1/records/{record_id}/suggestions endpoint to delete several suggestions linked to the same record given their IDs (#3617).
  • Added response_status param to GET /api/v1/datasets/{dataset_id}/records to be able to filter by response_status as previously included for GET /api/v1/me/datasets/{dataset_id}/records (#3613).
  • Added list classmethod to ArgillaMixin to be used as FeedbackDataset.list(), also including the workspace to list from as arg (#3619).
  • Added filter_by method in RemoteFeedbackDataset to filter based on response_status (see the sketch after this list). (#3610)
  • Added list_workspaces function (to be used as rg.list_workspaces, but Workspace.list is preferred) to list all the workspaces from a user in Argilla (#3641).
  • Added list_datasets function (to be used as rg.list_datasets) to list the TextClassification, TokenClassification, and Text2Text datasets in Argilla (#3638).
  • Added RemoteSuggestionSchema to manage suggestions in Argilla, including the delete method to delete suggestions from Argilla via DELETE /api/v1/suggestions/{suggestion_id} (#3651).
  • Added delete_suggestions to RemoteFeedbackRecord to remove suggestions from Argilla via DELETE /api/v1/records/{record_id}/suggestions (#3651).
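A minimal sketch of filter_by on a remote dataset (dataset and workspace names are illustrative):

```python
import argilla as rg

# Sketch: keep only records with submitted responses (#3610).
remote = rg.FeedbackDataset.from_argilla(name="demo", workspace="my-workspace")
submitted = remote.filter_by(response_status="submitted")
```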
"},{"location":"community/changelog/#changed_15","title":"Changed","text":"
  • Changed the Optional label to a * mark for required questions (#3608)
  • Updated RemoteFeedbackDataset.delete_records to use batch delete records endpoint (#3580).
  • Included allowed_for_roles for some RemoteFeedbackDataset, RemoteFeedbackRecords, and RemoteFeedbackRecord methods that are only allowed for users with roles owner and admin (#3601).
  • Renamed ArgillaToFromMixin to ArgillaMixin (#3619).
  • Moved users CLI app under the database CLI app (#3593).
  • Moved server Enum classes to the argilla.server.enums module (#3620).
"},{"location":"community/changelog/#fixed_21","title":"Fixed","text":"
  • Fixed Filter by workspace in breadcrumbs (#3577)
  • Fixed Filter by workspace in datasets table (#3604)
  • Fixed Query search highlight for Text2Text and TextClassification (#3621)
  • Fixed RatingQuestion.values validation to raise a ValidationError when values are out of range i.e. [1, 10] (#3626).
"},{"location":"community/changelog/#removed_4","title":"Removed","text":"
  • Removed multi_task_text_token_classification from TaskType as not used (#3640).
  • Removed argilla_id in favor of id from RemoteFeedbackDataset (#3663).
  • Removed fetch_records from RemoteFeedbackDataset as now the records are lazily fetched from Argilla (#3663).
  • Removed push_to_argilla from RemoteFeedbackDataset, as it only works when called on a local FeedbackDataset; updates to remote datasets are now automatically pushed to Argilla (#3663).
  • Removed set_suggestions in favor of update(suggestions=...) for both FeedbackRecord and RemoteFeedbackRecord, as all the updates of any \"updateable\" attribute of a record will go through update instead (#3663).
  • Remove unused owner attribute for client Dataset data model (#3665)
"},{"location":"community/changelog/#1141","title":"1.14.1","text":""},{"location":"community/changelog/#fixed_22","title":"Fixed","text":"
  • Fixed PostgreSQL database not being updated after begin_nested because of missing commit (#3567).
"},{"location":"community/changelog/#fixed_23","title":"Fixed","text":"
  • Fixed settings could not be provided when updating a rating or ranking question (#3552).
"},{"location":"community/changelog/#1140","title":"1.14.0","text":""},{"location":"community/changelog/#added_19","title":"Added","text":"
  • Added PATCH /api/v1/fields/{field_id} endpoint to update the field title and markdown settings (#3421).
  • Added PATCH /api/v1/datasets/{dataset_id} endpoint to update dataset name and guidelines (#3402).
  • Added PATCH /api/v1/questions/{question_id} endpoint to update question title, description and some settings (depending on the type of question) (#3477).
  • Added DELETE /api/v1/records/{record_id} endpoint to remove a record given its ID (#3337).
  • Added pull method in RemoteFeedbackDataset (a FeedbackDataset pushed to Argilla) to pull all the records from it and return them as a local copy, a FeedbackDataset (see the sketch after this list). (#3465)
  • Added delete method in RemoteFeedbackDataset (a FeedbackDataset pushed to Argilla) (#3512).
  • Added delete_records method in RemoteFeedbackDataset, and delete method in RemoteFeedbackRecord to delete records from Argilla (#3526).
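A minimal sketch of the new pull, delete_records, and delete methods on a remote dataset (names are illustrative, and passing the full record list to delete_records is an assumption):

```python
import argilla as rg

# Sketch: pull a local copy, then delete records and the dataset itself
# (#3465, #3526, #3512).
remote = rg.FeedbackDataset.from_argilla(name="demo", workspace="my-workspace")
local = remote.pull()                        # local FeedbackDataset copy
remote.delete_records(list(remote.records))  # bulk-delete the remote records
remote.delete()                              # remove the dataset from Argilla
```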
"},{"location":"community/changelog/#changed_16","title":"Changed","text":"
  • Improved efficiency of weak labeling when dataset contains vectors (#3444).
  • Added ArgillaDatasetMixin to detach the Argilla-related functionality from the FeedbackDataset (#3427)
  • Moved FeedbackDataset-related pydantic.BaseModel schemas to argilla.client.feedback.schemas instead, to be better structured and more scalable and maintainable (#3427)
  • Update CLI to use database async connection (#3450).
  • Limit rating questions values to the positive range [1, 10] (#3451).
  • Updated POST /api/users endpoint to be able to provide a list of workspace names to which the user should be linked (#3462).
  • Updated Python client User.create method to be able to provide a list of workspace names to which the user should be linked (#3462).
  • Updated GET /api/v1/me/datasets/{dataset_id}/records endpoint to allow getting records matching one of the response statuses provided via query param (#3359).
  • Updated POST /api/v1/me/datasets/{dataset_id}/records endpoint to allow searching records matching one of the response statuses provided via query param (#3359).
  • Updated SearchEngine.search method to allow searching records matching one of the response statuses provided (#3359).
  • After calling FeedbackDataset.push_to_argilla, the methods FeedbackDataset.add_records and FeedbackRecord.set_suggestions will automatically call Argilla with no need of calling push_to_argilla explicitly (#3465).
  • Now calling FeedbackDataset.push_to_huggingface dumps the responses as a List[Dict[str, Any]] instead of Sequence to make it more readable via 🤗 Datasets (#3539).
"},{"location":"community/changelog/#fixed_24","title":"Fixed","text":"
  • Fixed issue with bool values and default from Jinja2 while generating the HuggingFace DatasetCard from argilla_template.md (#3499).
  • Fixed DatasetConfig.from_yaml which was failing when calling FeedbackDataset.from_huggingface as the UUIDs cannot be deserialized automatically by PyYAML, so UUIDs are neither dumped nor loaded anymore (#3502).
  • Fixed an issue that didn't allow the Argilla server to work behind a proxy (#3543).
  • TextClassificationSettings and TokenClassificationSettings labels are properly parsed to strings both in the Python client and in the backend endpoint (#3495).
  • Fixed PUT /api/v1/datasets/{dataset_id}/publish to check whether at least one field and question has required=True (#3511).
  • Fixed FeedbackDataset.from_huggingface as suggestions were being lost when there were no responses (#3539).
  • Fixed QuestionSchema and FieldSchema not validating name attribute (#3550).
"},{"location":"community/changelog/#deprecated_4","title":"Deprecated","text":"
  • After calling FeedbackDataset.push_to_argilla, calling push_to_argilla again won't do anything since the dataset is already pushed to Argilla (#3465).
  • After calling FeedbackDataset.push_to_argilla, calling fetch_records won't do anything since the records are lazily fetched from Argilla (#3465).
  • After calling FeedbackDataset.push_to_argilla, the Argilla ID is no longer stored in the attribute/property argilla_id but in id instead (#3465).
"},{"location":"community/changelog/#1133","title":"1.13.3","text":""},{"location":"community/changelog/#fixed_25","title":"Fixed","text":"
  • Fixed ModuleNotFoundError caused because the argilla.utils.telemetry module used in the ArgillaTrainer was importing an optional dependency not installed by default (#3471).
  • Fixed ImportError caused because the argilla.client.feedback.config module was importing pyyaml optional dependency not installed by default (#3471).
"},{"location":"community/changelog/#1132","title":"1.13.2","text":""},{"location":"community/changelog/#fixed_26","title":"Fixed","text":"
  • The suggestion_type_enum ENUM data type created in PostgreSQL didn't have any value (#3445).
"},{"location":"community/changelog/#1131","title":"1.13.1","text":""},{"location":"community/changelog/#fixed_27","title":"Fixed","text":"
  • Fix database migration for PostgreSQL (See #3438)
"},{"location":"community/changelog/#1130","title":"1.13.0","text":""},{"location":"community/changelog/#added_20","title":"Added","text":"
  • Added GET /api/v1/users/{user_id}/workspaces endpoint to list the workspaces to which a user belongs (#3308 and #3343).
  • Added HuggingFaceDatasetMixin for internal usage, to detach the FeedbackDataset integrations from the class itself, and use Mixins instead (#3326).
  • Added GET /api/v1/records/{record_id}/suggestions API endpoint to get the list of suggestions for the responses associated to a record (#3304).
  • Added POST /api/v1/records/{record_id}/suggestions API endpoint to create a suggestion for a response associated to a record (#3304).
  • Added support for RankingQuestionStrategy, RankingQuestionUnification and the .for_text_classification method for the TrainingTaskMapping (#3364)
  • Added PUT /api/v1/records/{record_id}/suggestions API endpoint to create or update a suggestion for a response associated to a record (#3304 & #3391).
  • Added suggestions attribute to FeedbackRecord, and allow adding and retrieving suggestions from the Python client (#3370)
  • Added allowed_for_roles Python decorator to check whether the current user has the required role to access the decorated function/method for User and Workspace (#3383)
  • Added API and Python Client support for workspace deletion (Closes #3260)
  • Added GET /api/v1/me/workspaces endpoint to list the workspaces of the current active user (#3390)
"},{"location":"community/changelog/#changed_17","title":"Changed","text":"
  • Updated output payload for GET /api/v1/datasets/{dataset_id}/records, GET /api/v1/me/datasets/{dataset_id}/records, POST /api/v1/me/datasets/{dataset_id}/records/search endpoints to include the suggestions of the records based on the value of the include query parameter (#3304).
  • Updated POST /api/v1/datasets/{dataset_id}/records input payload to add suggestions (#3304).
  • The POST /api/datasets/:dataset-id/:task/bulk endpoints don't create the dataset if it does not exist (Closes #3244)
  • Added Telemetry support for ArgillaTrainer (closes #3325)
  • User.workspaces is no longer an attribute but a property, and is calling list_user_workspaces to list all the workspace names for a given user ID (#3334)
  • Renamed FeedbackDatasetConfig to DatasetConfig and export/import from YAML as default instead of JSON (just used internally on push_to_huggingface and from_huggingface methods of FeedbackDataset) (#3326).
  • The protected metadata fields support more than textual info; existing datasets must be reindexed. See docs for more detail (Closes #3332).
  • Updated Dockerfile parent image from python:3.9.16-slim to python:3.10.12-slim (#3425).
  • Updated quickstart.Dockerfile parent image from elasticsearch:8.5.3 to argilla/argilla-server:${ARGILLA_VERSION} (#3425).
"},{"location":"community/changelog/#removed_5","title":"Removed","text":"
  • Removed support to non-prefixed environment variables. All valid env vars start with ARGILLA_ (See #3392).
"},{"location":"community/changelog/#fixed_28","title":"Fixed","text":"
  • Fixed GET /api/v1/me/datasets/{dataset_id}/records endpoint always returning the responses for the records even if responses was not provided via the include query parameter (#3304).
  • Values for protected metadata fields are not truncated (Closes #3331).
  • Big-number IDs are properly rendered in the UI (Closes #3265)
  • Fixed ArgillaDatasetCard to include the values/labels for all the existing questions (#3366)
"},{"location":"community/changelog/#deprecated_5","title":"Deprecated","text":"
  • Integer support for record id in text classification, token classification and text2text datasets.
"},{"location":"community/changelog/#1121","title":"1.12.1","text":""},{"location":"community/changelog/#fixed_29","title":"Fixed","text":"
  • Using rg.init with default argilla user skips setting the default workspace if not available. (Closes #3340)
  • Resolved wrong import structure for ArgillaTrainer and TrainingTaskMapping (Closes #3345)
  • Pin pydantic dependency to version < 2 (Closes #3348)
"},{"location":"community/changelog/#1120","title":"1.12.0","text":""},{"location":"community/changelog/#added_21","title":"Added","text":"
  • Added RankingQuestionSettings class allowing to create ranking questions in the API using POST /api/v1/datasets/{dataset_id}/questions endpoint (#3232)
  • Added RankingQuestion in the Python client to create ranking questions (#3275).
  • Added Ranking component in feedback task question form (#3177 & #3246).
  • Added FeedbackDataset.prepare_for_training method for generating a framework-specific dataset with the responses provided for RatingQuestion, LabelQuestion and MultiLabelQuestion (#3151).
  • Added ArgillaSpaCyTransformersTrainer class for supporting the training with spacy-transformers (#3256).
"},{"location":"community/changelog/#docs","title":"Docs","text":"
  • Added instructions for how to run the Argilla frontend in the developer docs (#3314).
"},{"location":"community/changelog/#changed_18","title":"Changed","text":"
  • All docker related files have been moved into the docker folder (#3053).
  • release.Dockerfile have been renamed to Dockerfile (#3133).
  • Updated rg.load function to raise a ValueError with an explanatory message for the cases in which the user tries to use the function to load a FeedbackDataset (#3289).
  • Updated ArgillaSpaCyTrainer to allow re-using tok2vec (#3256).
"},{"location":"community/changelog/#fixed_30","title":"Fixed","text":"
  • Check available workspaces on Argilla on rg.set_workspace (Closes #3262)
"},{"location":"community/changelog/#1110","title":"1.11.0","text":""},{"location":"community/changelog/#fixed_31","title":"Fixed","text":"
  • Replaced np.float alias by float to avoid AttributeError when using find_label_errors function with numpy>=1.24.0 (#3214).
  • Fixed format_as(\"datasets\") when there are no responses or optional responses in FeedbackRecord, to set their value to what 🤗 Datasets expects instead of just None (#3224).
  • Fixed push_to_huggingface() when generate_card=True (default behaviour), as we were passing a sample record to the ArgillaDatasetCard class, and UUIDs introduced in 1.10.0 (#3192), are not JSON-serializable (#3231).
  • Fixed from_argilla and push_to_argilla to ensure consistency on both field and question re-construction, and to ensure UUIDs are properly serialized as str, respectively (#3234).
  • Refactored usage of import argilla as rg to clarify package navigation (#3279).
"},{"location":"community/changelog/#docs_1","title":"Docs","text":"
  • Fixed URLs in Weak Supervision with Sentence Transformers tutorial (#3243).
  • Fixed library buttons' formatting on Tutorials page (#3255).
  • Modified styling of error code outputs in notebooks (#3270).
  • Added ElasticSearch and OpenSearch versions (#3280).
  • Removed template notebook from table of contents (#3271).
  • Fixed tutorials with pip install argilla to not use older versions of the package (#3282).
"},{"location":"community/changelog/#added_22","title":"Added","text":"
  • Added metadata attribute to the Record of the FeedbackDataset (#3194)
  • New users update command to update the role for an existing user (#3188)
  • New Workspace class to allow users to manage their Argilla workspaces and the users assigned to those workspaces via the Python client (#3180)
  • Added User class to let users manage their Argilla users via the Python client (see the sketch after this list) (#3169).
  • Added an option to display tqdm progress bar to FeedbackDataset.push_to_argilla when looping over the records to upload (#3233).
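A minimal sketch of the new User and Workspace classes (URL, credentials, and names are illustrative):

```python
import argilla as rg

# Sketch: manage users and workspaces from the Python client (#3169, #3180).
rg.init(api_url="http://localhost:6900", api_key="owner.apikey")  # illustrative credentials
user = rg.User.create(username="annotator-1", password="12345678", role="annotator")
workspace = rg.Workspace.create(name="my-workspace")
workspace.add_user(user.id)
```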
"},{"location":"community/changelog/#changed_19","title":"Changed","text":"
  • The role system now supports three different roles: owner, admin and annotator (#3104)
  • admin role is scoped to workspace-level operations (#3115)
  • The owner user is created among the default pool of users in the quickstart, and the default user in the server now has the owner role (#3248), reverting (#3188).
"},{"location":"community/changelog/#deprecated_6","title":"Deprecated","text":"
  • As of Python 3.7 end-of-life (EOL) on 2023-06-27, Argilla will no longer support Python 3.7 (#3188). More information at https://peps.python.org/pep-0537/
"},{"location":"community/changelog/#1100","title":"1.10.0","text":""},{"location":"community/changelog/#added_23","title":"Added","text":"
  • Added search component for feedback datasets (#3138)
  • Added markdown support for feedback dataset guidelines (#3153)
  • Added Train button for feedback datasets (#3170)
"},{"location":"community/changelog/#changed_20","title":"Changed","text":"
  • Updated SearchEngine and POST /api/v1/me/datasets/{dataset_id}/records/search to return the total number of records matching the search query (#3166)
"},{"location":"community/changelog/#fixed_32","title":"Fixed","text":"
  • Replaced Enum with string values in URLs for client API calls (Closes #3149)
  • Resolved breaking issue with ArgillaSpanMarkerTrainer for Named Entity Recognition with span_marker v1.1.x onwards.
  • Move ArgillaDatasetCard import under @requires_version decorator, so that the ImportError on huggingface_hub is handled properly (#3174)
  • Allow flow FeedbackDataset.from_argilla -> FeedbackDataset.push_to_argilla under different dataset names and/or workspaces (#3192)
"},{"location":"community/changelog/#docs_2","title":"Docs","text":"
  • Resolved typos in the docs (#3240).
  • Fixed mention of master branch (#3254).
"},{"location":"community/changelog/#190","title":"1.9.0","text":""},{"location":"community/changelog/#added_24","title":"Added","text":"
  • Added boolean use_markdown property to TextFieldSettings model.
  • Added boolean use_markdown property to TextQuestionSettings model.
  • Added new status draft for the Response model.
  • Added LabelSelectionQuestionSettings class allowing to create label selection (single-choice) questions in the API (#3005)
  • Added MultiLabelSelectionQuestionSettings class allowing to create multi-label selection (multi-choice) questions in the API (#3010).
  • Added POST /api/v1/me/datasets/{dataset_id}/records/search endpoint (#3068).
  • Added new components in feedback task Question form: MultiLabel (#3064) and SingleLabel (#3016).
  • Added docstrings to the pydantic.BaseModels defined at argilla/client/feedback/schemas.py (#3137)
  • Added information about executing tests in the developer documentation (#3143).
"},{"location":"community/changelog/#changed_21","title":"Changed","text":"
  • Updated GET /api/v1/me/datasets/:dataset_id/metrics output payload to include the count of responses with draft status.
  • Database setup for unit tests. Now the unit tests use a different database than the one used by the local Argilla server (Closes #2987).
  • Updated alembic setup to be able to autogenerate revision/migration scripts using SQLAlchemy metadata from Argilla server models (#3044)
  • Improved DatasetCard generation on FeedbackDataset.push_to_huggingface when generate_card=True, following the official HuggingFace Hub template, but suited to FeedbackDatasets from Argilla (#3110)
"},{"location":"community/changelog/#fixed_33","title":"Fixed","text":"
  • Disallow fields and questions in FeedbackDataset with the same name (#3126).
  • Fixed broken links in the documentation and updated the development branch name from development to develop (#3145).
"},{"location":"community/changelog/#180","title":"1.8.0","text":""},{"location":"community/changelog/#added_25","title":"Added","text":"
  • /api/v1/datasets new endpoint to list and create datasets (#2615).
  • /api/v1/datasets/{dataset_id} new endpoint to get and delete datasets (#2615).
  • /api/v1/datasets/{dataset_id}/publish new endpoint to publish a dataset (#2615).
  • /api/v1/datasets/{dataset_id}/questions new endpoint to list and create dataset questions (#2615)
  • /api/v1/datasets/{dataset_id}/fields new endpoint to list and create dataset fields (#2615)
  • /api/v1/datasets/{dataset_id}/questions/{question_id} new endpoint to delete a dataset question (#2615)
  • /api/v1/datasets/{dataset_id}/fields/{field_id} new endpoint to delete a dataset field (#2615)
  • /api/v1/workspaces/{workspace_id} new endpoint to get workspaces by id (#2615)
  • /api/v1/responses/{response_id} new endpoint to update and delete a response (#2615)
  • /api/v1/datasets/{dataset_id}/records new endpoint to create and list dataset records (#2615)
  • /api/v1/me/datasets new endpoint to list user visible datasets (#2615)
  • /api/v1/me/dataset/{dataset_id}/records new endpoint to list dataset records with user responses (#2615)
  • /api/v1/me/datasets/{dataset_id}/metrics new endpoint to get the dataset user metrics (#2615)
  • /api/v1/me/records/{record_id}/responses new endpoint to create record user responses (#2615)
  • Showing new feedback task datasets in the datasets list (#2719)
  • New page for feedback task (#2680)
  • Show feedback task metrics (#2822)
  • Users can delete a dataset in the dataset settings page (#2792)
  • Support for FeedbackDataset in Python client (parent PR #2615, and nested PRs: #2949, #2827, #2943, #2945, #2962, and #3003)
  • Integration with the HuggingFace Hub (#2949)
  • Added ArgillaPeftTrainer for text and token classification (#2854)
  • Added predict_proba() method to ArgillaSetFitTrainer
  • Added ArgillaAutoTrainTrainer for Text Classification (#2664)
  • New database revisions command showing database revisions info
"},{"location":"community/changelog/#fixes","title":"Fixes","text":"
  • Avoid rendering HTML for invalid HTML strings in Text2text (#2911)
"},{"location":"community/changelog/#changed_22","title":"Changed","text":"
  • The database migrate command accepts a --revision param to provide a specific revision id
  • tokens_length metrics function returns empty data (#3045)
  • token_length metrics function returns empty data (#3045)
  • mention_length metrics function returns empty data (#3045)
  • entity_density metrics function returns empty data (#3045)
"},{"location":"community/changelog/#deprecated_7","title":"Deprecated","text":"
  • Using Argilla with Python 3.7 runtime is deprecated and support will be removed from version 1.11.0 (#2902)
  • tokens_length metrics function has been deprecated and will be removed in 1.10.0 (#3045)
  • token_length metrics function has been deprecated and will be removed in 1.10.0 (#3045)
  • mention_length metrics function has been deprecated and will be removed in 1.10.0 (#3045)
  • entity_density metrics function has been deprecated and will be removed in 1.10.0 (#3045)
"},{"location":"community/changelog/#removed_6","title":"Removed","text":"
  • Removed mention density, tokens_length and chars_length metrics from token classification metrics storage (#3045)
  • Removed token char_start, char_end, tag, and score metrics from token classification metrics storage (#3045)
  • Removed tags-related metrics from token classification metrics storage (#3045)
"},{"location":"community/changelog/#170","title":"1.7.0","text":""},{"location":"community/changelog/#added_26","title":"Added","text":"
  • Added max_retries and num_threads parameters to rg.log to run data logging requests concurrently with a backoff retry policy (see the sketch after this list). See #2458 and #2533
  • rg.load accepts include_vectors and include_metrics when loading data. Closes #2398
  • Added settings param to prepare_for_training (#2689)
  • Added prepare_for_training for openai (#2658)
  • Added ArgillaOpenAITrainer (#2659)
  • Added ArgillaSpanMarkerTrainer for Named Entity Recognition (#2693)
  • Added ArgillaTrainer CLI support. Closes (#2809)
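A minimal sketch of concurrent logging with retries (dataset name and record are illustrative):

```python
import argilla as rg

# Sketch: rg.log with the new max_retries and num_threads parameters (#2458, #2533).
records = [
    rg.TextClassificationRecord(text="great product", prediction=[("positive", 0.9)])
]
rg.log(records, name="reviews", num_threads=4, max_retries=3)
```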
"},{"location":"community/changelog/#fixes_1","title":"Fixes","text":"
  • Fixed image alignment on token classification
"},{"location":"community/changelog/#changed_23","title":"Changed","text":"
  • Argilla quickstart image dependencies are externalized into quickstart.requirements.txt. See #2666
  • Bulk endpoints will upsert data when a record id is present. Closes #2535
  • Moved from click to typer for CLI support. Closes (#2815)
  • Argilla server docker image is built with PostgreSQL support. Closes #2686
  • The rg.log computes all batches and raises an error for all failed batches.
  • The default batch size for rg.log is now 100.
"},{"location":"community/changelog/#fixed_34","title":"Fixed","text":"
  • argilla.training bugfixes and unification (#2665)
  • Resolved several small bugs in the ArgillaTrainer.
"},{"location":"community/changelog/#deprecated_8","title":"Deprecated","text":"
  • The rg.log_async function is deprecated and will be removed in the next minor release.
"},{"location":"community/changelog/#160","title":"1.6.0","text":""},{"location":"community/changelog/#added_27","title":"Added","text":"
  • ARGILLA_HOME_PATH new environment variable (#2564).
  • ARGILLA_DATABASE_URL new environment variable (#2564).
  • Basic support for user roles with admin and annotator (#2564).
  • id, first_name, last_name, role, inserted_at and updated_at new user fields (#2564).
  • /api/users new endpoint to list and create users (#2564).
  • /api/users/{user_id} new endpoint to delete users (#2564).
  • /api/workspaces new endpoint to list and create workspaces (#2564).
  • /api/workspaces/{workspace_id}/users new endpoint to list workspace users (#2564).
  • /api/workspaces/{workspace_id}/users/{user_id} new endpoint to create and delete workspace users (#2564).
  • argilla.tasks.users.migrate new task to migrate users from old YAML file to database (#2564).
  • argilla.tasks.users.create new task to create a user (#2564).
  • argilla.tasks.users.create_default new task to create a user with default credentials (#2564).
  • argilla.tasks.database.migrate new task to execute database migrations (#2564).
  • release.Dockerfile and quickstart.Dockerfile now create a default argilladata volume to persist data (#2564).
  • Added user settings page. Closes #2496
  • Added Argilla.training module with support for spacy, setfit, and transformers. Closes #2504
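A minimal sketch of the new training module; the exact constructor and train signature are assumptions based on the entry above:

```python
from argilla.training import ArgillaTrainer  # assumed import path

# Sketch: train on an Argilla dataset with one of the supported frameworks
# (spacy, setfit, transformers). Names are illustrative.
trainer = ArgillaTrainer(name="reviews", framework="setfit", train_size=0.8)
trainer.train(output_dir="my-setfit-model")  # assumed signature
```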
"},{"location":"community/changelog/#fixes_2","title":"Fixes","text":"
  • The prepare_for_training method now works when multi_label=True. Closes #2606
"},{"location":"community/changelog/#changed_24","title":"Changed","text":"
  • The ARGILLA_USERS_DB_FILE environment variable is now only used to migrate users from the YAML file to the database (#2564).
  • full_name user field is now deprecated and first_name and last_name should be used instead (#2564).
  • password user field now requires a minimum of 8 and a maximum of 100 characters (#2564).
  • Changed quickstart.Dockerfile image default users from team and argilla to admin and annotator, including new passwords and API keys (#2564).
  • Datasets to be managed only by users with admin role (#2564).
  • The list of rules is now accessible while metrics are computed. Closes #2117
  • Style updates for weak labeling and adding a feedback toast when deleting rules. See #2626 and #2648
"},{"location":"community/changelog/#removed_7","title":"Removed","text":"
  • email user field (#2564).
  • disabled user field (#2564).
  • Support for private workspaces (#2564).
  • ARGILLA_LOCAL_AUTH_DEFAULT_APIKEY and ARGILLA_LOCAL_AUTH_DEFAULT_PASSWORD environment variables. Use python -m argilla.tasks.users.create_default instead (#2564).
  • The old headers for API Key and workspace from python client
  • The default value for old API Key constant. Closes #2251
"},{"location":"community/changelog/#151-2023-03-30","title":"1.5.1 - 2023-03-30","text":""},{"location":"community/changelog/#fixes_3","title":"Fixes","text":"
  • Copying datasets between workspaces with proper owner/workspace info. Closes #2562
  • Copy dataset with empty workspace to the default user workspace 905d4de
  • Using elasticsearch config to request backend version. Closes #2311
  • Remove sorting by score in labels. Closes #2622
"},{"location":"community/changelog/#changed_25","title":"Changed","text":"
  • Update field name in metadata for image url. See #2609
  • Improvements in tutorial doc cards. Closes #2216
"},{"location":"community/changelog/#150-2023-03-21","title":"1.5.0 - 2023-03-21","text":""},{"location":"community/changelog/#added_28","title":"Added","text":"
  • Add the fields to retrieve when loading the data from argilla. rg.load takes too long because of the vector field, even when users don't need it. Closes #2398
  • Add new page and components for dataset settings. Closes #2442
  • Add ability to show image in records (for TokenClassification and TextClassification) if a URL is passed in metadata with the key _image_url (see the sketch after this list)
  • Non-searchable fields support in metadata. #2570
  • Add record ID references to the prepare for training methods. Closes #2483
  • Add tutorial on Image Classification. #2420
  • Add Train button, visible for \"admin\" role, with code snippets from a selection of libraries. Closes #2591 (https://github.com/argilla-io/argilla/pull/2591)
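A minimal sketch of the image URL feature with the v1 client (the dataset name and image URL are placeholders):

import argilla as rg\n\n# The special \"_image_url\" metadata key makes the UI render the image with the record\nrecord = rg.TextClassificationRecord(\n    text=\"A photo of a cat\",\n    metadata={\"_image_url\": \"https://example.com/cat.png\"},  # placeholder URL\n)\n\nrg.log(record, name=\"image_classification_demo\")  # placeholder dataset name\n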
"},{"location":"community/changelog/#changed_26","title":"Changed","text":"
  • Labels are now centralized in a specific vuex ORM called GlobalLabel Model, see https://github.com/argilla-io/argilla/issues/2210. This model is the same for TokenClassification and TextClassification (so both tasks have labels with color_id and shortcuts parameters in the vuex ORM)
  • The shortcuts improvement for labels #2339 has been moved to the vuex ORM in the dataset settings feature #2444
  • Update \"Define a labeling schema\" section in docs.
  • The record inputs are sorted alphabetically in UI by default. #2581
  • The record inputs are fully visible when the pagination size is one, and the height of the collapsed area is bigger for laptop screens. #2587
"},{"location":"community/changelog/#fixes_4","title":"Fixes","text":"
  • Allow URL to be clickable in Jupyter notebook again. Closes #2527
"},{"location":"community/changelog/#removed_8","title":"Removed","text":"
  • Removing some data scan deprecated endpoints used by old clients. This change will break compatibility with client <v1.3.0
  • Stop using old scan deprecated endpoints in python client. This logic will break client compatibility with server version <1.3.0
  • Remove the previous way to add labels through the dataset page. Now labels can be added only through dataset settings page.
"},{"location":"community/contributor/","title":"How to contribute?","text":"

Thank you for investing your time in contributing to the project! Any contribution you make will be reflected in the most recent version of Argilla \ud83e\udd29.

New to contributing in general?

If you're a new contributor, read the README to get an overview of the project. In addition, here are some resources to help you get started with open-source contributions:

  • Discord: You are welcome to join the Argilla Discord community, where you can keep in touch with other users, contributors and the Argilla team. In the following section, you can find more information on how to get started in Discord.
  • Git: This is a very useful tool to keep track of the changes in your files. Using the command-line interface (CLI), you can make your contributions easily. For that, you need to have it installed and updated on your computer.
  • GitHub: It is a platform and cloud-based service that uses git and allows developers to collaborate on projects. To contribute to Argilla, you'll need to create an account. Check the Contributor Workflow with Git and GitHub for more info.
  • Developer Documentation: To collaborate, you'll need to set up an efficient environment. Check the developer documentation to know how to do it.
"},{"location":"community/contributor/#first-contact-in-discord","title":"First Contact in Discord","text":"

Discord is a handy tool for more casual conversations and to answer day-to-day questions. As part of Hugging Face, we have set up some Argilla channels on the server. Click here to join the Hugging Face Discord community effortlessly.

When part of the Hugging Face Discord, you can select \"Channels & roles\" and select \"Argilla\" along with any of the other groups that are interesting to you. \"Argilla\" will cover anything about Argilla and Distilabel. You can join the following channels:

  • #argilla-announcements: \ud83d\udce2 Important announcements and updates.
  • #argilla-distilabel-general: \ud83d\udcac General discussions about Argilla and Distilabel.
  • #argilla-distilabel-help: \ud83d\ude4b\u200d\u2640\ufe0f Need assistance? We're always here to help. Select the appropriate label (argilla or distilabel) for your issue and post it.

So now there is only one thing left to do: introduce yourself and talk to the community. You'll always be welcome! \ud83e\udd17\ud83d\udc4b

"},{"location":"community/contributor/#contributor-workflow-with-git-and-github","title":"Contributor Workflow with Git and GitHub","text":"

If you're working with Argilla and suddenly a new idea comes to your mind or you find an issue that can be improved, it's time to actively participate and contribute to the project!

"},{"location":"community/contributor/#report-an-issue","title":"Report an issue","text":"

If you spot a problem, search if an issue already exists. You can use the Label filter. If that is the case, participate in the conversation. If it does not exist, create an issue by clicking on New Issue.

This will show various templates, choose the one that best suits your issue.

Below, you can see an example of the Feature request template. Once you choose one, you will need to fill it in following the guidelines. Try to be as clear as possible. In addition, you can assign yourself to the issue and add or choose the right labels. Finally, click on Submit new issue.

"},{"location":"community/contributor/#work-with-a-fork","title":"Work with a fork","text":""},{"location":"community/contributor/#fork-the-argilla-repository","title":"Fork the Argilla repository","text":"

After having reported the issue, you can start working on it. For that, you will need to create a fork of the project. To do that, click on the Fork button.

Now, fill in the information. Remember to uncheck the Copy develop branch only option if you are going to work in or from another branch (for instance, the main branch is used to fix documentation). Then, click on Create fork.

Now, you will be redirected to your fork. You can see that you are in your fork because the name of the repository will be your username/argilla, and it will indicate forked from argilla-io/argilla.

"},{"location":"community/contributor/#clone-your-forked-repository","title":"Clone your forked repository","text":"

In order to make the required adjustments, clone the forked repository to your local machine. Choose the destination folder and run the following command:

git clone https://github.com/[your-github-username]/argilla.git\ncd argilla\n

To keep your fork\u2019s main/develop branch up to date with our repo, add it as an upstream remote branch.

git remote add upstream https://github.com/argilla-io/argilla.git\n
"},{"location":"community/contributor/#create-a-new-branch","title":"Create a new branch","text":"

For each issue you're addressing, it's advisable to create a new branch. GitHub offers a straightforward method to streamline this process.

\u26a0\ufe0f Never work directly on the main or develop branch. Always create a new branch for your changes.

Navigate to your issue and on the right column, select Create a branch.

After the new window pops up, the branch will be named after the issue; include a prefix such as feature/, bug/, or docs/ to facilitate quick recognition of the issue type. In the Repository destination, pick your fork ( [your-github-username]/argilla), and then select Change branch source to specify the source branch for creating the new one. Complete the process by clicking Create branch.

\ud83e\udd14 Remember that the main branch is only used to work with the documentation. For any other changes, use the develop branch.

Now, locally change to the new branch you just created.

git fetch origin\ngit checkout [branch-name]\n
"},{"location":"community/contributor/#use-changelogmd","title":"Use CHANGELOG.md","text":"

If you are working on a new feature, it is a good practice to make note of it for others to keep up with the changes. For that, we utilize the CHANGELOG.md file in the root directory. This file is used to list changes made in each version of the project and there are headers that we use to denote each type of change.

  • Added: for new features.
  • Changed: for changes in existing functionality.
  • Deprecated: for soon-to-be removed features.
  • Removed: for now removed features.
  • Fixed: for any bug fixes.
  • Security: in case of vulnerabilities.

A sample addition would be:

- Fixed the key errors for the `init` method ([#NUMBER_OF_PR](LINK_TO_PR)). Contributed by @github_handle.\n

You can have a look at the CHANGELOG.md file to see more cases and examples.

"},{"location":"community/contributor/#make-changes-and-push-them","title":"Make changes and push them","text":"

Make the changes you want in your local repository, and test that everything works and you are following the guidelines.

Check the developer documentation to set up your environment and start working on the project.

Once you have finished, you can check the status of your repository and synchronize it with the upstream repo with the following command:

# Check the status of your repository\ngit status\n\n# Synchronize with the upstream repo\ngit checkout [branch-name]\ngit rebase [default-branch]\n

If everything is right, we need to commit and push the changes to your fork. For that, run the following commands:

# Add the changes to the staging area\ngit add filename\n\n# Commit the changes by writing a proper message\ngit commit -m \"commit-message\"\n\n# Push the changes to your fork\ngit push origin [branch-name]\n

When pushing, you will be asked to enter your GitHub login credentials. Once the push is complete, all local commits will be on your GitHub repository.

"},{"location":"community/contributor/#create-a-pull-request","title":"Create a pull request","text":"

Come back to GitHub, navigate to the original repository where you created your fork, and click on Compare & pull request.

First, click on compare across forks and select the right repositories and branches.

In the base repository, keep in mind to select either main or develop based on the modifications made. In the head repository, indicate your forked repository and the branch corresponding to the issue.

Then, fill in the pull request template. You should add a prefix to the PR name, as we did with the branch above. If you are working on a new feature, you can name your PR feat: TITLE. If your PR consists of a solution for a bug, you can name your PR bug: TITLE. And, if your work is for improving the documentation, you can name your PR docs: TITLE.

In addition, on the right side, you can select a reviewer (for instance, if you discussed the issue with a member of the Argilla team) and assign the pull request to yourself. It is highly advisable to add labels to the PR as well. You can do this in the labels section on the right side of the screen. For instance, if you are addressing a bug, add the bug label, or if the PR is related to the documentation, add the documentation label. This way, PRs can be easily filtered.

Finally, fill in the template carefully and follow the guidelines. Remember to link the original issue and enable the checkbox to allow maintainer edits so the branch can be updated for a merge. Then, click on Create pull request.

"},{"location":"community/contributor/#review-your-pull-request","title":"Review your pull request","text":"

Once you submit your PR, a team member will review your proposal. We may ask questions, request additional information or ask for changes to be made before a PR can be merged, either using suggested changes or pull request comments.

You can apply the changes directly through the UI (check the files changed and click on the right-corner three dots, see image below) or from your fork, and then commit them to your branch. The PR will be updated automatically and the suggestions will appear as outdated.

If you run into any merge issues, check out this git tutorial to help you resolve merge conflicts and other issues.

"},{"location":"community/contributor/#your-pr-is-merged","title":"Your PR is merged!","text":"

Congratulations \ud83c\udf89\ud83c\udf8a We thank you \ud83e\udd29

Once your PR is merged, your contributions will be publicly visible on the Argilla GitHub.

Additionally, we will include your changes in the next release based on our development branch.

"},{"location":"community/contributor/#additional-resources","title":"Additional resources","text":"

Here are some helpful resources for your reference.

  • Configuring Discord, a guide to learn how to get started with Discord.
  • Pro Git, a book to learn Git.
  • Git in VSCode, a guide to learn how to easily use Git in VSCode.
  • GitHub Skills, an interactive course to learn GitHub.
"},{"location":"community/developer/","title":"Developer documentation","text":"

As an Argilla developer, you are already part of the community, and your contribution is key to our development. This guide will help you set up your development environment and start contributing.

Argilla core components

  • Documentation: Argilla's documentation serves as an invaluable resource, providing a comprehensive and in-depth guide for users seeking to explore, understand, and effectively harness the core components of the Argilla ecosystem.

  • Python SDK: A Python SDK installable with pip install argilla to interact with the Argilla Server and the Argilla UI. It provides an API to manage the data, configuration, and annotation workflows.

  • FastAPI Server: The core of Argilla is a Python FastAPI server that manages the data by pre-processing it and storing it in the vector database. It also stores application information in the relational database. It provides a REST API that interacts with the data from the Python SDK and the Argilla UI. It also provides a web interface to visualize the data.

  • Relational Database: A relational database to store the metadata of the records and the annotations. SQLite is used as the default built-in option and ships with the Argilla Server, but a separate PostgreSQL database can be used instead.

  • Vector Database: A vector database to store the records data and perform scalable vector similarity searches and basic document searches. We currently support ElasticSearch and OpenSearch, which can be deployed as separate Docker images.

  • Vue.js UI: A web application to visualize and annotate your data, users, and teams. It is built with Vue.js and is directly deployed alongside the Argilla Server within our Argilla Docker image.

"},{"location":"community/developer/#the-argilla-repository","title":"The Argilla repository","text":"

The Argilla repository has a monorepo structure, which means that all the components are located in the same repository: argilla-io/argilla. This repo is divided into the following folders:

  • argilla: The Python SDK project
  • argilla-server: The FastAPI server project
  • argilla-frontend: The Vue.js UI project
  • argilla/docs: The documentation project
  • examples: Example resources for deployments, scripts and notebooks

How to contribute?

Before starting to develop, we recommend reading our contribution guide to understand the contribution process and the guidelines to follow. Once you have cloned the Argilla repository and checked out to the correct branch, you can start setting up your development environment.

"},{"location":"community/developer/#set-up-the-python-environment","title":"Set up the Python environment","text":"

To work on the Argilla Python SDK, you must install the Argilla package on your system.

Create a virtual environment

We recommend creating a dedicated virtual environment for SDK development to prevent conflicts. For this, you can use the manager of your choice, such as venv, conda, pyenv, or uv.
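For instance, with the built-in venv module (the environment name .venv is just a common choice):

# Create and activate a virtual environment\npython -m venv .venv\nsource .venv/bin/activate\n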

From the root of the cloned Argilla repository, you should move to the argilla folder in your terminal.

cd argilla\n

Next, activate your virtual environment and make the required installations:

# Install the `pdm` package manager\npip install pdm\n\n# Install argilla in editable mode and the development dependencies\npdm install --dev\n
"},{"location":"community/developer/#linting-and-formatting","title":"Linting and formatting","text":"

To maintain a consistent code format, install the pre-commit hooks to run before each commit automatically.

pre-commit install\n

In addition, run the following scripts to check the code formatting and linting:

pdm run format\npdm run lint\n
"},{"location":"community/developer/#running-tests","title":"Running tests","text":"

Running tests at the end of every development cycle is indispensable to ensure no breaking changes.

# Run all tests\npdm run tests\n\n# Run specific tests\npytest tests/integration\npytest tests/unit\n
Running linting, formatting, and tests

You can run all the checks at once by using the following command:

    pdm run all\n
"},{"location":"community/developer/#set-up-the-databases","title":"Set up the databases","text":"

To run your development environment, you need to set up Argilla's databases.

"},{"location":"community/developer/#vector-database","title":"Vector database","text":"

Argilla supports ElasticSearch as its primary search engine for the vector database by default. For more information about setting up OpenSearch, check the Server configuration.

You can run ElasticSearch locally using Docker:

# Argilla supports ElasticSearch versions >=8.5\ndocker run -d --name elasticsearch-for-argilla -p 9200:9200 -p 9300:9300 -e \"ES_JAVA_OPTS=-Xms512m -Xmx512m\" -e \"discovery.type=single-node\" -e \"xpack.security.enabled=false\" docker.elastic.co/elasticsearch/elasticsearch:8.5.3\n
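If you prefer OpenSearch, a comparable local setup might look like the following sketch; the image tag and the security flag are assumptions, so check the Server configuration for the supported versions:

# Assumed example: run OpenSearch locally with the security plugin disabled for development\ndocker run -d --name opensearch-for-argilla -p 9200:9200 -e \"discovery.type=single-node\" -e \"DISABLE_SECURITY_PLUGIN=true\" opensearchproject/opensearch:2.11.0\n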

Install Docker

You can find the Docker installation guides for Windows, macOS and Linux on Docker website.

"},{"location":"community/developer/#relational-database","title":"Relational database","text":"

Argilla will use SQLite as the default built-in option to store information about users, workspaces, etc., for the relational database. No additional configuration is required to start using SQLite.

By default, the database file will be created at ~/.argilla/argilla.db; this can be configured by setting different values for ARGILLA_DATABASE_URL and ARGILLA_HOME_PATH environment variables.
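For example, to keep the Argilla home directory and the SQLite file under a custom path (the values here are illustrative):

# Illustrative values: store Argilla data under /data/argilla instead of ~/.argilla\nexport ARGILLA_HOME_PATH=/data/argilla\nexport ARGILLA_DATABASE_URL=sqlite:////data/argilla/argilla.db\n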

Manage the database

For more information about the database migration and user management, refer to the Argilla server README.

"},{"location":"community/developer/#set-up-the-server","title":"Set up the server","text":"

Once you have set up the databases, you can start the Argilla server. To run the server, you can check the Argilla server README file.

"},{"location":"community/developer/#set-up-the-frontend","title":"Set up the frontend","text":"

Optionally, if you need to run the Argilla frontend, you can follow the instructions in the Argilla frontend README.

"},{"location":"community/developer/#set-up-the-documentation","title":"Set up the documentation","text":"

Documentation is essential to provide users with a comprehensive guide about Argilla.

From main or develop?

If you are updating, improving, or fixing the current documentation without a code change, work on the main branch. For new features or bug fixes that require documentation, use the develop branch.

To contribute to the documentation and generate it locally, ensure you have installed the development dependencies as shown in the \"Set up the Python environment\" section, and run the following command to create the development server with mkdocs:

mkdocs serve\n
"},{"location":"community/developer/#documentation-guidelines","title":"Documentation guidelines","text":"

As mentioned, we use mkdocs to build the documentation. You can write the documentation in markdown format, and it will automatically be converted to HTML. In addition, you can include elements such as tables, tabs, images, and others, as shown in this guide. We recommend following these guidelines:

  • Use clear and concise language: Ensure the documentation is easy to understand for all users by using straightforward language and including meaningful examples. Images are not easy to maintain, so use them only when necessary and place them in the appropriate folder within the docs/assets/images directory.
  • Verify code snippets: Double-check that all code snippets are correct and runnable.
  • Review spelling and grammar: Check the spelling and grammar of the documentation.
  • Update the table of contents: If you add a new page, include it in the relevant index.md or the mkdocs.yml file.

Contribute with a tutorial

You can also contribute a tutorial (.ipynb) to the \"Community\" section. We recommend aligning the tutorial with the structure of the existing tutorials. For an example, check this tutorial.

"},{"location":"community/popular_issues/","title":"Issue dashboard","text":"Most engaging open issuesLatest issues open by the communityPlanned issues for upcoming releases Rank Issue Reactions Comments 1 4637 - [FEATURE] Label breakdown in Feedback dataset stats \ud83d\udc4d 6 \ud83d\udcac 4 2 1607 - Support for hierarchical multilabel text classification (taxonomy) \ud83d\udc4d 5 \ud83d\udcac 15 3 4658 - Active listeners for Feedback Dataset \ud83d\udc4d 5 \ud83d\udcac 5 4 1800 - Add comments/notes to annotation datasets to share with teammates. \ud83d\udc4d 2 \ud83d\udcac 6 5 1837 - Custom Record UI Templates \ud83d\udc4d 2 \ud83d\udcac 6 6 1922 - Show potential number of records during filter selection \ud83d\udc4d 2 \ud83d\udcac 4 7 1630 - Accepting several predictions/annotations for the same record \ud83d\udc4d 2 \ud83d\udcac 2 8 5348 - [FEATURE] Ability to create new labels on-the-fly \ud83d\udc4d 2 \ud83d\udcac 0 9 3625 - [IMPROVE] Fields with empty title shall have exactly the same value as the user entered in the name field, without altering it \ud83d\udc4d 2 \ud83d\udcac 0 10 4372 - [FEATURE] distribution indication for filters \ud83d\udc4d 1 \ud83d\udcac 6 Rank Issue Author 1 \ud83d\udfe2 5570 - [BUG-python/deployment] by lecheuklun 2 \ud83d\udfe2 5561 - [FEATURE] Force predetermined sorting for a dataset by lgienapp 3 \ud83d\udfe2 5557 - [DOCS] \"Bulk Labeling Multimodal Data\" Notebook outdated by trojblue 4 \ud83d\udfe2 5548 - [BUG-python/deployment] verify=False parameter is not passed to httpx.Client through Argilla class (v2.2.0) by xiajing10 5 \ud83d\udfe3 5543 - automatically load token from collab secrets if it exists by not-lain 6 \ud83d\udfe3 5530 - [FEATURE] updated_at / inserted_at properties on retrieved Records by maxserras 7 \ud83d\udfe3 5529 - [BUG-UI/UX] API Key copy button not working by cceyda 8 \ud83d\udfe2 5528 - [FEATURE] Filter by responses & suggestions by cceyda 9 \ud83d\udfe2 5516 - [FEATURE] Allow all annotators in workspace to see all the submitted records by cceyda 10 \ud83d\udfe2 5513 - [ENHANCEMENT] Improve ImageField error messaging to deal with paths, urls, none by cceyda Rank Issue Milestone 1 \ud83d\udfe2 5415 - [FEATURE] Do not stop logging records if UnprocessableEntityError is raised because one single record v2.2.0 2 \ud83d\udfe2 5534 - [FEATURE] preview custom field data in dataset settings page v2.3.0 3 \ud83d\udfe2 5520 - [BUG-UI/UX] Incorrect iframe height calculation in sandBox Component v2.4.0 4 \ud83d\udfe2 5513 - [ENHANCEMENT] Improve ImageField error messaging to deal with paths, urls, none v2.4.0 5 \ud83d\udfe2 5458 - [FEATURE] Controls for data schema for images when exporting datasets and records v2.4.0 6 \ud83d\udfe2 4931 - [REFACTOR] Improve handling of question models and dicts v2.4.0 7 \ud83d\udfe2 4935 - [CONFIG] Resolve python requirements for python version and dependencies with server. v2.4.0 8 \ud83d\udfe2 1836 - Webhooks v2.4.0

Last update: 2024-10-07

"},{"location":"community/integrations/llamaindex_rag_github/","title":"LlamaIndex","text":"
!pip install \"argilla-llama-index\"\n!pip install \"llama-index-readers-github==0.1.9\"\n

Let's make the required imports:

from llama_index.core import (\n    Settings,\n    VectorStoreIndex,\n    set_global_handler,\n)\nfrom llama_index.llms.openai import OpenAI\nfrom llama_index.readers.github import (\n    GithubClient,\n    GithubRepositoryReader,\n)\n

We need to set the OpenAI API key and the GitHub token. The OpenAI API key is required to run queries using GPT models, while the GitHub token ensures you have access to the repository you're using. Although the GitHub token might not be necessary for public repositories, it is still recommended.

import os\n\nos.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\nopenai_api_key = os.getenv(\"OPENAI_API_KEY\")\n\nos.environ[\"GITHUB_TOKEN\"] = \"ghp_...\"\ngithub_token = os.getenv(\"GITHUB_TOKEN\")\n
set_global_handler(\n    \"argilla\",\n    dataset_name=\"github_query_model\",\n    api_url=\"http://localhost:6900\",\n    api_key=\"argilla.apikey\",\n    number_of_retrievals=2,\n)\n
github_client = GithubClient(github_token=github_token, verbose=True)\n

Before creating our GithubRepositoryReader instance, we need to adjust the nesting. Since the Jupyter kernel operates on an event loop, we must prevent this loop from finishing before the repository is fully read.

import nest_asyncio\n\nnest_asyncio.apply()\n

Now, let\u2019s create a GithubRepositoryReader instance with the necessary repository details. In this case, we'll target the main branch of the argilla repository. As we will focus on the documentation, we will include only the argilla/docs/ folder, excluding images, JSON files, and ipynb files.

documents = GithubRepositoryReader(\n    github_client=github_client,\n    owner=\"argilla-io\",\n    repo=\"argilla\",\n    use_parser=False,\n    verbose=False,\n    filter_directories=(\n        [\"argilla/docs/\"],\n        GithubRepositoryReader.FilterType.INCLUDE,\n    ),\n    filter_file_extensions=(\n        [\n            \".png\",\n            \".jpg\",\n            \".jpeg\",\n            \".gif\",\n            \".svg\",\n            \".ico\",\n            \".json\",\n            \".ipynb\",   # Erase this line if you want to include notebooks\n\n        ],\n        GithubRepositoryReader.FilterType.EXCLUDE,\n    ),\n).load_data(branch=\"main\")\n

Now, let's create a LlamaIndex index out of these documents, and we can start querying the RAG system.

# LLM settings\nSettings.llm = OpenAI(\n    model=\"gpt-3.5-turbo\", temperature=0.8, openai_api_key=openai_api_key\n)\n\n# Load the data and create the index\nindex = VectorStoreIndex.from_documents(documents)\n\n# Create the query engine\nquery_engine = index.as_query_engine()\n
response = query_engine.query(\"How do I create a Dataset in Argilla?\")\nresponse\n

The generated response will be automatically logged in our Argilla instance. Check it out! From Argilla you can quickly have a look at your predictions and annotate them, so you can combine both synthetic data and human feedback.

Let's ask a couple more questions to see the overall behavior of the RAG chatbot. Remember that the answers are automatically logged into your Argilla instance.

questions = [\n    \"How can I list the available datasets?\",\n    \"Which are the user credentials?\",\n    \"Can I use markdown in Argilla?\",\n    \"Could you explain how to annotate datasets in Argilla?\",\n]\n\nanswers = []\n\nfor question in questions:\n    answers.append(query_engine.query(question))\n\nfor question, answer in zip(questions, answers):\n    print(f\"Question: {question}\")\n    print(f\"Answer: {answer}\")\n    print(\"----------------------------\")\n
\nQuestion: How can I list the available datasets?\nAnswer: You can list all the datasets available in a workspace by utilizing the `datasets` attribute of the `Workspace` class. Additionally, you can determine the number of datasets in a workspace by using `len(workspace.datasets)`. To list the datasets, you can iterate over them and print out each dataset. Remember that dataset settings are not preloaded when listing datasets, and if you need to work with settings, you must load them explicitly for each dataset.\n----------------------------\nQuestion: Which are the user credentials?\nAnswer: The user credentials in Argilla consist of a username, password, and API key.\n----------------------------\nQuestion: Can I use markdown in Argilla?\nAnswer: Yes, you can use Markdown in Argilla.\n----------------------------\nQuestion: Could you explain how to annotate datasets in Argilla?\nAnswer: To annotate datasets in Argilla, users can manage their data annotation projects by setting up `Users`, `Workspaces`, `Datasets`, and `Records`. By deploying Argilla on the Hugging Face Hub or with `Docker`, installing the Python SDK with `pip`, and creating the first project, users can get started in just 5 minutes. The tool allows for interacting with data in a more engaging way through features like quick labeling with filters, AI feedback suggestions, and semantic search, enabling users to focus on training models and monitoring their performance effectively.\n----------------------------\n\n
"},{"location":"community/integrations/llamaindex_rag_github/#create-a-rag-system-expert-in-a-github-repository-and-log-your-predictions-in-argilla","title":"\ud83d\udd75\ud83c\udffb\u200d\u2640\ufe0f Create a RAG system expert in a GitHub repository and log your predictions in Argilla","text":"

In this tutorial, we'll show you how to create a RAG system that can answer questions about a specific GitHub repository. As an example, we will target the Argilla repository. This RAG system will target the docs of the repository, as that's where most of the natural language information about the repository can be found.

This tutorial includes the following steps:

  • Setting up the Argilla callback handler for LlamaIndex.
  • Initializing a GitHub client.
  • Creating an index with a specific set of files from the GitHub repository of our choice.
  • Creating a RAG system out of the Argilla repository, asking questions, and automatically logging the answers to Argilla.

This tutorial is based on the GitHub Repository Reader made by LlamaIndex.

"},{"location":"community/integrations/llamaindex_rag_github/#getting-started","title":"Getting started","text":""},{"location":"community/integrations/llamaindex_rag_github/#deploy-the-argilla-server","title":"Deploy the Argilla server\u00b6","text":"

If you already have deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

"},{"location":"community/integrations/llamaindex_rag_github/#set-up-the-environment","title":"Set up the environment\u00b6","text":"

To complete this tutorial, you need to install this integration and a third-party library via pip.

Note

Check the integration GitHub repository here.

"},{"location":"community/integrations/llamaindex_rag_github/#set-the-argillas-llamaindex-handler","title":"Set the Argilla's LlamaIndex handler","text":"

To easily log your data into Argilla within your LlamaIndex workflow, you only need a simple step. Just call the Argilla global handler for Llama Index before starting production with your LLM. This ensures that the predictions obtained using Llama Index are automatically logged to the Argilla instance. The handler takes the following arguments:

  • dataset_name: The name of the dataset. If the dataset does not exist, it will be created with the specified name. Otherwise, it will be updated.
  • api_url: The URL to connect to the Argilla instance.
  • api_key: The API key to authenticate with the Argilla instance.
  • number_of_retrievals: The number of retrieved documents to be logged. Defaults to 0.
  • workspace_name: The name of the workspace to log the data. By default, the first available workspace.

> For more information about the credentials, check the documentation for users and workspaces.
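For instance, a call that also pins the logs to a specific workspace could look like this sketch (the workspace name is a placeholder):

from llama_index.core import set_global_handler\n\nset_global_handler(\n    \"argilla\",\n    dataset_name=\"github_query_model\",\n    api_url=\"http://localhost:6900\",\n    api_key=\"argilla.apikey\",\n    number_of_retrievals=2,\n    workspace_name=\"my-workspace\",  # placeholder; defaults to the first available workspace\n)\n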

"},{"location":"community/integrations/llamaindex_rag_github/#retrieve-the-data-from-github","title":"Retrieve the data from GitHub","text":"

First, we need to initialize the GitHub client, which will include the GitHub token for repository access.

"},{"location":"community/integrations/llamaindex_rag_github/#create-the-index-and-make-some-queries","title":"Create the index and make some queries","text":""},{"location":"getting_started/faq/","title":"FAQs","text":"What is Argilla?

Argilla is a collaboration tool for AI engineers and domain experts that require high-quality outputs, full data ownership, and overall efficiency. It is designed to help you achieve and keep high-quality data standards, store your training data, store the results of your models, evaluate their performance, and improve the data through human and AI feedback.

Does Argilla cost money?

No. Argilla is an open-source project and is free to use. You can deploy Argilla on your own infrastructure or use our cloud offering.

What data types does Argilla support?

Text data, mostly. Argilla natively supports textual data; however, we do support rich text, which means you can represent different types of data in Argilla as long as you can convert them to text. For example, you can store images, audio, video, and any other type of data as long as you can convert it to its base64 representation or render it as HTML in, for example, an IFrame.
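As a minimal sketch (the dataset name, field names, and image URL are made up), a markdown-enabled TextField can render an HTML image tag:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\n# A field with use_markdown=True renders HTML, e.g. an <img> tag\nsettings = rg.Settings(\n    fields=[rg.TextField(name=\"content\", use_markdown=True)],\n    questions=[rg.LabelQuestion(name=\"quality\", labels=[\"good\", \"bad\"])],\n)\n\ndataset = rg.Dataset(name=\"rich_content_example\", settings=settings, client=client)\ndataset.create()\n\ndataset.records.log([{\"content\": \"<img src='https://example.com/photo.png' width='300' />\"}])\n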

Does Argilla train models?

No. Argilla is a collaboration tool to achieve and keep high-quality data standards. You can use Argilla to store your training data, store the results of your models, evaluate their performance and improve the data. For training models, you can use any machine learning framework or library that you prefer even though we recommend starting with Hugging Face Transformers.

Does Argilla provide annotation workforces?

Yes, kind of. We don't provide annotation workforce in-house but we do have partnerships with workforce providers that ensure ethical practices and secure work environments. Feel free to schedule a meeting here or contact us via email.

How does Argilla differ from competitors like Lilac, Snorkel, Prodigy and Scale?

Argilla distinguishes itself through its focus on specific use cases and human-in-the-loop approaches. While it does offer programmatic features, Argilla\u2019s core value lies in actively involving human experts in the tool-building process, setting it apart from other competitors.

Furthermore, Argilla places particular emphasis on smooth integration with other tools in the community, particularly within the realms of MLOps and NLP. So, its compatibility with popular frameworks like spaCy and Hugging Face makes it exceptionally user-friendly and accessible.

Finally, platforms like Snorkel, Prodigy or Scale, while more comprehensive, often require a significant commitment. Argilla, on the other hand, works more as a tool within the MLOps ecosystem, allowing users to begin with specific use cases and then scale up as needed. This flexibility is particularly beneficial for users and customers who prefer to start small and expand their applications over time, as opposed to committing to an all-encompassing tool from the outset.

What is the difference between Argilla 2.0 and the legacy datasets in 1.0?

Argilla 1.0 relied on 3 main task datasets: DatasetForTextClassification, DatasetForTokenClassification, and DatasetForText2Text. These tasks were designed to be simple, easy to use and high in functionality but they were limited in adaptability. With the introduction of Large Language Models (LLMs) and the increasing complexity of NLP tasks, we realized that we needed to expand the capabilities of Argilla to support more advanced feedback mechanisms which led to the introduction of the FeedbackDataset. Compared to its predecessor it was high in adaptability but still limited in functionality. After having ported all of the functionality of the legacy tasks to the new FeedbackDataset, we decided to deprecate the legacy tasks in favor of a brand new SDK with the FeedbackDataset at its core.

"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/","title":"Hugging Face Spaces Settings","text":"

This section details how to configure and deploy Argilla on Hugging Face Spaces. It covers:

  • Persistent storage
  • How to deploy Argilla under a Hugging Face Organization
  • How to configure and disable HF OAuth access
  • How to use Private Spaces

Looking to get started easily?

If you just discovered Argilla and want to get started quickly, go to the Quickstart guide.

"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#persistent-storage","title":"Persistent storage","text":"

In the Space creation UI, persistent storage is set to Small PAID, which is a paid service, charged per hour of usage.

Spaces get restarted due to maintenance, inactivity, and every time you change your Space settings. Persistent storage enables Argilla to save your datasets and configurations to disk across restarts.

Ephemeral FREE persistent storage

Not setting persistent storage to Small means that you will lose your data when the Space restarts.

If you plan to use the Argilla Space beyond testing, it's highly recommended to set persistent storage to Small.

If you just want to quickly test or use Argilla for a few hours with the risk of losing your datasets, choose Ephemeral FREE. Ephemeral FREE means your datasets and configuration will not be saved to disk; when the Space is restarted, your datasets, workspaces, and users will be lost.

If you want to disable the persistent storage warning, you can set the environment variable ARGILLA_SHOW_HUGGINGFACE_SPACE_PERSISTENT_STORAGE_WARNING=false.

Read this if you have datasets and want to enable persistent storage

If you want to enable persistent storage Small PAID and you have created datasets, users, or workspaces, follow this process:

  • First, make a local or remote copy of your datasets, following the Import and Export guide (see the sketch after this list). This is the most important step, because changing the settings of your Space leads to a restart and thus data loss.
  • If you have created users (not signed in with Hugging Face login), consider storing a copy of users following the manage users guide.
  • Once you have stored all your data safely, go to your Space Settings tab and select Small.
  • Your Space will be restarted and existing data will be lost. From now on, all the new data you create in Argilla will be kept safely.
  • Recover your data by following the above-mentioned guides.
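A minimal sketch of that first backup step with the SDK (the dataset name, local path, and repo id are placeholders; the Import and Export guide covers all the options):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_first_dataset\")  # placeholder dataset name\n\n# Local copy on disk (placeholder path)\ndataset.to_disk(\"./backup/my_first_dataset\")\n\n# Or a remote copy on the Hugging Face Hub (placeholder repo id)\ndataset.to_hub(repo_id=\"my-username/my_first_dataset\")\n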
"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#how-to-configure-and-disable-oauth-access","title":"How to configure and disable OAuth access","text":"

By default, Argilla Spaces are configured with Hugging Face OAuth, in the following way:

  • Any Hugging Face user that can see your Space, can use the Sign in button, join as an annotator, and contribute to the datasets available under the argilla workspace. This workspace is created during the deployment process.
  • These users can only explore and annotate datasets in the argilla workspace but can't perform any critical operation like create, delete, update, or configure datasets. By default, any other workspace you create won't be visible to these users.

To restrict access or change the default behaviour, there are two options:

Set your Space to private. This is especially useful if your Space is under an organization. This will only allow members within your organization to see and join your Argilla space. It can also be used for personal, solo projects.

Modify the .oauth.yml configuration file. You can find and modify this file under the Files tab of your Space. The default file looks like this:

# Change to `false` to disable HF oauth integration\n#enabled: false\n\nproviders:\n  - name: huggingface\n\n# Allowed workspaces must exists\nallowed_workspaces:\n  - name: argilla\n
You can modify two things:

  • Uncomment enabled: false to completely disable the Sign in with Hugging Face. If you disable it, make sure to set the USERNAME and PASSWORD Space secrets to be able to log in as an owner.
  • Change the list of allowed workspaces.

For example if you want to let users join a new workspace community-initiative:

allowed_workspaces:\n  - name: argilla\n  - name: community-initiative\n
"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#how-to-deploy-argilla-under-a-hugging-face-organization","title":"How to deploy Argilla under a Hugging Face Organization","text":"

Creating an Argilla Space within an organization is useful for several scenarios:

  • You want to only enable members of your organization to join your Space. You can achieve this by setting your Space to private.
  • You want to manage the Space together with other users (e.g., Space settings, etc.). Note that if you just want to manage your Argilla datasets and workspaces, you can achieve this by adding other users with the Argilla owner role to your Argilla Server.
  • More generally, you want to make your Space available under an organization/community umbrella.

The steps are very similar to the Quickstart guide with two important differences:

Setup USERNAME

You need to set up the USERNAME Space Secret with your Hugging Face username. This way, the first time you enter with the Hugging Face Sign in button, you'll be granted the owner role.

Enable Persistent Storage SMALL

Not setting persistent storage to Small means that you will lose your data when the Space restarts.

For Argilla Spaces with many users, it's strongly recommended to set persistent storage to Small.

"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#how-to-use-private-spaces","title":"How to use Private Spaces","text":"

Setting your Space visibility to private can be useful if:

  • You want to work on your personal, solo project.
  • You want your Argilla to be available only to members of the organization where you deploy the Argilla Space.

You can set the visibility of the Space during the Space creation process or afterwards under the Settings Tab.

To use the Python SDK with private Spaces, you need to specify your HF_TOKEN, which can be found here, when creating the client:

import argilla as rg\n\nHF_TOKEN = \"...\"\n\nclient = rg.Argilla(\n    api_url=\"<api_url>\",\n    api_key=\"<api_key>\",\n    headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n
"},{"location":"getting_started/how-to-configure-argilla-on-huggingface/#space-secrets-overview","title":"Space Secrets overview","text":"

There are two optional secrets to set up the USERNAME and PASSWORD of the owner of the Argilla Space. Remember that, by default, Argilla Spaces are configured with a Sign in with Hugging Face button, which is also used to grant the owner role to the creator of the Space for personal spaces.

The USERNAME and PASSWORD are only useful in a couple of scenarios:

  • You have disabled Hugging Face OAuth.
  • You want to set up Argilla under an organization and want your Hugging Face username to be granted the owner role.

In summary, when setting up a Space:

Creating a Space under your personal account

If you are creating the Space under your personal account, don't insert any value for USERNAME and PASSWORD. Once you launch the Space you will be able to Sign in with your Hugging Face username and the owner role.

Creating a Space under an organization

If you are creating the Space under an organization make sure to insert your Hugging Face username in the secret USERNAME. In this way, you'll be able to Sign in with your Hugging Face user.

"},{"location":"getting_started/how-to-deploy-argilla-with-docker/","title":"Deploy with Docker","text":"

This guide describes how to deploy the Argilla Server with docker compose. This is useful if you want to deploy Argilla locally and/or have full control over the configuration of the server, database, and search engine (Elasticsearch).

First, you need to install docker on your machine and make sure you can run docker compose.

Then, create a folder (you can modify the folder name):

mkdir argilla && cd argilla\n

Download docker-compose.yaml:

wget -O docker-compose.yaml https://raw.githubusercontent.com/argilla-io/argilla/main/examples/deployments/docker/docker-compose.yaml\n

or using curl:

curl https://raw.githubusercontent.com/argilla-io/argilla/main/examples/deployments/docker/docker-compose.yaml -o docker-compose.yaml\n

Run to deploy the server on http://localhost:6900:

docker compose up -d\n

Once it is completed, open http://localhost:6900 in your browser and you should see the Argilla login page.

If it's not available, check the logs:

docker compose logs -f\n

Most of the deployment issues are related to ElasticSearch. Join the Hugging Face Discord server and ask for support on the Argilla channel.

"},{"location":"getting_started/quickstart/","title":"Quickstart","text":"

Argilla is a free, open-source, self-hosted tool. This means you need to deploy its UI to start using it. There are two main ways to deploy Argilla:

Deploy on the Hugging Face Hub

The recommended choice to get started. You can get up and running in under 5 minutes and don't need to maintain a server or run any commands.

If you're just getting started with Argilla, click the deploy button below:

You can use the default values following these steps:

  • Leave the default Space owner (your personal account)
  • Leave USERNAME and PASSWORD secrets empty since you'll sign in with your HF user as the Argilla Space owner.
  • Click create Space to launch Argilla \ud83d\ude80.
  • Once you see the Argilla UI, go to the Sign in into the Argilla UI section. If you see the Building message for longer than 2-3 minutes, refresh the page.

Persistent storage SMALL

Not setting persistent storage to SMALL means that you will lose your data when the Space restarts. Spaces get restarted due to maintenance, inactivity, and every time you change your Space settings. If you want to use the Space just for testing, you can use FREE temporarily.

If you want to deploy Argilla within a Hugging Face organization, set up a more stable Space, or understand the settings, check out the HF Spaces settings guide.

Deploy with Docker

If you want to run Argilla locally on your machine or a server, or tune the server configuration, choose this option. To use this option, check this guide.

"},{"location":"getting_started/quickstart/#sign-in-into-the-argilla-ui","title":"Sign in into the Argilla UI","text":"

If everything went well, you should see the Argilla sign in page that looks like this:

Building errors

If you get a build error, sometimes restarting the Space from the Settings page works, otherwise check the HF Spaces settings guide.

In the sign in page:

  1. Click on Sign in with Hugging Face
  2. Authorize the application and you will be logged in into Argilla as an owner.

Unauthorized error

Sometimes, after authorizing you'll see an unauthorized error, and get redirected to the sign in page. Typically, clicking the Sign in button solves the issue.

Congrats! Your Argilla server is ready to start your first project using the Python SDK. You now have full rights to create datasets. Follow the instructions in the home page, or keep reading this guide if you want a more detailed explanation.

"},{"location":"getting_started/quickstart/#install-the-python-sdk","title":"Install the Python SDK","text":"

To manage workspaces and datasets in Argilla, you need to use the Argilla Python SDK. You can install it with pip as follows:

pip install argilla\n
"},{"location":"getting_started/quickstart/#create-your-first-dataset","title":"Create your first dataset","text":"

For getting started with Argilla and its SDK, we recommend using Jupyter Notebook or Google Colab.

To start interacting with your Argilla server, you need to instantiate a client with an API key and API URL:

  • The <api_key> is in the My Settings page of your Argilla Space.

  • The <api_url> is the URL shown in your browser if it ends with *.hf.space.

import argilla as rg\n\nclient = rg.Argilla(\n    api_url=\"<api_url>\",\n    api_key=\"<api_key>\"\n)\n

You can't find your API URL

If you're using Spaces, sometimes the Argilla UI is embedded into the Hub UI so the URL of the browser won't match the API URL. In these scenarios, there are two options: 1. Click on the three points menu at the top of the Space, select \"Embed this Space\", and open the direct URL. 2. Use this pattern: https://[your-owner-name]-[your_space_name].hf.space.

To create a dataset with a simple text classification task, first, you need to define the dataset settings.

settings = rg.Settings(\n    guidelines=\"Classify the reviews as positive or negative.\",\n    fields=[\n        rg.TextField(\n            name=\"review\",\n            title=\"Text from the review\",\n            use_markdown=False,\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"my_label\",\n            title=\"In which category does this review fit?\",\n            labels=[\"positive\", \"negative\"],\n        )\n    ],\n)\n

Now you can create the dataset with these settings. Publish the dataset to make it available in the UI and add the records.

About workspaces

Workspaces in Argilla group datasets and user access rights. The workspace parameter is optional in this case. If you don't specify it, the dataset will be created in the default workspace argilla.

By default, this workspace will be visible to users joining with the Sign in with Hugging Face button. You can create other workspaces and decide to grant access to users either with the SDK (as sketched below) or by changing the OAuth configuration.
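A minimal sketch of the SDK route (the workspace name is just an example):

# Reuses the `client` created earlier in this guide\nworkspace = rg.Workspace(name=\"community-initiative\", client=client)\nworkspace.create()\n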

dataset = rg.Dataset(\n    name=\"my_first_dataset\",\n    settings=settings,\n    client=client,\n    #workspace=\"argilla\"\n)\ndataset.create()\n

Now you can add records to your dataset. We will use the IMDB dataset from the Hugging Face Datasets library as an example. The mapping parameter indicates which keys/columns in the source dataset correspond to the Argilla dataset fields.

from datasets import load_dataset\n\ndata = load_dataset(\"imdb\", split=\"train[:100]\").to_list()\n\ndataset.records.log(records=data, mapping={\"text\": \"review\"})\n

\ud83c\udf89 You have successfully created your first dataset with Argilla. You can now access it in the Argilla UI and start annotating the records.

"},{"location":"getting_started/quickstart/#next-steps","title":"Next steps","text":"
  • To learn how to create your datasets, workspace, and manage users, check the how-to guides.

  • To learn Argilla with hands-on examples, check the Tutorials section.

  • To further configure your Argilla Space, check the Hugging Face Spaces settings guide.

"},{"location":"how_to_guides/","title":"How-to guides","text":"

These guides provide step-by-step instructions for common scenarios, including detailed explanations and code samples. They are divided into two categories: basic and advanced. The basic guides will help you get started with the core concepts of Argilla, while the advanced guides will help you explore more advanced features.

"},{"location":"how_to_guides/#basic","title":"Basic","text":"
  • Manage users and credentials

    Learn what they are and how to manage (create, read and delete) Users in Argilla.

    How-to guide

  • Manage workspaces

    Learn what they are and how to manage (create, read and delete) Workspaces in Argilla.

    How-to guide

  • Create, update, and delete datasets

    Learn what they are and how to manage (create, read and delete) Datasets and customize them using the Settings for Fields, Questions, Metadata and Vectors.

    How-to guide

  • Add, update, and delete records

    Learn what they are and how to add, update and delete the values for a Record, which are made up of Metadata, Vectors, Suggestions and Responses.

    How-to guide

  • Distribute the annotation

    Learn how to use Argilla's automatic TaskDistribution to annotate as a team efficiently.

    How-to guide

  • Annotate a dataset

    Learn how to use the Argilla UI to navigate Datasets and submit Responses.

    How-to guide

  • Query and filter a dataset

    Learn how to query and filter a Dataset.

    How-to guide

  • Import and export datasets and records

    Learn how to export your Dataset or its Records to Python, your local disk, or the Hugging Face Hub.

    How-to guide

"},{"location":"how_to_guides/#advanced","title":"Advanced","text":"
  • Custom fields with layout templates

    Learn how to create CustomFields with HTML, CSS and JavaScript templates.

    How-to guide

  • Use Markdown to format rich content

    Learn how to use Markdown and HTML in TextField to format chat conversations and allow for basic multi-modal support for images, audio, video and PDFs.

    How-to guide

  • Migrate to Argilla V2

    Learn how to migrate Users, Workspaces and Datasets from Argilla V1 to V2.

    How-to guide

"},{"location":"how_to_guides/annotate/","title":"Annotate your dataset","text":"

To experience the UI features firsthand, you can take a look at the Demo \u2197.

Argilla UI offers many functions to help you manage your annotation workflow, aiming to provide the most flexible approach to fit the wide variety of use cases handled by the community.

"},{"location":"how_to_guides/annotate/#annotation-interface-overview","title":"Annotation interface overview","text":""},{"location":"how_to_guides/annotate/#flexible-layout","title":"Flexible layout","text":"

The UI is responsive with two columns for larger devices and one column for smaller devices. This enables you to annotate data using your mobile phone for simple datasets (i.e., not very long text and 1-2 questions) or resize your screen to get a more compact UI.

  • Header: At the right side of the navigation breadcrumb, you can customize the dataset settings and edit your profile.

  • Left pane: This area displays the control panel on the top. The control panel is used for performing keyword-based search, applying filters, and sorting the results. Below the control panel, the record card(s) are displayed one by one (Focus view) or in a vertical list (Bulk view).

  • Right pane: This is where you annotate your dataset. Simply fill it out as a form, then choose to Submit, Save as Draft, or Discard.

  • Left bottom panel: This expandable area displays the annotation guidelines. The annotation guidelines can be edited by owner and admin roles in the dataset settings.

  • Right bottom panel: This expandable area displays your annotation progress.

"},{"location":"how_to_guides/annotate/#shortcuts","title":"Shortcuts","text":"

The Argilla UI includes a range of shortcuts. For the main actions (submit, discard, save as draft, and selecting labels), the keys are shown on the corresponding button.

To learn how to move from one question to another or between records using the keyboard, take a look at the table below.

Shortcuts provide a smoother annotation experience, especially with datasets using a single question (Label, MultiLabel, Rating, or Ranking).

Available shortcuts:

  • Activate form: \u21e5 Tab
  • Move between questions: \u2193 Down arrow or \u2191 Up arrow
  • Select and unselect label: 1, 2, 3
  • Move between labels or ranking options: \u21e5 Tab or \u21e7 Shift \u21e5 Tab
  • Select rating and rank: 1, 2, 3
  • Fit span to character selection: Hold \u21e7 Shift
  • Activate text area: \u21e7 Shift \u21b5 Enter
  • Exit text area: Esc
  • Discard: \u232b Backspace
  • Save draft (Mac OS): \u2318 Cmd S
  • Save draft (Other): Ctrl S
  • Submit: \u21b5 Enter
  • Move between pages: \u2192 Right arrow or \u2190 Left arrow

"},{"location":"how_to_guides/annotate/#view-by-status","title":"View by status","text":"

The view selector is set by default on Pending.

If you are starting an annotation effort, all the records are initially kept in the Pending view. Once you start annotating, the records will move to the other queues: Draft, Submitted, Discarded.

  • Pending: The records without a response.
  • Draft: The records with partial responses. They can be submitted or discarded later. You can\u2019t move them back to the pending queue.
  • Discarded: The records may or may not have responses. They can be edited but you can\u2019t move them back to the pending queue.
  • Submitted: The records have been fully annotated and have already been submitted. You can remove them from this queue and send them to the draft or discarded queues, but never back to the pending queue.

Note

If you are working as part of a team, the number of records in your Pending queue may change as other members of the team submit responses and those records get completed.

Tip

If you are working as part of a team, the records in the draft queue that have been completed by other team members will show a check mark to indicate that there is no need to provide a response.

"},{"location":"how_to_guides/annotate/#suggestions","title":"Suggestions","text":"

If your dataset includes model predictions, you will see them represented by a sparkle icon \u2728 in the label or value button. We call them \u201cSuggestions\u201d and they appear in the form as pre-filled responses. If confidence scores have been included by the dataset admin, they will be shown alongside the label. Additionally, admins can choose to always show suggested labels at the beginning of the list. This can be configured from the dataset settings.

If you agree with the suggestions, you just need to click on the Submit button, and they will be considered as your response. If the suggestion is incorrect, you can modify it and submit your final response.

"},{"location":"how_to_guides/annotate/#focus-view","title":"Focus view","text":"

This is the default view to annotate your dataset linearly, displaying one record after another.

Tip

You should use this view if you have a large number of required questions or need a strong focus on the record content to be labelled. This is also the recommended view for annotating a dataset sample to avoid potential biases introduced by using filters, search, sorting and bulk labelling.

Once you submit your first response, the next record will appear automatically. To see your submitted response again, just click on Prev.

Navigating through the records

To navigate through the records, you can use the\u00a0Prev, shown as\u00a0<, and\u00a0Next,\u00a0> buttons on top of the record card.

Each time the page is fully refreshed, the records with modified statuses (Pending to Discarded, Pending to Save as Draft, Pending to Submitted) are sent to the corresponding queue. The control panel displays the status selector, which is set to Pending by default.

"},{"location":"how_to_guides/annotate/#bulk-view","title":"Bulk view","text":"

The bulk view is designed to speed up the annotation and get a quick overview of the whole dataset.

The bulk view displays the records in a vertical list. Once this view is active, some functions from the control panel will activate to optimize the view. You can define the number of records to display per page (10, 25, 50, or 100) and whether records are shown with a fixed height (Collapse records) or their natural height (Expand records).

Tip

You should use this to quickly explore a dataset. This view is also recommended if you have a good understanding of the domain and want to apply your knowledge based on things like similarity and keyword search, filters, and suggestion score thresholds. For datasets with a large number of required questions or very long fields, the focus view would be more suitable.

With multiple questions, consider using the bulk view to annotate one question in bulk. Then, you can complete the annotation record by record from the draft queue.

Note

Please note that suggestions are not shown in bulk view (except for Spans) and that you will need to save as a draft when you are not providing responses to all required questions.

"},{"location":"how_to_guides/annotate/#annotation-progress","title":"Annotation progress","text":"

You can track the progress of an annotation task in the progress bar shown in the dataset list and in the progress panel inside the dataset. This bar shows the number of records that have been completed (i.e., those that have the minimum number of submitted responses) and those left to be completed.

You can also track your own progress in real time by expanding the right-bottom panel inside the dataset page. There you can see the number of records for which you have Pending, Draft, Submitted and Discarded responses.

Note

You can also explore the dataset progress from the SDK. Check the Track your team's progress section to learn more.
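
For instance, a minimal sketch of checking progress from the SDK (assuming a dataset named my_dataset) could look like this:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\n# Completed, pending and total record counts\nprint(dataset.progress())\n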

"},{"location":"how_to_guides/annotate/#use-search-filters-and-sort","title":"Use search, filters, and sort","text":"

The UI offers various features designed for data exploration and understanding. Combining these features with bulk labelling can save you and your team hours of time.

Tip

You should use this when you are familiar with your data and have large volumes to annotate based on verified beliefs and experience.

"},{"location":"how_to_guides/annotate/#search","title":"Search","text":"

From the control panel at the top of the left pane, you can search by keyword across the entire dataset. If you have more than one field in your records, you may specify whether the search is to be performed on \u201cAll\u201d fields or on a specific one. Matched results are highlighted in color.

Note

If you introduce more than one keyword, the search will return results where all keywords have a match.

Tip

For more advanced searches, take a look at the advanced queries DSL.

"},{"location":"how_to_guides/annotate/#order-by-record-semantic-similarity","title":"Order by record semantic similarity","text":"

You can retrieve records based on their similarity to another record if vectors have been added to the dataset.

Note

Check these guides to know how to add vectors to your\u00a0dataset and\u00a0records.

To use the search by semantic similarity function, click on Find similar within the record you wish to use as a reference. If multiple vectors are available, select the desired vector. You can also choose whether to retrieve the most or least similar records.

The retrieved records are then ordered by similarity, with the similarity score displayed on each record card.

While the semantic search is active, you can update the selected vector or adjust the order of similarity, and specify the number of desired results.

To cancel the search, click on the cross icon next to the reference record.

"},{"location":"how_to_guides/annotate/#filter-and-sort-by-metadata-responses-and-suggestions","title":"Filter and sort by metadata, responses, and suggestions","text":""},{"location":"how_to_guides/annotate/#filter","title":"Filter","text":"

If the dataset contains metadata, responses and suggestions, click on\u00a0Filter in the control panel to display the available filters. You can select multiple filters and combine them.

Note

Record info including metadata is visible from the ellipsis menu in the record card.

From the Metadata dropdown, type and select the property. You can set a range for integer and float properties, and select specific values for term metadata.

Note

Note that if a metadata property was set to visible_for_annotators=False, it will only appear in the metadata filter for users with the admin or owner role.
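
As a sketch, this visibility is configured on the metadata property itself when defining the dataset settings; the property name here is illustrative:

rg.TermsMetadataProperty(\n    name=\"annotator_group\",  # illustrative name\n    options=[\"group-a\", \"group-b\"],\n    visible_for_annotators=False,  # hidden from the metadata filter for annotators\n)\n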

From the Responses dropdown, type and select the question. You can set a range for rating questions and select specific values for label, multi-label, and span questions.

Note

The text and ranking questions are not available for filtering.

From the Suggestions dropdown, filter the suggestions by Suggestion values, Score, or Agent.

"},{"location":"how_to_guides/annotate/#sort","title":"Sort","text":"

You can sort your records according to one or several attributes.

The insertion time and last update are available for all records.

The suggestion scores, the response and suggestion values for rating questions, and the metadata properties are only available when they have been provided.

"},{"location":"how_to_guides/custom_fields/","title":"Custom fields with layout templates","text":"

This guide demonstrates how to create custom fields in Argilla using HTML, CSS, and JavaScript templates.

Main Class

rg.CustomField(\n    name=\"custom\",\n    title=\"Custom\",\n    template=\"<div>{{record.fields.custom.key}}</div>\",\n    advanced_mode=False,\n    required=True,\n    description=\"Field description\",\n)\n

Check the CustomField - Python Reference to see the attributes, arguments, and methods of the CustomField class in detail.

"},{"location":"how_to_guides/custom_fields/#understanding-the-record-object","title":"Understanding the Record Object","text":"

The record object is the main JavaScript object that contains all the information about the Argilla record in the UI, like fields, metadata, etc. Your template can use this object to display record information within the custom field. For example, you can access the fields of the record by navigating to record.fields.<field_name>, and this generally works the same for metadata, responses, etc.
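
For example, assuming a record with a text field and a source metadata property (both names illustrative), a template could reference them like this:

template = \"\"\"\n<div>{{record.fields.text}}</div>\n<div>{{record.metadata.source}}</div>\n\"\"\"\n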

"},{"location":"how_to_guides/custom_fields/#using-handlebars-in-your-template","title":"Using Handlebars in your template","text":"

By default, custom fields will use the Handlebars syntax engine to render templates with record information. This engine converts the content inside the brackets {{}} to the values of the record's fields object that you reference within your template. As described in the Understanding the Record Object section, you can access the fields of the record by navigating to {{record.fields.<field_name>}}. For more complex use cases, Handlebars offers various expressions, partials, and helpers that you can use to render your data. You can deactivate the Handlebars engine with the advanced_mode=True parameter in CustomField; you will then need to define custom JavaScript to access the record attributes, as described in the Advanced Mode section.

"},{"location":"how_to_guides/custom_fields/#usage-example","title":"Usage example","text":"

Because of the Handlebars syntax engine, we only need to pass the HTML and, optionally, some CSS between <style> tags.

css_template = \"\"\"\n<style>\n#container {\n    display: flex;\n    gap: 10px;\n}\n.column {\n    flex: 1;\n}\n</style>\n\"\"\" # (1)\n\nhtml_template = \"\"\"\n<div id=\"container\">\n    <div class=\"column\">\n        <h3>Original</h3>\n        <img src=\"{{record.fields.image.original}}\" />\n    </div>\n    <div class=\"column\">\n        <h3>Revision</h3>\n        <img src=\"{{record.fields.image.revision}}\" />\n    </div>\n</div>\n\"\"\" # (2)\n
  1. This is a CSS template, which ensures that the container and columns are styled.
  2. This is an HTML template, which creates a container with two columns and injects the value corresponding to the key of the image field into it.

We can now pass these templates to the CustomField class.

import argilla as rg\n\ncustom_field = rg.CustomField(\n    name=\"image\",\n    template=css_template + html_template,\n)\n\nsettings = rg.Settings(\n    fields=[custom_field],\n    questions=[rg.TextQuestion(name=\"response\")],\n)\n\ndataset = rg.Dataset(\n    name=\"custom_field_dataset\",\n    settings=settings,\n).create()\n\ndataset.records.log([\n    rg.Record(\n        fields={\n            \"image\": {\n                \"original\": \"https://argilla.io/brand-assets/argilla/argilla-logo-color-black.png\",\n                \"revision\": \"https://argilla.io/brand-assets/argilla/argilla-logo-black.png\",\n            }\n        }\n    )]\n)\n

The result will be the following:

"},{"location":"how_to_guides/custom_fields/#example-gallery","title":"Example Gallery","text":"Metadata in a table

You can make it easier to read metadata by displaying it in a table. This uses handlebars to iterate over the metadata object and display each key-value pair in a row.

template = \"\"\"\n<style>\n    .container {\n        border: 1px solid #ddd;\n        font-family: sans-serif;\n    }\n    .row {\n        display: flex;\n        border-bottom: 1px solid #ddd;\n    }\n    .row:last-child {\n        border-bottom: none;\n    }\n    .column {\n        flex: 1;\n        padding: 8px;\n    }\n    .column:first-child {\n        border-right: 1px solid #ddd;\n    }\n</style>\n<div class=\"container\">\n    <div class=\"header\">\n        <div class=\"column\">Metadata</div>\n        <div class=\"column\">Value</div>\n    </div>\n    {{#each record.metadata}}\n    <div class=\"row\">\n        <div class=\"column\">{{@key}}</div>\n        <div class=\"column\">{{this}}</div>\n    </div>\n    {{/each}}\n</div>\n\"\"\"\nrecord = rg.Record(\n    fields={\"text\": \"hello\"},\n    metadata={\n        \"name\": \"John Doe\",\n        \"age\": 25,\n    }\n)\n

JSON viewer

The value of a custom field is a dictionary in Python and a JavaScript object in the browser. You can render this object as a JSON string using the json helper. This is implemented in Argilla's frontend for convenience. If you want to learn more about handlebars helpers, you can check the handlebars documentation.

template = \"{{ json record.fields.user_profile }}\"\n\nrecord = rg.Record(\n    fields={\n        \"user_profile\": {\n            \"name\": \"John Doe\",\n            \"age\": 30,\n            \"address\": \"123 Main St\",\n            \"email\": \"john.doe@hooli.com\",\n        }\n    },\n)\n
"},{"location":"how_to_guides/custom_fields/#advanced-mode","title":"Advanced Mode","text":"

When advanced_mode=True, you can use the template argument to pass a full HTML page. This allows for more complex customizations, including the use of JavaScript. The record object will be available in the global scope, so you can access it in your JavaScript code as described in the Understanding the Record Object section.

"},{"location":"how_to_guides/custom_fields/#usage-example_1","title":"Usage example","text":"

Let's reproduce the example from the Without advanced mode section, but this time we will insert the Handlebars syntax engine into the template ourselves.

template = \"\"\"\n<div id=\"content\"></div>\n<script id=\"template\" type=\"text/x-handlebars-template\">\n    <style>\n    #container {\n        display: flex;\n        gap: 10px;\n    }\n    .column {\n        flex: 1;\n    }\n    </style>\n    <div id=\"container\">\n        <div class=\"column\">\n            <h3>Original</h3>\n            <img src=\"{{record.fields.image.original}}\" />\n        </div>\n        <div class=\"column\">\n            <h3>Revision</h3>\n            <img src=\"{{record.fields.image.revision}}\" />\n        </div>\n    </div>\n</script>\n\"\"\" # (1)\n\nscript = \"\"\"\n<script src=\"https://cdn.jsdelivr.net/npm/handlebars@latest/dist/handlebars.js\"></script>\n<script>\n    const template = document.getElementById(\"template\").innerHTML;\n    const compiledTemplate = Handlebars.compile(template);\n    const html = compiledTemplate({ record });\n    document.getElementById(\"content\").innerHTML = html;\n</script>\n\"\"\" # (2)\n
  1. This is a JavaScript template script. We set id to template to use it later in our JavaScript code and type to text/x-handlebars-template to indicate that this is a Handlebars template. Note that we also added a div with id to content to render the template into.
  2. This is a JavaScript template script. We load the Handlebars library and then use it to compile the template and render the record. Eventually, we render the result into the div with id to content.

We can now pass these templates to the CustomField class, ensuring that the advanced_mode is set to True.

import argilla as rg\n\ncustom_field = rg.CustomField(\n    name=\"image\",\n    template=template + script,\n    advanced_mode=True\n)\n

Besides the new CustomField code above, you can reuse the same approach as in the Using Handlebars in your template section to create a dataset and log a record to it, yielding the same result.

"},{"location":"how_to_guides/custom_fields/#example-gallery_1","title":"Example Gallery","text":"3D object viewer

We will now use native JavaScript and three.js to create a 3D object viewer. We will then use the record object directly to insert URLs from the record's fields.

template = \"\"\"\n<script src=\"https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js\"></script>\n<script src=\"https://cdn.jsdelivr.net/npm/three@0.128.0/examples/js/loaders/GLTFLoader.js\"></script>\n<script src=\"https://cdn.jsdelivr.net/npm/three@0.128.0/examples/js/controls/OrbitControls.js\"></script>\n\n\n<div style=\"display: flex;\">\n    <div>\n        <h3>Option A</h3>\n        <canvas id=\"canvas1\" width=\"400\" height=\"400\"></canvas>\n    </div>\n    <div>\n        <h3>Option B</h3>\n        <canvas id=\"canvas2\" width=\"400\" height=\"400\"></canvas>\n    </div>\n</div>\n\n<script>\n    function init(canvasId, modelUrl) {\n    let scene, camera, renderer, controls;\n\n    const canvas = document.getElementById(canvasId);\n    scene = new THREE.Scene();\n    camera = new THREE.PerspectiveCamera(75, 1, 0.1, 1000);\n    renderer = new THREE.WebGLRenderer({ canvas, alpha: true });\n\n    renderer.setSize(canvas.clientWidth, canvas.clientHeight);\n\n    const directionalLight = new THREE.DirectionalLight(0xffffff, 1);\n    directionalLight.position.set(2, 2, 5);\n    scene.add(directionalLight);\n\n    const ambientLight = new THREE.AmbientLight(0x404040, 7);\n    scene.add(ambientLight);\n\n    controls = new THREE.OrbitControls(camera, renderer.domElement);\n    controls.maxPolarAngle = Math.PI / 2;\n\n    const loader = new THREE.GLTFLoader();\n    loader.load(\n        modelUrl,\n        function (gltf) {\n        const model = gltf.scene;\n        scene.add(model);\n        model.position.set(0, 0, 0);\n\n        const box = new THREE.Box3().setFromObject(model);\n        const center = box.getCenter(new THREE.Vector3());\n        model.position.sub(center);\n        camera.position.set(center.x, center.y, center.z + 1.2);\n\n        animate();\n        },\n        undefined,\n        function (error) {\n        console.error(error);\n        }\n    );\n\n    function animate() {\n        requestAnimationFrame(animate);\n        controls.update();\n        renderer.render(scene, camera);\n    }\n    }\n\n    init(\"canvas1\", record.fields.object.option_a);\n    init(\"canvas2\", record.fields.object.option_b);\n</script>\n\n\"\"\"\n

Next, we will create a record with two URLs to 3D objects from the 3d-arena dataset.

record = rg.Record(\n    fields={\n        \"object\": {\n            \"option_a\": \"https://huggingface.co/datasets/dylanebert/3d-arena/resolve/main/outputs/Strawb3rry/a_bookshelf_with_ten_books_stacked_vertically.glb\",\n            \"option_b\": \"https://huggingface.co/datasets/dylanebert/3d-arena/resolve/main/outputs/MeshFormer/a_bookshelf_with_ten_books_stacked_vertically.glb\",\n        }\n    }\n)\n

"},{"location":"how_to_guides/custom_fields/#updating-templates","title":"Updating templates","text":"

As described in the dataset guide, you can update certain settings attributes for a published dataset. This includes the custom field templates, which is a useful feature when you want to iterate on the template of a custom field without needing to create a new dataset. The following example shows how to update the template of a custom field.

dataset.settings.fields[\"custom\"].template = \"<new-template>\"\ndataset.update()\n
"},{"location":"how_to_guides/dataset/","title":"Dataset management","text":"

This guide provides an overview of datasets, explaining the basics of how to set them up and manage them in Argilla.

A dataset is a collection of records that you can configure for labelers to provide feedback using the UI. Depending on the specific requirements of your task, you may need various types of feedback. You can customize the dataset to include different kinds of questions, so the first step will be to define the aim of your project and the kind of data and feedback you will need. With this information, you can start configuring a dataset by defining fields, questions, metadata, vectors, and guidelines through settings.

Question: Who can manage datasets?

Only users with the owner role can manage (create, retrieve, update and delete) all the datasets.

The users with the admin role can manage (create, retrieve, update and delete) the datasets in the workspaces they have access to.

Main Classes

rg.Datasetrg.Settings
rg.Dataset(\n    name=\"name\",\n    workspace=\"workspace\",\n    settings=settings,\n    client=client\n)\n

Check the Dataset - Python Reference to see the attributes, arguments, and methods of the Dataset class in detail.

rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[\n        rg.LabelQuestion(\n            name=\"label\",\n            labels=[\"label_1\", \"label_2\", \"label_3\"]\n        )\n    ],\n    metadata=[rg.TermsMetadataProperty(name=\"metadata\")],\n    vectors=[rg.VectorField(name=\"vector\", dimensions=10)],\n    guidelines=\"guidelines\",\n    allow_extra_metadata=True,\n    distribution=rg.TaskDistribution(min_submitted=2),\n)\n

Check the Settings - Python Reference to see the attributes, arguments, and methods of the Settings class in detail.

"},{"location":"how_to_guides/dataset/#create-a-dataset","title":"Create a dataset","text":"

To create a dataset, you can define it in the Dataset class and then call the create method that will send the dataset to the server so that it can be visualized in the UI. If the dataset does not appear in the UI, you may need to click the refresh button to update the view. For further configuration of the dataset, you can refer to the settings section.

Info

If you have deployed Argilla with Hugging Face Spaces and HF Sign in, you can use argilla as a workspace name. Otherwise, you might need to create a workspace following this guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nsettings = rg.Settings(\n    guidelines=\"These are some guidelines.\",\n    fields=[\n        rg.TextField(\n            name=\"text\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"label\",\n            labels=[\"label_1\", \"label_2\", \"label_3\"]\n        ),\n    ],\n)\n\ndataset = rg.Dataset(\n    name=\"my_dataset\",\n    workspace=\"my_workspace\",\n    settings=settings,\n)\n\ndataset.create()\n

The created dataset will be empty. To add records, go to this how-to guide.

Accessing attributes

Access the attributes of a dataset by calling them directly on the dataset object. For example, dataset.id, dataset.name or dataset.settings. You can similarly access the fields, questions, metadata, vectors and guidelines. For instance, dataset.fields or dataset.questions.
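
For example, a quick sketch of attribute access (assuming a dataset named my_dataset already exists):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\nprint(dataset.id)\nprint(dataset.name)\nprint(dataset.settings)\nprint(dataset.fields)\nprint(dataset.questions)\n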

"},{"location":"how_to_guides/dataset/#create-multiple-datasets-with-the-same-settings","title":"Create multiple datasets with the same settings","text":"

To create multiple datasets with the same settings, define the settings object once and pass it to each dataset.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nsettings = rg.Settings(\n    guidelines=\"These are some guidelines.\",\n    fields=[rg.TextField(name=\"text\", use_markdown=True)],\n    questions=[\n        rg.LabelQuestion(name=\"label\", labels=[\"label_1\", \"label_2\", \"label_3\"])\n    ],\n    distribution=rg.TaskDistribution(min_submitted=3),\n)\n\ndataset1 = rg.Dataset(name=\"my_dataset_1\", settings=settings)\ndataset2 = rg.Dataset(name=\"my_dataset_2\", settings=settings)\n\n# Create the datasets on the server\ndataset1.create()\ndataset2.create()\n
"},{"location":"how_to_guides/dataset/#create-a-dataset-from-an-existing-dataset","title":"Create a dataset from an existing dataset","text":"

To create a new dataset from an existing dataset, get the settings from the existing dataset and pass them to the new dataset.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nexisting_dataset = client.datasets(\"my_dataset\")\n\nnew_dataset = rg.Dataset(name=\"my_dataset_copy\", settings=existing_dataset.settings)\n\nnew_dataset.create()\n

Info

You can also copy the records from the original dataset to the new one:

records = list(existing_dataset.records)\nnew_dataset.records.log(records)\n
"},{"location":"how_to_guides/dataset/#define-dataset-settings","title":"Define dataset settings","text":"

Tip

Instead of defining your own custom settings, you can use some of our pre-built templates for text classification, ranking and rating. Learn more here.

"},{"location":"how_to_guides/dataset/#fields","title":"Fields","text":"

The fields in a dataset consist of one or more data items requiring annotation. Currently, Argilla supports plain text and markdown through the TextField, images through the ImageField, chat formatted data through the ChatField and full custom templates through our CustomField.

Note

The order of the fields in the UI follows the order in which these are added to the fields attribute in the Python SDK.

Check the Field - Python Reference to see the field classes in detail.

TextImageChatCustom

rg.TextField(\n    name=\"text\",\n    title=\"Text\",\n    use_markdown=False,\n    required=True,\n    description=\"Field description\",\n)\n

rg.ImageField(\n    name=\"image\",\n    title=\"Image\",\n    required=True,\n    description=\"Field description\",\n)\n

rg.ChatField(\n    name=\"chat\",\n    title=\"Chat\",\n    use_markdown=True,\n    required=True,\n    description=\"Field description\",\n)\n

A CustomField allows you to use a custom template for the field. This is useful if you want to use a custom UI for the field. You can use the template argument to pass a string that will be rendered as the field's UI.

By default, advanced_mode=False, which will use a brackets syntax engine for the templates. This engine converts {{record.fields.field.key}} to the values of the record's fields object. You can also use advanced_mode=True, which deactivates the brackets syntax engine and allows you to add custom JavaScript to your template to render the field.

rg.CustomField(\n    name=\"custom\",\n    title=\"Custom\",\n    template=\"<div>{{record.fields.custom.key}}</div>\",\n    advanced_mode=False,\n    required=True,\n    description=\"Field description\",\n)\n

Tip

To learn more about how to create custom fields with HTML and CSS templates, check this how-to guide.

"},{"location":"how_to_guides/dataset/#questions","title":"Questions","text":"

To collect feedback for your dataset, you need to formulate questions that annotators will be asked to answer.

Check the Questions - Python Reference to see the question classes in detail.

LabelMulti-labelRankingRatingSpanText

A LabelQuestion asks annotators to choose a unique label from a list of options. This type is useful for text classification tasks. In the UI, they will have a rounded shape.

rg.LabelQuestion(\n    name=\"label\",\n    labels={\"YES\": \"Yes\", \"NO\": \"No\"}, # or [\"YES\", \"NO\"]\n    title=\"Is the response relevant for the given prompt?\",\n    description=\"Select the one that applies.\",\n    required=True,\n    visible_labels=10\n)\n

A MultiLabelQuestion asks annotators to choose all applicable labels from a list of options. This type is useful for multi-label text classification tasks. In the UI, they will have a squared shape.

rg.MultiLabelQuestion(\n    name=\"multi_label\",\n    labels={\n        \"hate\": \"Hate Speech\",\n        \"sexual\": \"Sexual content\",\n        \"violent\": \"Violent content\",\n        \"pii\": \"Personal information\",\n        \"untruthful\": \"Untruthful info\",\n        \"not_english\": \"Not English\",\n        \"inappropriate\": \"Inappropriate content\"\n    }, # or [\"hate\", \"sexual\", \"violent\", \"pii\", \"untruthful\", \"not_english\", \"inappropriate\"]\n    title=\"Does the response include any of the following?\",\n    description=\"Select all that apply.\",\n    required=True,\n    visible_labels=10,\n    labels_order=\"natural\"\n)\n

A RankingQuestion asks annotators to order a list of options. It is useful to gather information on the preference or relevance of a set of options.

rg.RankingQuestion(\n    name=\"ranking\",\n    values={\n        \"reply-1\": \"Reply 1\",\n        \"reply-2\": \"Reply 2\",\n        \"reply-3\": \"Reply 3\"\n    }, # or [\"reply-1\", \"reply-2\", \"reply-3\"]\n    title=\"Order replies based on your preference\",\n    description=\"1 = best, 3 = worst. Ties are allowed.\",\n    required=True,\n)\n

A RatingQuestion asks annotators to select one option from a list of integer values. This type is useful for collecting numerical scores.

rg.RatingQuestion(\n    name=\"rating\",\n    values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n    title=\"How satisfied are you with the response?\",\n    description=\"1 = very unsatisfied, 10 = very satisfied\",\n    required=True,\n)\n

A SpanQuestion asks annotators to select a portion of the text of a specific field and apply a label to it. This type of question is useful for named entity recognition or information extraction tasks.

rg.SpanQuestion(\n    name=\"span\",\n    field=\"text\",\n    labels={\n        \"PERSON\": \"Person\",\n        \"ORG\": \"Organization\",\n        \"LOC\": \"Location\",\n        \"MISC\": \"Miscellaneous\"\n    }, # or [\"PERSON\", \"ORG\", \"LOC\", \"MISC\"]\n    title=\"Select the entities in the text\",\n    description=\"Select the entities in the text\",\n    required=True,\n    allow_overlapping=False,\n    visible_labels=10\n)\n

A TextQuestion offers annotators a free-text area where they can enter any text. This type is useful for collecting natural language data, such as corrections or explanations.

rg.TextQuestion(\n    name=\"text\",\n    title=\"Please provide feedback on the response\",\n    description=\"Please provide feedback on the response\",\n    required=True,\n    use_markdown=True\n)\n

"},{"location":"how_to_guides/dataset/#metadata","title":"Metadata","text":"

Metadata properties allow you to configure the use of metadata information for the filtering and sorting features available in the UI and Python SDK.

Check the Metadata - Python Reference to see the metadata classes in detail.

TermsIntegerFloat

A TermsMetadataProperty allows you to add a list of strings as metadata options.

rg.TermsMetadataProperty(\n    name=\"terms\",\n    options=[\"group-a\", \"group-b\", \"group-c\"],\n    title=\"Annotation groups\",\n    visible_for_annotators=True,\n)\n

An IntegerMetadataProperty allows you to add integer values as metadata.

rg.IntegerMetadataProperty(\n    name=\"integer\",\n    title=\"length-input\",\n    min=42,\n    max=1984,\n)\n

A FloatMetadataProperty allows you to add float values as metadata.

rg.FloatMetadataProperty(\n    name=\"float\",\n    title=\"Reading ease\",\n    min=-92.29914,\n    max=119.6975,\n)\n

Note

You can also set the allow_extra_metadata argument in the dataset to True to specify whether the dataset will allow metadata fields in the records other than those specified under metadata. Note that these will not be accessible from the UI for any user, only retrievable using the Python SDK.
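
As a minimal sketch (field, question, and metadata names illustrative), allow_extra_metadata is set at the settings level:

import argilla as rg\n\nsettings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[rg.TextQuestion(name=\"response\")],\n    metadata=[rg.TermsMetadataProperty(name=\"terms\")],\n    allow_extra_metadata=True,  # records may carry extra metadata keys, not visible in the UI\n)\n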

"},{"location":"how_to_guides/dataset/#vectors","title":"Vectors","text":"

To use the similarity search in the UI and the Python SDK, you will need to configure vectors using the VectorField class.

Check the Vector - Python Reference to see the VectorField class in detail.

rg.VectorField(\n    name=\"my_vector\",\n    title=\"My Vector\",\n    dimensions=768\n)\n

"},{"location":"how_to_guides/dataset/#guidelines","title":"Guidelines","text":"

Once you have decided on the data to show and the questions to ask, it's important to provide clear guidelines to the annotators. These guidelines help them understand the task and answer the questions consistently. You can provide guidelines in two ways:

  • In the dataset guidelines: this is added as an argument when you create your dataset in the Python SDK. They will appear in the annotation interface.

guidelines = \"In this dataset, you will find a collection of records that show a category, an instruction, a context and a response to that instruction. [...]\"\n

  • As question descriptions: these are added as an argument when you create questions in the Python SDK. This text will appear in a tooltip next to the question in the UI.
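
For example, a question description could look like this (question name and wording illustrative):

rg.TextQuestion(\n    name=\"corrected_text\",  # illustrative name\n    description=\"Provide a corrected version of the response, keeping the original meaning.\",\n)\n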

It is good practice to use at least the dataset guidelines if not both methods. Question descriptions should be short and provide context to a specific question. They can be a summary of the guidelines to that question, but often that is not sufficient to align the whole annotation team. In the guidelines, you can include a description of the project, details on how to answer each question with examples, instructions on when to discard a record, etc.

Tip

If you want further guidance on good practices for guidelines during the project development, check our blog post.

"},{"location":"how_to_guides/dataset/#distribution","title":"Distribution","text":"

When working as a team, you may want to distribute the annotation task to ensure efficiency and quality. You can use the\u00a0TaskDistribution settings to configure the number of minimum submitted responses expected for each record. Argilla will use this setting to automatically handle records in your team members' pending queues.

Check the Task Distribution - Python Reference to see the TaskDistribution class in detail.

rg.TaskDistribution(\n    min_submitted = 2\n)\n

To learn more about how to distribute the task among team members, check the Distribute the annotation guide.

"},{"location":"how_to_guides/dataset/#list-datasets","title":"List datasets","text":"

You can list all the datasets available in a workspace using the datasets attribute of the Workspace class. You can also use len(workspace.datasets) to get the number of datasets in a workspace.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\ndatasets = workspace.datasets\n\nfor dataset in datasets:\n    print(dataset)\n

When you list datasets, dataset settings are not preloaded, since this can introduce extra requests to the server. If you want to work with settings when listing datasets, you need to load them:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nfor dataset in client.datasets:\n    dataset.settings.get() # this will get the dataset settings from the server\n    print(dataset.settings)\n

Notebooks

When using a notebook, executing client.datasets will display a table with the name of the existing datasets, their id, the workspace_id to which they belong, and the last update as updated_at.

"},{"location":"how_to_guides/dataset/#retrieve-a-dataset","title":"Retrieve a dataset","text":"

You can retrieve a dataset by calling the datasets method on the Argilla class and passing the name or id of the dataset as an argument. If the dataset does not exist, a warning message will be raised and None will be returned.

By nameBy id

By default, this method attempts to retrieve the dataset from the first workspace. If the dataset is in a different workspace, you must specify either the workspace or workspace name as an argument.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\n# Retrieve the dataset from the first workspace\nretrieved_dataset = client.datasets(name=\"my_dataset\")\n\n# Retrieve the dataset from the specified workspace\nretrieved_dataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(id=\"<uuid-or-uuid-string>\")\n
"},{"location":"how_to_guides/dataset/#check-dataset-existence","title":"Check dataset existence","text":"

You can check if a dataset exists. The client.datasets method will return None if the dataset was not found.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\nif dataset is not None:\n    pass\n
"},{"location":"how_to_guides/dataset/#update-a-dataset","title":"Update a dataset","text":"

Once a dataset is published, there are limited things you can update. Here is a summary of the attributes you can change for each setting:

FieldsQuestionsMetadataVectorsGuidelinesDistribution

  • Fields (From SDK / From UI): Name \u274c/\u274c, Title \u2705/\u2705, Required \u274c/\u274c, Use markdown \u2705/\u2705, Template \u2705/\u274c
  • Questions (From SDK / From UI): Name \u274c/\u274c, Title \u274c/\u2705, Description \u274c/\u2705, Required \u274c/\u274c, Labels \u274c/\u274c, Values \u274c/\u274c, Label order \u274c/\u2705, Suggestions first \u274c/\u2705, Visible labels \u274c/\u2705, Field \u274c/\u274c, Allow overlapping \u274c/\u274c, Use markdown \u274c/\u2705
  • Metadata (From SDK / From UI): Name \u274c/\u274c, Title \u2705/\u2705, Options \u274c/\u274c, Minimum value \u274c/\u274c, Maximum value \u274c/\u274c, Visible for annotators \u2705/\u2705, Allow extra metadata \u2705/\u2705
  • Vectors (From SDK / From UI): Name \u274c/\u274c, Title \u2705/\u2705, Dimensions \u274c/\u274c
  • Guidelines: From SDK \u2705, From UI \u2705
  • Distribution (From SDK / From UI): Minimum submitted \u2705/\u2705

To modify these attributes, you can simply set the new value of the attributes you wish to change and call the update method on the Dataset object.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\ndataset.settings.fields[\"text\"].use_markdown = True\ndataset.settings.metadata[\"my_metadata\"].visible_for_annotators = False\n\ndataset.update()\n

You can also add and delete metadata properties and vector fields using the add and delete methods.

AddDelete
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\ndataset.settings.vectors.add(rg.VectorField(name=\"my_new_vector\", dimensions=123))\ndataset.settings.metadata.add(\n    rg.TermsMetadataProperty(\n        name=\"my_new_metadata\",\n        options=[\"option_1\", \"option_2\", \"option_3\"],\n    ),\n)\ndataset.update()\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\ndataset.settings.vectors[\"my_old_vector\"].delete()\ndataset.settings.metadata[\"my_old_metadata\"].delete()\n\ndataset.update()\n
"},{"location":"how_to_guides/dataset/#delete-a-dataset","title":"Delete a dataset","text":"

You can delete an existing dataset by calling the delete method on the Dataset class.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset_to_delete = client.datasets(name=\"my_dataset\")\n\ndataset_deleted = dataset_to_delete.delete()\n
"},{"location":"how_to_guides/distribution/","title":"Distribute the annotation task among the team","text":"

This guide explains how you can use Argilla\u2019s automatic task distribution to efficiently divide the task of annotating a dataset among multiple team members.

Owners and admins can define the minimum number of submitted responses expected for each record. Argilla will use this setting to automatically handle the records that will be shown in the pending queues of all users with access to the dataset.

When a record has met the minimum number of submissions, the status of the record will change to completed, and the record will be removed from the Pending queue of all team members so they can focus on providing responses where they are most needed. The dataset\u2019s annotation task will be fully completed once all records have the completed status.

Note

The status of a record can be either completed, when it has the required number of responses with submitted status, or pending, when it doesn\u2019t meet this requirement.

Each record can have multiple responses, and each of those can have the status submitted, discarded, or draft.

Main Class

rg.TaskDistribution(\n    min_submitted = 2\n)\n

Check the Task Distribution - Python Reference to see the attributes, arguments, and methods of the TaskDistribution class in detail.

"},{"location":"how_to_guides/distribution/#configure-task-distribution-settings","title":"Configure task distribution settings","text":"

By default, Argilla will set the required minimum submitted responses to 1. This means that whenever a record has at least 1 response with the status submitted, the status of the record will change to completed and the record will be removed from the Pending queue of other team members.

Tip

Leave the default value of minimum submissions (1) if you are working on your own or when you don't require more than one submitted response per record.

If you wish to set a different number, you can do so through the distribution setting in your dataset settings:

settings = rg.Settings(\n    guidelines=\"These are some guidelines.\",\n    fields=[\n        rg.TextField(\n            name=\"text\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"label\",\n            labels=[\"label_1\", \"label_2\", \"label_3\"]\n        ),\n    ],\n    distribution=rg.TaskDistribution(min_submitted=3)\n)\n

Learn more about configuring dataset settings in the Dataset management guide.

Tip

Increase the number of minimum submissions if you\u2019d like to ensure you get more than one submitted response per record. Make sure that this number is never higher than the number of members in your team. Note that the lower this number is, the faster the task will be completed.

Note

Note that some records may have more responses than expected if multiple team members submit responses on the same record simultaneously.

"},{"location":"how_to_guides/distribution/#change-task-distribution-settings","title":"Change task distribution settings","text":"

If you wish to change the minimum submitted responses required in a dataset, you can do so as long as the annotation hasn\u2019t started, i.e., the dataset has no responses for any records.

Admins and owners can change this value from the dataset settings page in the UI or from the SDK:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\ndataset.settings.distribution.min_submitted = 4\n\ndataset.update()\n
"},{"location":"how_to_guides/distribution/#track-your-teams-progress","title":"Track your team's progress","text":"

You can check the progress of the annotation task by using the dataset.progress method. This method will return the number of records that have the status completed and pending, as well as the total number of records in the dataset.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\nprogress = dataset.progress()\n
{\n    \"total\": 100,\n    \"completed\": 10,\n    \"pending\": 90\n}\n

You can also include the users' distribution in the progress by setting the with_users_distribution parameter to True. This will return the number of records that have the status completed and pending, the total number of records in the dataset, and the number of completed submissions per user. You can visit the Annotation Progress section for more information.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(\"my_dataset\")\n\nprogress = dataset.progress(with_users_distribution=True)\n
{\n    \"total\": 100,\n    \"completed\": 50,\n    \"pending\": 50,\n    \"users\": {\n        \"user1\": {\n           \"completed\": { \"submitted\": 10, \"draft\": 5, \"discarded\": 5},\n           \"pending\": { \"submitted\": 5, \"draft\": 10, \"discarded\": 10},\n        },\n        \"user2\": {\n           \"completed\": { \"submitted\": 20, \"draft\": 10, \"discarded\": 5},\n           \"pending\": { \"submitted\": 2, \"draft\": 25, \"discarded\": 0},\n        },\n        ...\n}\n

Note

Since the completed records can contain submissions from multiple users, the number of completed submissions per user may not match the total number of completed records.

"},{"location":"how_to_guides/import_export/","title":"Importing and exporting datasets and records","text":"

This guide provides an overview of how to import and export your dataset or its records to Python, your local disk, or the Hugging Face Hub.

In Argilla, you can import/export two main components of a dataset:

  • The dataset's complete configuration is defined in rg.Settings. This is useful if you want to share your feedback task or restore it later in Argilla.
  • The records stored in the dataset, including Metadata, Vectors, Suggestions, and Responses. This is useful if you want to use your dataset's records outside of Argilla.


Main Classes

rg.Dataset.to_hubrg.Dataset.from_hubrg.Dataset.to_diskrg.Dataset.from_diskrg.Dataset.records.to_datasets()rg.Dataset.records.to_dict()rg.Dataset.records.to_list()
rg.Dataset.to_hub(\n    repo_id=\"<my_org>/<my_dataset>\",\n    with_records=True,\n    generate_card=True\n)\n
rg.Dataset.from_hub(\n    repo_id=\"<my_org>/<my_dataset>\",\n    name=\"my_dataset\",\n    workspace=\"my_workspace\",\n    client=rg.Client(),\n    with_records=True\n)\n
rg.Dataset.to_disk(\n    path=\"<path-empty-directory>\",\n    with_records=True\n)\n
rg.Dataset.from_disk(\n    path=\"<path-dataset-directory>\",\n    name=\"my_dataset\",\n    workspace=\"my_workspace\",\n    client=rg.Client(),\n    with_records=True\n)\n
rg.Dataset.records.to_datasets()\n
rg.Dataset.records.to_dict()\n
rg.Dataset.records.to_list()\n

Check the Dataset - Python Reference to see the attributes, arguments, and methods of the export Dataset class in detail.

Check the Record - Python Reference to see the attributes, arguments, and methods of the Record class in detail.

"},{"location":"how_to_guides/import_export/#importing-and-exporting-datasets","title":"Importing and exporting datasets","text":"

First, we will go through exporting a complete dataset from Argilla. This includes the dataset's settings and records. All of these methods use the rg.Dataset.from_* and rg.Dataset.to_* methods.

"},{"location":"how_to_guides/import_export/#hugging-face-hub","title":"Hugging Face Hub","text":""},{"location":"how_to_guides/import_export/#export-to-hub","title":"Export to Hub","text":"

You can push a dataset from Argilla to the Hugging Face Hub. This is useful if you want to share your dataset with the community or version control it. You can push the dataset to the Hugging Face Hub using the rg.Dataset.to_hub method.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\ndataset.to_hub(repo_id=\"<my_org>/<my_dataset>\")\n

With or without records

The example above will push the dataset's Settings and records to the hub. If you only want to push the dataset's configuration, you can set the with_records parameter to False. This is useful if you're just interested in a specific dataset template or you want to make changes in the dataset settings and/or records.

dataset.to_hub(repo_id=\"<my_org>/<my_dataset>\", with_records=False)\n
"},{"location":"how_to_guides/import_export/#import-from-hub","title":"Import from Hub","text":"

You can pull a dataset from the Hugging Face Hub to Argilla. This is useful if you want to restore a dataset and its configuration. You can pull the dataset from the Hugging Face Hub using the rg.Dataset.from_hub method.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = rg.Dataset.from_hub(repo_id=\"<my_org>/<my_dataset>\")\n

The rg.Dataset.from_hub method loads the configuration and records from the dataset repo. If you only want to load records, you can pass a datasets.Dataset object to the rg.Dataset.records.log method. This enables you to configure your own dataset and reuse existing Hub datasets. See the guide on records for more information.

With or without records

The example above will pull the dataset's Settings and records from the hub. If you only want to pull the dataset's configuration, you can set the with_records parameter to False. This is useful if you're just interested in a specific dataset template or you want to make changes in the records.

dataset = rg.Dataset.from_hub(repo_id=\"<my_org>/<my_dataset>\", with_records=False)\n

You could then load the dataset's records using the load_dataset method of the datasets package and pass them to the rg.Dataset.records.log method.

from datasets import load_dataset\n\nhf_dataset = load_dataset(\"<my_org>/<my_dataset>\")\ndataset.records.log(hf_dataset) # (1)\n
  1. You could also use the mapping parameter to map record field names to argilla field and question names.
"},{"location":"how_to_guides/import_export/#import-settings-from-hub","title":"Import settings from Hub","text":"

When importing datasets from the hub, Argilla will load settings from the hub in three ways:

  1. If the dataset was pushed to hub by Argilla, then the settings will be loaded from the hub via the configuration file.
  2. If the dataset was loaded by another source, then Argilla will define the settings based on the dataset's features in datasets.Features. For example, creating a TextField for a text feature or a LabelQuestion for a label class.
  3. You can pass a custom rg.Settings object to the rg.Dataset.from_hub method via the settings parameter. This will override the settings loaded from the hub.
settings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],\n    questions=[rg.TextQuestion(name=\"answer\")]\n) # (1)\n\ndataset = rg.Dataset.from_hub(repo_id=\"<my_org>/<my_dataset>\", settings=settings)\n
  1. The settings that you pass to the rg.Dataset.from_hub method will override the settings loaded from the hub, and need to align with the dataset being loaded.
"},{"location":"how_to_guides/import_export/#local-disk","title":"Local Disk","text":""},{"location":"how_to_guides/import_export/#export-to-disk","title":"Export to Disk","text":"

You can save a dataset from Argilla to your local disk. This is useful if you want to back up your dataset. You can use the rg.Dataset.to_disk method. We recommend using an empty directory.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\ndataset.to_disk(path=\"<path-empty-directory>\")\n

This will save the dataset's configuration and records to the specified path. If you only want to save the dataset's configuration, you can set the with_records parameter to False.

dataset.to_disk(path=\"<path-empty-directory>\", with_records=False)\n
"},{"location":"how_to_guides/import_export/#import-from-disk","title":"Import from Disk","text":"

You can load a dataset from your local disk to Argilla. This is useful if you want to restore a dataset's configuration. You can use the rg.Dataset.from_disk method.

import argilla as rg\n\ndataset = rg.Dataset.from_disk(path=\"<path-dataset-directory>\")\n

Directing the dataset to a name and workspace

You can also specify the name and workspace of the dataset when loading it from the disk.

dataset = rg.Dataset.from_disk(path=\"<path-dataset-directory>\", name=\"my_dataset\", workspace=\"my_workspace\")\n
"},{"location":"how_to_guides/import_export/#importing-and-exporting-records","title":"Importing and exporting records","text":"

The records alone can be exported from a dataset in Argilla. This is useful if you want to process the records in Python, export them to a different platform, or use them in model training. All of these methods use the rg.Dataset.records attribute.

"},{"location":"how_to_guides/import_export/#export-records","title":"Export records","text":"

The records can be exported as a dictionary, a list of dictionaries, or a Dataset of the datasets package.

With images

If your dataset includes images, the recommended approach for exporting records is to use the to_datasets method, which exports the images as rescaled PIL objects. With other methods, the images will be exported using the data URI schema.

To a python dictionaryTo a python listTo the datasets package

Records can be exported from Dataset.records as a dictionary. The to_dict method can be used to export records as a dictionary. You can specify the orientation of the dictionary output. You can also decide whether or not to flatten the dictionary.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\ndataset = client.datasets(name=\"my_dataset\")\n\n# Export records as a dictionary\nexported_records = dataset.records.to_dict()\n# {'fields': [{'text': 'Hello'},{'text': 'World'}], suggestions': [{'label': {'value': 'positive'}}, {'label': {'value': 'negative'}}]\n\n# Export records as a dictionary with orient=index\nexported_records = dataset.records.to_dict(orient=\"index\")\n# {\"uuid\": {'fields': {'text': 'Hello'}, 'suggestions': {'label': {'value': 'positive'}}}, {\"uuid\": {'fields': {'text': 'World'}, 'suggestions': {'label': {'value': 'negative'}}},\n\n# Export records as a dictionary with flatten=True\nexported_records = dataset.records.to_dict(flatten=True)\n# {\"text\": [\"Hello\", \"World\"], \"label.suggestion\": [\"greeting\", \"greeting\"]}\n

Records can be exported from Dataset.records as a list of dictionaries. The to_list method can be used to export records as a list of dictionaries. You can decide whether or not to flatten it.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=workspace)\n\n# Export records as a list of dictionaries\nexported_records = dataset.records.to_list()\n# [{'fields': {'text': 'Hello'}, 'suggestion': {'label': {value: 'greeting'}}}, {'fields': {'text': 'World'}, 'suggestion': {'label': {value: 'greeting'}}}]\n\n# Export records as a list of dictionaries with flatten=True\nexported_records = dataset.records.to_list(flatten=True)\n# [{\"text\": \"Hello\", \"label\": \"greeting\"}, {\"text\": \"World\", \"label\": \"greeting\"}]\n

Records can be exported from Dataset.records to the datasets package. The to_datasets method can be used to export records to the datasets package. You can specify the name of the dataset and the split to export the records.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\ndataset = client.datasets(name=\"my_dataset\")\n\n# Export records as a datasets.Dataset\nexported_dataset = dataset.records.to_datasets()\n
"},{"location":"how_to_guides/import_export/#import-records","title":"Import records","text":"

To import records to a dataset, use the rg.Dataset.records.log method. There is a guide on how to do this in How-to guides - Record, or you can check the Record - Python Reference.
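
As a minimal sketch (field name illustrative), logging records from a list of dictionaries could look like this:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\n# Dictionary keys are matched to the dataset's field and question names\ndataset.records.log([{\"text\": \"Hello world\"}])\n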

"},{"location":"how_to_guides/migrate_from_legacy_datasets/","title":"Migrate users, workspaces and datasets to Argilla 2.x","text":"

This guide will help you migrate task-specific datasets to Argilla V2. These do not include the FeedbackDataset, which is just an interim naming convention for the latest extensible dataset. Task-specific datasets are datasets that are used for a specific task, such as text classification, token classification, etc. If you would like to learn about the backstory of this SDK migration, please refer to the SDK migration blog post. Additionally, we will provide guidance on how to maintain your Users and Workspaces within the new Argilla V2 format.

Note

Legacy datasets include: DatasetForTextClassification, DatasetForTokenClassification, and DatasetForText2Text.

FeedbackDatasets do not need to be migrated as they are already in the Argilla V2 format. However, since the 2.x version includes changes to the search index structure, you should reindex the datasets by enabling the docker environment variable REINDEX_DATASET (this step is executed automatically if you're running Argilla in an HF Space). See the server configuration docs section for more details.

To follow this guide, you will need to have the following prerequisites:

  • An argilla 1.* server instance running with legacy datasets.
  • An argilla >=1.29 server instance running. If you don't have one, you can create one by following this Argilla guide.
  • The argilla sdk package installed in your environment.

Warning

This guide will recreate all Users and Workspaces on a new server. Hence, they will be created with new passwords and IDs. If you want to keep the same passwords and IDs, you can copy the datasets to a temporary v2 instance, then upgrade your current instance to v2.0 and copy the datasets back to your original instance afterward.

If your current legacy datasets are on a server with an Argilla release after 1.29, you could choose to recreate your legacy datasets as new datasets on the same server. You could then upgrade the server to Argilla 2.0 and carry on working there. Your legacy datasets will not be visible on the new server, but they will remain in the storage layers if you need to access them.

To migrate the datasets, you will need to install the new argilla package. This includes a new v1 module that allows you to connect to the Argilla V1 server.

pip install \"argilla>=2.0.0\"\n
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#migrate-users-and-workspaces","title":"Migrate Users and Workspaces","text":"

The guide will take you through two steps:

  1. Retrieve the old users and workspaces from the Argilla V1 server using the new argilla package.
  2. Recreate the users and workspaces on the Argilla V2 server, using the name as the unique identifier.
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-1-retrieve-the-old-users-and-workspaces","title":"Step 1: Retrieve the old users and workspaces","text":"

You can use the v1 module to connect to the Argilla V1 server.

import argilla.v1 as rg_v1\n\n# Initialize the API with an Argilla server less than 2.0\napi_url = \"<your-url>\"\napi_key = \"<your-api-key>\"\nrg_v1.init(api_url, api_key)\n

Next, load the Users and Workspaces from the Argilla V1 server:

users_v1 = rg_v1.User.list()\nworkspaces_v1 = rg_v1.Workspace.list()\n
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-2-recreate-the-users-and-workspaces","title":"Step 2: Recreate the users and workspaces","text":"

To recreate the users and workspaces on the Argilla V2 server, you can use the argilla package.

First, instantiate the Argilla class to connect to the Argilla V2 server:

import argilla as rg\n\nclient = rg.Argilla()\n

Next, recreate the users and workspaces on the Argilla V2 server:

for workspace in workspaces_v1:\n    rg.Workspace(\n        name=workspace.name\n    ).create()\n
for user in users_v1:\n    user = rg.User(\n        username=user.username,\n        first_name=user.first_name,\n        last_name=user.last_name,\n        role=user.role,\n        password=\"<your_chosen_password>\" # (1)\n    ).create()\n    if user.role == \"owner\":\n       continue\n\n    for workspace_name in user.workspaces:\n        if workspace_name != user.name:\n            workspace = client.workspaces(name=workspace_name)\n            user.add_to_workspace(workspace)\n
  1. You need to choose a new password for the user. To do this programmatically, you can use the uuid package to generate a random password, as sketched below. Take care to keep track of the passwords you choose, since you will not be able to retrieve them later.
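A minimal sketch of that approach, assuming you keep the generated passwords in a local dictionary so you can share them with your users later:

import uuid\n\n# map each username to a freshly generated random password; store this mapping somewhere safe\npasswords = {user.username: uuid.uuid4().hex for user in users_v1}\n\n# then pass passwords[user.username] as the password argument when recreating each user\n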

Now you have successfully migrated your users and workspaces to Argilla V2 and can continue with the next steps.

"},{"location":"how_to_guides/migrate_from_legacy_datasets/#migrate-datasets","title":"Migrate datasets","text":"

The guide will take you through three steps:

  1. Retrieve the legacy dataset from the Argilla V1 server using the new argilla package.
  2. Define the new dataset in the Argilla V2 format.
  3. Upload the dataset records to the new Argilla V2 dataset format and attributes.
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-1-retrieve-the-legacy-dataset","title":"Step 1: Retrieve the legacy dataset","text":"

You can use the v1 module to connect to the Argilla V1 server.

import argilla.v1 as rg_v1\n\n# Initialize the API with an Argilla server less than 2.0\napi_url = \"<your-url>\"\napi_key = \"<your-api-key>\"\nrg_v1.init(api_url, api_key)\n

Next, load the dataset settings and records from the Argilla V1 server:

dataset_name = \"news-programmatic-labeling\"\nworkspace = \"demo\"\n\nsettings_v1 = rg_v1.load_dataset_settings(dataset_name, workspace)\nrecords_v1 = rg_v1.load(dataset_name, workspace)\nhf_dataset = records_v1.to_datasets()\n

Your legacy dataset is now loaded into the hf_dataset object.

"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-2-define-the-new-dataset","title":"Step 2: Define the new dataset","text":"

Define the new dataset in the Argilla V2 format. The new dataset format is defined in the argilla package. You can create a new dataset with the Settings and Dataset classes:

First, instantiate the Argilla class to connect to the Argilla V2 server:

import argilla as rg\n\nclient = rg.Argilla()\n

Next, define the new dataset settings:

For single-label classification | For multi-label classification | For token classification | For text generation
settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"), # (1)\n    ],\n    questions=[\n        rg.LabelQuestion(name=\"label\", labels=settings_v1.label_schema),\n    ],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"split\"), # (2)\n    ],\n    vectors=[\n        rg.VectorField(name='mini-lm-sentence-transformers', dimensions=384), # (3)\n    ],\n)\n
  1. The default field in DatasetForTextClassification is text, but make sure you provide all fields included in record.inputs.
  2. Make sure you provide all relevant metadata fields available in the dataset.
  3. Make sure you provide all relevant vectors available in the dataset.
settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"), # (1)\n    ],\n    questions=[\n        rg.MultiLabelQuestion(name=\"labels\", labels=settings_v1.label_schema),\n    ],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"split\"), # (2)\n    ],\n    vectors=[\n        rg.VectorField(name='mini-lm-sentence-transformers', dimensions=384), # (3)\n    ],\n)\n
  1. The default field in DatasetForTextClassification is text, but make sure you provide all fields included in record.inputs.
  2. Make sure you provide all relevant metadata fields available in the dataset.
  3. Make sure you provide all relevant vectors available in the dataset.
settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    questions=[\n        rg.SpanQuestion(name=\"spans\", labels=settings_v1.label_schema),\n    ],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"split\"), # (1)\n    ],\n    vectors=[\n        rg.VectorField(name='mini-lm-sentence-transformers', dimensions=384), # (2)\n    ],\n)\n
  1. Make sure you provide all relevant metadata fields available in the dataset.
  2. Make sure you provide all relevant vectors available in the dataset.
settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    questions=[\n        rg.TextQuestion(name=\"text_generation\"),\n    ],\n    metadata=[\n        rg.TermsMetadataProperty(name=\"split\"), # (1)\n    ],\n    vectors=[\n        rg.VectorField(name='mini-lm-sentence-transformers', dimensions=384), # (2)\n    ],\n)\n
  1. Make sure you provide all relevant metadata fields available in the dataset.
  2. Make sure you provide all relevant vectors available in the dataset.

Finally, create the new dataset on the Argilla V2 server:

dataset = rg.Dataset(name=dataset_name, workspace=workspace, settings=settings)\ndataset.create()\n

Note

If a dataset with the same name already exists, the create method will raise an exception. You can check if the dataset exists and delete it before creating a new one.

dataset = client.datasets(name=dataset_name, workspace=workspace)\n\nif dataset is not None:\n    dataset.delete()\n
"},{"location":"how_to_guides/migrate_from_legacy_datasets/#step-3-upload-the-dataset-records","title":"Step 3: Upload the dataset records","text":"

To upload the records to the new server, we will need to convert the records from the Argilla V1 format to the Argilla V2 format. The new argilla sdk package uses a generic Record class, but legacy datasets have specific record classes. We will need to convert the records to the generic Record class.

Here is a set of example functions to convert the records for single-label and multi-label classification. You can modify these functions to suit your dataset.

For single-label classification | For multi-label classification | For token classification | For text generation
def map_to_record_for_single_label(data: dict, users_by_name: dict, current_user: rg.User) -> rg.Record:\n    \"\"\" This function maps a text classification record dictionary to the new Argilla record.\"\"\"\n    suggestions = []\n    responses = []\n\n    if prediction := data.get(\"prediction\"):\n        label, score = prediction[0].values()\n        agent = data[\"prediction_agent\"]\n        suggestions.append(\n            rg.Suggestion(\n                question_name=\"label\", # (1)\n                value=label,\n                score=score,\n                agent=agent\n            )\n        )\n\n    if annotation := data.get(\"annotation\"):\n        user_id = users_by_name.get(data[\"annotation_agent\"], current_user).id\n        responses.append(\n            rg.Response(\n                question_name=\"label\", # (2)\n                value=annotation,\n                user_id=user_id\n            )\n        )\n\n    return rg.Record(\n        id=data[\"id\"],\n        fields=data[\"inputs\"],\n        # The inputs field should be a dictionary with the same keys as the `fields` in the settings\n        metadata=data[\"metadata\"],\n        # The metadata field should be a dictionary with the same keys as the `metadata` in the settings\n        vectors=data.get(\"vectors\") or {},\n        suggestions=suggestions,\n        responses=responses,\n    )\n
  1. Make sure the question_name matches the name of the question in question settings.

  2. Make sure the question_name matches the name of the question in question settings.

def map_to_record_for_multi_label(data: dict, users_by_name: dict, current_user: rg.User) -> rg.Record:\n    \"\"\" This function maps a text classification record dictionary to the new Argilla record.\"\"\"\n    suggestions = []\n    responses = []\n\n    if prediction := data.get(\"prediction\"):\n        labels, scores = zip(*[(pred[\"label\"], pred[\"score\"]) for pred in prediction])\n        agent = data[\"prediction_agent\"]\n        suggestions.append(\n            rg.Suggestion(\n                question_name=\"labels\", # (1)\n                value=labels,\n                score=scores,\n                agent=agent\n            )\n        )\n\n    if annotation := data.get(\"annotation\"):\n        user_id = users_by_name.get(data[\"annotation_agent\"], current_user).id\n        responses.append(\n            rg.Response(\n                question_name=\"labels\", # (2)\n                value=annotation,\n                user_id=user_id\n            )\n        )\n\n    return rg.Record(\n        id=data[\"id\"],\n        fields=data[\"inputs\"],\n        # The inputs field should be a dictionary with the same keys as the `fields` in the settings\n        metadata=data[\"metadata\"],\n        # The metadata field should be a dictionary with the same keys as the `metadata` in the settings\n        vectors=data.get(\"vectors\") or {},\n        suggestions=suggestions,\n        responses=responses,\n    )\n
  1. Make sure the question_name matches the name of the question in question settings.

  2. Make sure the question_name matches the name of the question in question settings.

def map_to_record_for_span(data: dict, users_by_name: dict, current_user: rg.User) -> rg.Record:\n    \"\"\" This function maps a token classification record dictionary to the new Argilla record.\"\"\"\n    suggestions = []\n    responses = []\n\n    if prediction := data.get(\"prediction\"):\n        scores = [span[\"score\"] for span in prediction]\n        agent = data[\"prediction_agent\"]\n        suggestions.append(\n            rg.Suggestion(\n                question_name=\"spans\", # (1)\n                value=prediction,\n                score=scores,\n                agent=agent\n            )\n        )\n\n    if annotation := data.get(\"annotation\"):\n        user_id = users_by_name.get(data[\"annotation_agent\"], current_user).id\n        responses.append(\n            rg.Response(\n                question_name=\"spans\", # (2)\n                value=annotation,\n                user_id=user_id\n            )\n        )\n\n    return rg.Record(\n        id=data[\"id\"],\n        fields={\"text\": data[\"text\"]},\n        # The inputs field should be a dictionary with the same keys as the `fields` in the settings\n        metadata=data[\"metadata\"],\n        # The metadata field should be a dictionary with the same keys as the `metadata` in the settings\n        vectors=data.get(\"vectors\") or {},\n        # The vectors field should be a dictionary with the same keys as the `vectors` in the settings\n        suggestions=suggestions,\n        responses=responses,\n    )\n
  1. Make sure the question_name matches the name of the question in question settings.

  2. Make sure the question_name matches the name of the question in question settings.

def map_to_record_for_text_generation(data: dict, users_by_name: dict, current_user: rg.User) -> rg.Record:\n    \"\"\" This function maps a text2text record dictionary to the new Argilla record.\"\"\"\n    suggestions = []\n    responses = []\n\n    if prediction := data.get(\"prediction\"):\n        first = prediction[0]\n        agent = data[\"prediction_agent\"]\n        suggestions.append(\n            rg.Suggestion(\n                question_name=\"text_generation\", # (1)\n                value=first[\"text\"],\n                score=first[\"score\"],\n                agent=agent\n            )\n        )\n\n    if annotation := data.get(\"annotation\"):\n        # From data[annotation]\n        user_id = users_by_name.get(data[\"annotation_agent\"], current_user).id\n        responses.append(\n            rg.Response(\n                question_name=\"text_generation\", # (2)\n                value=annotation,\n                user_id=user_id\n            )\n        )\n\n    return rg.Record(\n        id=data[\"id\"],\n        fields={\"text\": data[\"text\"]},\n        # The inputs field should be a dictionary with the same keys as the `fields` in the settings\n        metadata=data[\"metadata\"],\n        # The metadata field should be a dictionary with the same keys as the `metadata` in the settings\n        vectors=data.get(\"vectors\") or {},\n        # The vectors field should be a dictionary with the same keys as the `vectors` in the settings\n        suggestions=suggestions,\n        responses=responses,\n    )\n
  1. Make sure the question_name matches the name of the question in question settings.

  2. Make sure the question_name matches the name of the question in question settings.

The functions above depend on the users_by_name dictionary and the current_user object to assign responses to users, so we need to load the existing users. You can retrieve the users from the Argilla V2 server and the current user as follows:

users_by_name = {user.username: user for user in client.users}\ncurrent_user = client.me\n

Finally, upload the records to the new dataset using the log method and the mapping function that corresponds to your dataset type.

records = []\n\nfor data in hf_dataset:\n    records.append(map_to_record_for_single_label(data, users_by_name, current_user))\n\n# Upload the records to the new dataset\ndataset.records.log(records)\n

You have now successfully migrated your legacy dataset to Argilla V2. For more guides on how to use the Argilla SDK, please refer to the How to guides.

"},{"location":"how_to_guides/query/","title":"Query and filter records","text":"

This guide provides an overview of how to query and filter a dataset in Argilla.

You can search for records in your dataset by querying or filtering. The query focuses on the content of the text field, while the filter is used to filter the records based on conditions. You can use them independently or combine multiple filters to create complex search queries. You can also export records from a dataset either as a single dictionary or a list of dictionaries.

Main Classes

rg.Query | rg.Filter | rg.Similar
rg.Query(\n    query=\"query\",\n    filter=filter\n)\n

Check the Query - Python Reference to see the attributes, arguments, and methods of the Query class in detail.

rg.Filter(\n    [\n        (\"field\", \"==\", \"value\"),\n    ]\n)\n

Check the Filter - Python Reference to see the attributes, arguments, and methods of the Filter class in detail.

rg.Similar(\n    name=\"vector\",\n    value=[0.1, 0.2, 0.3],\n)\n

Check the Similar - Python Reference to see the attributes, arguments, and methods of the Similar class in detail.

"},{"location":"how_to_guides/query/#query-with-search-terms","title":"Query with search terms","text":"

To search for records with terms, you can use the Dataset.records attribute with a query string. The search terms are used to find records that contain them in the text field. You can search for a single term or for multiple terms; in the latter case, all of them must appear in the record for it to be retrieved.

Single term search | Multiple terms search
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nquery = rg.Query(query=\"my_term\")\n\nqueried_records = dataset.records(query=query).to_list(flatten=True)\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nquery = rg.Query(query=\"my_term1 my_term2\")\n\nqueried_records = dataset.records(query=query).to_list(flatten=True)\n
"},{"location":"how_to_guides/query/#advanced-queries","title":"Advanced queries","text":"

If you need more complex searches, you can use Elasticsearch's simple query string syntax. Here is a summary of the different available operators:

operator description example + or space AND: search both terms argilla + distilabel or argilla distilabel return records that include the terms \"argilla\" and \"distilabel\" | OR: search either term argilla | distilabel returns records that include the term \"argilla\" or \"distilabel\" - Negation: exclude a term argilla -distilabel returns records that contain the term \"argilla\" and don't have the term \"distilabel\" * Prefix: search a prefix arg* returns records with any words starting with \"arg-\" \" Phrase: search a phrase \"argilla and distilabel\" returns records that contain the phrase \"argilla and distilabel\" ( and ) Precedence: group terms (argilla | distilabel) rules returns records that contain either \"argilla\" or \"distilabel\" and \"rules\" ~N Edit distance: search a term or phrase with an edit distance argilla~1 returns records that contain the term \"argilla\" with an edit distance of 1, e.g. \"argila\"

Tip

To use one of these characters literally, escape it with a preceding backslash \\, e.g. \"1 \\+ 2\" would match records where the phrase \"1 + 2\" is found.
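For example, a sketch combining several of these operators in a single query string (client and dataset set up as in the previous examples; the search terms are illustrative):

query = rg.Query(query=\"(argilla | distilabel) -legacy\")\n\nqueried_records = dataset.records(query=query).to_list(flatten=True)\n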

"},{"location":"how_to_guides/query/#filter-by-conditions","title":"Filter by conditions","text":"

You can use the Filter class to define the conditions and pass them to the Dataset.records attribute to fetch records based on the conditions. Conditions include \"==\", \">=\", \"<=\", or \"in\". Conditions can be combined with dot notation to filter records based on metadata, suggestions, or responses. You can use a single condition or multiple conditions to filter records.

  • ==: The field value is equal to the value.
  • >=: The field value is greater than or equal to the value.
  • <=: The field value is less than or equal to the value.
  • in: The field value is included in a list of values.

Single condition | Multiple conditions
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nfilter_label = rg.Filter((\"label\", \"==\", \"positive\"))\n\nfiltered_records = dataset.records(query=rg.Query(filter=filter_label)).to_list(\n    flatten=True\n)\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nfilters = rg.Filter(\n    [\n        (\"label.suggestion\", \"==\", \"positive\"),\n        (\"metadata.count\", \">=\", 10),\n        (\"metadata.count\", \"<=\", 20),\n        (\"label\", \"in\", [\"positive\", \"negative\"])\n    ]\n)\n\nfiltered_records = dataset.records(\n    query=rg.Query(filter=filters), with_suggestions=True\n).to_list(flatten=True)\n
"},{"location":"how_to_guides/query/#available-fields","title":"Available fields","text":"

You can filter records based on the following fields:

  • id: The record id. ("id", "in", ["1","2","3"])
  • _server_id: The internal record id. This value must be a valid UUID. ("_server_id", "==", "ba69a996-85c2-4af0-a473-23138929641b")
  • inserted_at: The date and time the record was inserted. You can pass a datetime or a string. ("inserted_at", ">=", "2024-10-10")
  • updated_at: The date and time the record was updated. ("updated_at", ">=", "2024-10-10")
  • status: The record status, which can be pending or completed. ("status", "==", "completed")
  • response.status: The response status, which can be draft, submitted, or discarded. ("response.status", "==", "submitted")
  • metadata.<name>: Filter by a metadata property. ("metadata.split", "==", "train")
  • <question>.suggestion: Filter by a question suggestion value. ("label.suggestion", "==", "positive")
  • <question>.score: Filter by a suggestion score. ("label.score", "<=", "0.9")
  • <question>.agent: Filter by a suggestion agent. ("label.agent", "==", "ChatGPT4.0")
  • <question>.response: Filter by a question response. ("label.response", "==", "negative")
"},{"location":"how_to_guides/query/#filter-by-status","title":"Filter by status","text":"

You can filter records based on record or response status. Record status can be pending or completed, and response status can be draft, submitted, or discarded.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nstatus_filter = rg.Query(\n    filter=rg.Filter(\n        [\n            (\"status\", \"==\", \"completed\"),\n            (\"response.status\", \"==\", \"discarded\")\n        ]\n    )\n)\n\nfiltered_records = dataset.records(status_filter).to_list(flatten=True)\n
"},{"location":"how_to_guides/query/#similarity-search","title":"Similarity search","text":"

You can search for records that are similar to a given vector. You can use the Similar class to define the vector and pass it as part of the query argument to the Dataset.records attribute.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\n\nsimilar_filter = rg.Query(\n    similar=rg.Similar(\n        name=\"vector\", value=[0.1, 0.2, 0.3],\n    )\n)\n\nfiltered_records = dataset.records(similar_filter).to_list(flatten=True)\n

Note

The Similar search expects a vector field definition as part of the dataset settings. If the dataset does not have a vector field, the search will return an error. Visit the Vectors section for more details on how to define a vector field.

"},{"location":"how_to_guides/query/#query-and-filter-a-dataset","title":"Query and filter a dataset","text":"

As mentioned, you can use a query with a search term and a filter or various filters to create complex search queries.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\", workspace=\"my_workspace\")\n\nquery_filter = rg.Query(\n    query=\"my_term\",\n    filter=rg.Filter(\n        [\n            (\"label.suggestion\", \"==\", \"positive\"),\n            (\"metadata.count\", \">=\", 10),\n        ]\n    )\n)\n\nqueried_filtered_records = dataset.records(\n    query=query_filter,\n    with_metadata=True,\n    with_suggestions=True\n).to_list(flatten=True)\n
"},{"location":"how_to_guides/record/","title":"Add, update, and delete records","text":"

This guide provides an overview of records, explaining the basics of how to define and manage them in Argilla.

A record in Argilla is a data item that requires annotation, consisting of one or more fields. These are the pieces of information displayed to the user in the UI to facilitate the completion of the annotation task. Each record also includes questions that annotators are required to answer, with the option of adding suggestions and responses to assist them. Guidelines are also provided to help annotators effectively complete their tasks.

A record is part of a dataset, so you will need to create a dataset before adding records. Check this guide to learn how to create a dataset.

Main Class

rg.Record(\n    external_id=\"1234\",\n    fields={\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\"\n    },\n    metadata={\n        \"category\": \"A\"\n    },\n    vectors={\n        \"my_vector\": [0.1, 0.2, 0.3],\n    },\n    suggestions=[\n        rg.Suggestion(\"my_label\", \"positive\", score=0.9, agent=\"model_name\")\n    ],\n    responses=[\n        rg.Response(\"label\", \"positive\", user_id=user_id)\n    ],\n)\n

Check the Record - Python Reference to see the attributes, arguments, and methods of the Record class in detail.

"},{"location":"how_to_guides/record/#add-records","title":"Add records","text":"

You can add records to a dataset in two different ways: either by using a dictionary or by directly initializing a Record object. You should ensure that fields, metadata and vectors match those configured in the dataset settings. In both cases, records are added via the Dataset.records.log method. As soon as you add the records, they will be available in the Argilla UI. If they do not appear in the UI, you may need to click the refresh button to update the view.

Tip

Take some time to inspect the data before adding it to the dataset in case this triggers changes in the questions or fields.

Note

If you are planning to use public data, the Datasets page of the Hugging Face Hub is a good place to start. Remember to always check the license to make sure you can legally use it for your specific use case.

As Record objects | From a generic data structure | From a Hugging Face dataset

You can add records to a dataset by initializing a Record object directly. This is ideal if you need to apply logic to the data before defining the record. If the data is already structured, you should consider adding it directly as a dictionary or Hugging Face dataset.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n    ), # (1)\n]\n\ndataset.records.log(records)\n
  1. This is an illustrative definition. In a real-world scenario, you would iterate over a data structure and create a Record object for each item.

You can add the data directly as a dictionary-like structure, where the keys correspond to the names of fields, questions, metadata or vectors in the dataset and the values are the data to be added.

If your data structure does not correspond to your Argilla dataset names, you can use a mapping to indicate which keys in the source data correspond to the dataset fields, metadata, vectors, suggestions, or responses. If you need to add the same data to multiple attributes, you can also use a list with the names of the attributes (see the sketch after the example below).

We illustrate this with Python dictionaries that represent your data, but we would not advise you to define dictionaries manually. Instead, use the Record object to instantiate records.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ndataset = client.datasets(name=\"my_dataset\")\n\n# Add records to the dataset with the fields 'question' and 'answer'\ndata = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n    }, # (1)\n]\ndataset.records.log(data)\n\n# Add records to the dataset with a mapping of the fields 'question' and 'answer'\ndata = [\n    {\n        \"query\": \"Do you need oxygen to breathe?\",\n        \"response\": \"Yes\",\n    },\n    {\n        \"query\": \"What is the boiling point of water?\",\n        \"response\": \"100 degrees Celsius\",\n    },\n]\ndataset.records.log(data, mapping={\"query\": \"question\", \"response\": \"answer\"}) # (2)\n
  1. The data structure's keys must match the fields or questions in the Argilla dataset. In this case, there are fields named question and answer.
  2. The data structure has keys query and response, and the Argilla dataset has fields question and answer. You can use the mapping parameter to map the keys in the data structure to the fields in the Argilla dataset.
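As referenced above, here is a sketch of mapping one source key to several dataset attributes at once by passing a list of attribute names; the key and attribute names (including the dot-notation suggestion path) are hypothetical:

data = [\n    {\"query\": \"Do you need oxygen to breathe?\", \"response\": \"Yes\"},\n]\n\n# \"response\" is written both to the \"answer\" field and to the \"answer\" question's suggestion\ndataset.records.log(\n    data,\n    mapping={\"query\": \"question\", \"response\": [\"answer\", \"answer.suggestion\"]},\n)\n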

You can also add records to a dataset using a Hugging Face dataset. This is useful when you want to use a dataset from the Hugging Face Hub and add it to your Argilla dataset.

You can add the dataset where the column names correspond to the names of fields, metadata or vectors in the Argilla dataset.

import argilla as rg\nfrom datasets import load_dataset\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\ndataset = client.datasets(name=\"my_dataset\") # (1)\n\nhf_dataset = load_dataset(\"imdb\", split=\"train[:100]\") # (2)\n\ndataset.records.log(records=hf_dataset)\n
  1. In this case, we are using the my_dataset dataset from the Argilla workspace. The dataset has a text field and a label question.

  2. In this example, the Hugging Face dataset matches the Argilla dataset schema. If that is not the case, you could use the .map method of the datasets library to prepare the data before adding it to the Argilla dataset, as sketched below.
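As a sketch of that preparation step, assuming the source dataset stores integer labels that must be converted to the label names defined in the Argilla settings:

id2label = {0: \"negative\", 1: \"positive\"}  # hypothetical label mapping\n\n# replace the integer label column with the corresponding label names\nhf_dataset = hf_dataset.map(lambda row: {\"label\": id2label[row[\"label\"]]})\n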

If the Hugging Face dataset's schema does not correspond to your Argilla dataset field names, you can use a mapping to specify the relationship. You should indicate as key the column name of the Hugging Face dataset and, as value, the field name of the Argilla dataset.

dataset.records.log(\n    records=hf_dataset, mapping={\"text\": \"review\", \"label\": \"sentiment\"}\n) # (1)\n
  1. In this case, the text key in the Hugging Face dataset would correspond to the review field in the Argilla dataset, and the label key in the Hugging Face dataset would correspond to the sentiment field in the Argilla dataset.
"},{"location":"how_to_guides/record/#fields","title":"Fields","text":"

Fields are the main pieces of information of the record. They are the first thing shown in the UI, together with the questions form. You may only include fields that you have previously configured in the dataset settings. Depending on the type of fields included in the dataset, the data format may be slightly different:

Text | Image | Chat | Custom

Text fields expect input in the form of a string.

record = rg.Record(\n    fields={\"text\": \"Hello World, how are you?\"}\n)\n

Image fields expect a remote URL or local path to an image file in the form of a string, or a PIL object.

Check the Dataset.records - Python Reference to see how to add records with images in detail.

from PIL import Image\n\nrecords = [\n    rg.Record(\n        fields={\"image\": \"https://example.com/image.jpg\"}\n    ),\n    rg.Record(\n        fields={\"image\": \"path/to/image.jpg\"}\n    ),\n    rg.Record(\n        fields={\"image\": Image.open(\"path/to/image.jpg\")}\n    ),\n]\n

Chat fields expect a list of dictionaries with the keys role and content, where the role identifies the interlocutor type (e.g., user, assistant, model, etc.), whereas the content contains the text of the message.

record = rg.Record(\n    fields={\n        \"chat\": [\n            {\"role\": \"user\", \"content\": \"What is Argilla?\"},\n            {\"role\": \"assistant\", \"content\": \"Argilla is a collaboration tool for AI engineers and domain experts to build high-quality datasets\"},\n        ]\n    }\n)\n

Custom fields expect a dictionary with the keys and values you define in the dataset settings. You need to ensure these are aligned with CustomField.template in order for them to be rendered in the UI.

record = rg.Record(\n    fields={\"custom\": {\"key\": \"value\"}}\n)\n
"},{"location":"how_to_guides/record/#metadata","title":"Metadata","text":"

Record metadata can include any information about the record that is not part of the fields in the form of a dictionary. To use metadata for filtering and sorting records, make sure that the key of the dictionary corresponds with the metadata property name. When the key doesn't correspond, this will be considered extra metadata that will get stored with the record (as long as allow_extra_metadata is set to True for the dataset), but will not be usable for filtering and sorting.

Note

Remember that to use metadata within a dataset, you must define a metadata property in the dataset settings.

Check the Metadata - Python Reference to see the attributes, arguments, and methods for using metadata in detail.

As Record objects | From a generic data structure

You can add metadata to a record in an initialized Record object.

# Add records to the dataset with the metadata 'category'\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n        metadata={\"my_metadata\": \"option_1\"},\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n        metadata={\"my_metadata\": \"option_1\"},\n    ),\n]\ndataset.records.log(records)\n

You can add metadata to a record directly as a dictionary structure, where the keys correspond to the names of metadata properties in the dataset and the values are the metadata to be added. Remember that you can also use the mapping parameter to specify the data structure.

# Add records to the dataset with the metadata 'category'\ndata = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n        \"my_metadata\": \"option_1\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n        \"my_metadata\": \"option_1\",\n    },\n]\ndataset.records.log(data)\n
"},{"location":"how_to_guides/record/#vectors","title":"Vectors","text":"

You can associate vectors, like text embeddings, to your records. They can be used for semantic search in the UI and the Python SDK. Make sure that the length of the list corresponds to the dimensions set in the vector settings.

Note

Remember that to use vectors within a dataset, you must define them in the dataset settings.

Check the Vector - Python Reference to see the attributes, arguments, and methods of the Vector class in detail.

As Record objects | From a generic data structure

You can also add vectors to a record in an initialized Record object.

# Add records to the dataset with the vector 'my_vector' and dimension=3\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n        vectors={\n            \"my_vector\": [0.1, 0.2, 0.3]\n        },\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n        vectors={\n            \"my_vector\": [0.2, 0.5, 0.3]\n        },\n    ),\n]\ndataset.records.log(records)\n

You can add vectors from a dictionary-like structure, where the keys correspond to the names of the vector settings that were configured for your dataset and the value is a list of floats. Remember that you can also use the mapping parameter to specify the data structure.

# Add records to the dataset with the vector 'my_vector' and dimension=3\ndata = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n        \"my_vector\": [0.1, 0.2, 0.3],\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n        \"my_vector\": [0.2, 0.5, 0.3],\n    },\n]\ndataset.records.log(data)\n
"},{"location":"how_to_guides/record/#suggestions","title":"Suggestions","text":"

Suggestions refer to suggested responses (e.g. model predictions) that you can add to your records to make the annotation process faster. These can be added during the creation of the record or at a later stage. Only one suggestion can be provided for each question, and suggestion values must be compliant with the pre-defined questions e.g. if we have a RatingQuestion between 1 and 5, the suggestion should have a valid value within that range.

Check the Suggestions - Python Reference to see the attributes, arguments, and methods of the Suggestion class in detail.

Tip

Check the Suggestions - Python Reference for different formats per Question type.

As Record objects | From a generic data structure

You can also add suggestions to a record in an initialized Record object.

# Add records to the dataset with the label 'my_label'\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n        suggestions=[\n            rg.Suggestion(\n                \"my_label\",\n                \"positive\",\n                score=0.9,\n                agent=\"model_name\"\n            )\n        ],\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n        suggestions=[\n            rg.Suggestion(\n                \"my_label\",\n                \"negative\",\n                score=0.9,\n                agent=\"model_name\"\n            )\n        ],\n    ),\n]\ndataset.records.log(records)\n

You can add suggestions as a dictionary, where the keys correspond to the names of the questions that were configured for your dataset. Remember that you can also use the mapping parameter to specify the data structure.

# Add records to the dataset with the label question 'my_label'\ndata =  [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n        \"label\": \"positive\",\n        \"score\": 0.9,\n        \"agent\": \"model_name\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n        \"label\": \"negative\",\n        \"score\": 0.9,\n        \"agent\": \"model_name\",\n    },\n]\ndataset.records.log(\n    data=data,\n    mapping={\n        \"label\": \"my_label\",\n        \"score\": \"my_label.suggestion.score\",\n        \"agent\": \"my_label.suggestion.agent\",\n    },\n)\n
"},{"location":"how_to_guides/record/#responses","title":"Responses","text":"

If your dataset includes some annotations, you can add those to the records as you create them. Make sure that the responses adhere to the same format as Argilla's output and meet the schema requirements for the specific type of question being answered. Make sure to include the user_id if you're planning to add more than one response for the same question; otherwise, the responses will apply to all the annotators.

Check the Responses - Python Reference to see the attributes, arguments, and methods of the Response class in detail.

Note

Keep in mind that records with responses will be displayed as \"Draft\" in the UI.

Tip

Check the Responses - Python Reference for different formats per Question type.

As Record objects | From a generic data structure

You can also add responses to a record in an initialized Record object.

# Add records to the dataset with the label 'my_label'\nrecords = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n        responses=[\n            rg.Response(\"my_label\", \"positive\", user_id=user.id)\n        ]\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n        responses=[\n            rg.Response(\"my_label\", \"negative\", user_id=user.id)\n        ]\n    ),\n]\ndataset.records.log(records)\n

You can add responses as a dictionary, where the keys correspond to the names of the questions that were configured for your dataset. Remember that you can also use the mapping parameter to specify the data structure. If you want to specify the user that added the response, you can use the user_id parameter.

# Add records to the dataset with the label 'my_label'\ndata = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n        \"label\": \"positive\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n        \"label\": \"negative\",\n    },\n]\ndataset.records.log(data, user_id=user.id, mapping={\"label\": \"my_label.response\"})\n
"},{"location":"how_to_guides/record/#list-records","title":"List records","text":"

To list records in a dataset, you can use the records method on the Dataset object. This method returns a list of Record objects that can be iterated over to access the record properties.

for record in dataset.records(\n    with_suggestions=True,\n    with_responses=True,\n    with_vectors=True\n):\n\n    # Access the record properties\n    print(record.metadata)\n    print(record.vectors)\n    print(record.suggestions)\n    print(record.responses)\n\n    # Access the responses of the record\n    for response in record.responses:\n        print(response.value)\n
"},{"location":"how_to_guides/record/#update-records","title":"Update records","text":"

You can update records in a dataset by calling the log method on the Dataset object. To update a record, you need to provide the record id and the new data to be updated.

data = dataset.records.to_list(flatten=True)\n\nupdated_data = [\n    {\n        \"text\": sample[\"text\"],\n        \"label\": \"positive\",\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n
Update the metadata | Update vectors | Update suggestions | Update responses

The metadata of the Record object is a Python dictionary. To update it, you can iterate over the records and update the metadata by key. After that, you should update the records in the dataset.

Tip

Check the Metadata - Python Reference for different formats per MetadataProperty type.

updated_records = []\n\nfor record in dataset.records():\n\n    record.metadata[\"my_metadata\"] = \"new_value\"\n    record.metadata[\"my_new_metadata\"] = \"new_value\"\n\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)\n

If a new vector field is added to the dataset settings or some value for the existing record vectors must be updated, you can iterate over the records and update the vectors by key. After that, you should update the records in the dataset.

updated_records = []\n\nfor record in dataset.records(with_vectors=True):\n\n    record.vectors[\"my_vector\"] = [ 0, 1, 2, 3, 4, 5 ]\n    record.vectors[\"my_new_vector\"] = [ 0, 1, 2, 3, 4, 5 ]\n\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)\n

If some value for the existing record suggestions must be updated, you can iterate over the records and update the suggestions by key. You can also add a suggestion using the add method. After that, you should update the records in the dataset.

Tip

Check the Suggestions - Python Reference for different formats per Question type.

updated_records = []\n\nfor record in dataset.records(with_suggestions=True):\n\n    # We can update existing suggestions\n    record.suggestions[\"label\"].value = \"new_value\"\n    record.suggestions[\"label\"].score = 0.9\n    record.suggestions[\"label\"].agent = \"model_name\"\n\n    # We can also add new suggestions with the `add` method:\n    if not record.suggestions[\"label\"]:\n        record.suggestions.add(\n            rg.Suggestion(\"label\", \"new_value\", score=0.9, agent=\"model_name\")\n        )\n\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)\n

If some value for the existing record responses must be updated, you can iterate over the records and update the responses by key. You can also add a response using the add method. After that, you should update the records in the dataset.

Tip

Check the Responses - Python Reference for different formats per Question type.

updated_records = []\n\nfor record in dataset.records(with_responses=True):\n\n    for response in record.responses[\"label\"]:\n\n        if response:\n            response.value = \"new_value\"\n            response.user_id = \"existing_user_id\"\n\n        else:\n            record.responses.add(rg.Response(\"label\", \"YES\", user_id=user.id))\n\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)\n
"},{"location":"how_to_guides/record/#delete-records","title":"Delete records","text":"

You can delete records in a dataset by calling the delete method on the Dataset object. To delete records, you need to retrieve them from the server and get a list of those that you want to delete.

records_to_delete = list(dataset.records)[:5]\ndataset.records.delete(records=records_to_delete)\n

Delete records based on a query

This can be very useful to avoid deleting records that already have responses.

For more information about the query syntax, check this how-to guide.

status_filter = rg.Query(\n    filter = rg.Filter((\"response.status\", \"==\", \"pending\"))\n)\nrecords_to_delete = list(dataset.records(status_filter))\n\ndataset.records.delete(records_to_delete)\n
"},{"location":"how_to_guides/use_markdown_to_format_rich_content/","title":"Use Markdown to format rich content","text":"

This guide provides an overview of how to use Markdown and HTML in TextFields to format chat conversations and allow for basic multi-modal support for images, audio, video and PDFs.

The TextField and TextQuestion provide the option to enable Markdown and therefore HTML by setting use_markdown=True. Given the flexibility of HTML, we can get great control over the presentation of data to our annotators. We provide some out-of-the-box methods for multi-modality and chat templates in the examples below.

Main Methods

image_to_html | audio_to_html | video_to_html | pdf_to_html | chat_to_html
image_to_html(\"local_image_file.png\")\n
audio_to_html(\"local_audio_file.mp3\")\n
audio_to_html(\"local_video_file.mp4\")\n
pdf_to_html(\"local_pdf_file.pdf\")\n
chat_to_html([{\"role\": \"user\", \"content\": \"hello\"}])\n

Check the Markdown - Python Reference to see the arguments of the rg.markdown methods in detail.

Tip

You can get pretty creative with HTML. For example, think about visualizing graphs and tables. You can use methods from some interesting Python packages, like pandas.DataFrame.to_html and plotly.io.to_html.
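For instance, a minimal sketch that renders a small pandas table into a Markdown-enabled field, assuming use_markdown=True is set on that field:

import pandas as pd\nimport argilla as rg\n\ndf = pd.DataFrame({\"model\": [\"a\", \"b\"], \"accuracy\": [0.91, 0.87]})\n\nrecord = rg.Record(\n    fields={\"markdown_enabled_field\": df.to_html(index=False)}\n)\n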

"},{"location":"how_to_guides/use_markdown_to_format_rich_content/#multi-modal-support-images-audio-video-pdfs-and-more","title":"Multi-modal support: images, audio, video, PDFs and more","text":"

Argilla has basic multi-modal support in two different ways, each with its pros and cons, but both offer the same UI experience because they both rely on HTML.

"},{"location":"how_to_guides/use_markdown_to_format_rich_content/#local-content-through-dataurls","title":"Local content through DataURLs","text":"

A DataURL is a scheme that allows data to be encoded into a base64-encoded string and then embedded directly into HTML. To facilitate this, we offer some functions: image_to_html, audio_to_html, video_to_html, and pdf_to_html. These functions accept either the file path or the file's byte data and return the corresponding HTML to render the media file within the Argilla user interface. Additionally, you can also set the width and height in pixels or percentages for video and image (defaults to the original dimensions) and the autoplay and loop attributes to True for audio and video (defaults to False).

Warning

DataURLs increase the memory usage compared to the original file size. Additionally, different browsers enforce different size limitations for rendering DataURLs, which might block the visualization experience per user.

Image | Audio | Video | PDF
from argilla.markdown import image_to_html\n\nhtml = image_to_html(\n    \"local_image_file.png\",\n    width=\"300px\",\n    height=\"300px\"\n)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
from argilla.markdown import audio_to_html\n\nhtml = audio_to_html(\n    \"local_audio_file.mp3\",\n    width=\"300px\",\n    height=\"300px\",\n    autoplay=True,\n    loop=True\n)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
from argilla.markdown import video_to_html\n\nhtml = video_to_html(\n    \"local_video_file.mp4\",\n    width=\"300px\",\n    height=\"300px\",\n    autoplay=True,\n    loop=True\n)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
from argilla.markdown import pdf_to_html\n\nhtml = pdf_to_html(\n    \"local_pdf_file.pdf\",\n    width=\"300px\",\n    height=\"300px\"\n)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
"},{"location":"how_to_guides/use_markdown_to_format_rich_content/#hosted-content","title":"Hosted content","text":"

Instead of uploading local files through DataURLs, we can also visualize URLs directly linking to media files such as images, audio, video, and PDFs hosted on a public or private server. In this case, you can use basic HTML to visualize content available on platforms like Google Drive or decide to configure a private media server.

Warning

When trying to access content from a private media server, you have to ensure that the Argilla server has network access to the private media server, which might be done through something like IP whitelisting.

Image | Audio | Video | PDF
html = \"<img src='https://example.com/public-image-file.jpg'>\"\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
html = \"\"\"\n<audio controls>\n    <source src=\"https://example.com/public-audio-file.mp3\" type=\"audio/mpeg\">\n</audio>\n\"\"\"\"\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
html = \"\"\"\n<video width=\"320\" height=\"240\" controls>\n    <source src=\"https://example.com/public-video-file.mp4\" type=\"video/mp4\">\n</video>\n\"\"\"\"\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
html = \"\"\"\n<iframe\n    src=\"https://example.com/public-pdf-file.pdf\"\n    width=\"600\"\n    height=\"500\">\n</iframe>\n\"\"\"\"\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n
"},{"location":"how_to_guides/use_markdown_to_format_rich_content/#chat-and-conversation-support","title":"Chat and conversation support","text":"

When working with chat data from multi-turn interactions with a Large Language Model, it might be nice to visualize the conversation in a way similar to a common chat interface. To facilitate this, we offer the chat_to_html function, which converts messages from the OpenAI chat format to an HTML-formatted chat interface.

OpenAI chat format

The OpenAI chat format is a way to structure a list of messages as input from users and returns a model-generated message as output. These messages can only contain the roles \"user\" for human messages and \"assistant\", \"system\" or \"model\" for model-generated messages.

from argilla.markdown import chat_to_html\n\nmessages = [\n    {\"role\": \"user\", \"content\": \"Hello! How are you?\"},\n    {\"role\": \"assistant\", \"content\": \"I'm good, thank you!\"}\n]\n\nhtml = chat_to_html(messages)\n\nrg.Record(\n    fields={\"markdown_enabled_field\": html}\n)\n

"},{"location":"how_to_guides/user/","title":"User management","text":"

This guide provides an overview of user roles and credentials, explaining how to set up and manage users in Argilla.

A user in Argilla is an authorized person who, depending on their role, can use the Python SDK and access the UI in a running Argilla instance. We differentiate between three types of users depending on their role, permissions and needs: owner, admin and annotator.

Overview | Owner | Admin | Annotator

| | Owner | Admin | Annotator |
| --- | --- | --- | --- |
| Number | Unlimited | Unlimited | Unlimited |
| Create and delete workspaces | Yes | No | No |
| Assign users to workspaces | Yes | No | No |
| Create, configure, update, and delete datasets | Yes | Only within assigned workspaces | No |
| Create, update, and delete users | Yes | No | No |
| Provide feedback with Argilla UI | Yes | Yes | Yes |

The owner refers to the root user who created the Argilla instance. Using workspaces within Argilla is highly beneficial for organizing tasks efficiently, and the owner has full access to all workspaces and their functionalities:

  • Workspace management: It can create, read and delete a workspace.
  • User management: It can create a new user, assign it to a workspace, and delete it. It can also list them and search for a specific one.
  • Dataset management: It can create, configure, retrieve, update, and delete datasets.
  • Annotation: It can annotate datasets in the Argilla UI.
  • Feedback: It can provide feedback with the Argilla UI.

An admin user can only access the workspaces it has been assigned to and cannot assign other users to it. An admin user has the following permissions:

  • Dataset management: It can create, configure, retrieve, update, and delete datasets only on the assigned workspaces.
  • Annotation: It can annotate datasets in the assigned workspaces via the Argilla UI.
  • Feedback: It can provide feedback with the Argilla UI.

An annotator user is limited to accessing only the datasets assigned to it within the workspace. It has two specific permissions:

  • Annotation: It can annotate the assigned datasets in the Argilla UI.
  • Feedback: It can provide feedback with the Argilla UI.
Question: Who can manage users?

Only users with the owner role can manage (create, retrieve, delete) other users.

"},{"location":"how_to_guides/user/#initial-users-and-credentials","title":"Initial users and credentials","text":"

Depending on your Argilla deployment, the initial user with the owner role will vary.

  • If you deploy on the Hugging Face Hub, the initial user will correspond to the Space owner (your personal account). The API key is automatically generated and can be copied from the \"Settings\" section of the UI.
  • If you deploy with Docker, the default values for the environment variables are: USERNAME: argilla, PASSWORD: 12345678, API_KEY: argilla.apikey.

For the new users, the username and password are set during the creation process. The API key can be copied from the \"Settings\" section of the UI.

Main Class

rg.User(\n    username=\"username\",\n    first_name=\"first_name\",\n    last_name=\"last_name\",\n    role=\"owner\",\n    password=\"password\",\n    client=client\n)\n

Check the User - Python Reference to see the attributes, arguments, and methods of the User class in detail.

"},{"location":"how_to_guides/user/#get-current-user","title":"Get current user","text":"

To ensure you're using the correct credentials for managing users, you can get the current user in Argilla using the me attribute of the Argilla class.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\ncurrent_user = client.me\n
"},{"location":"how_to_guides/user/#create-a-user","title":"Create a user","text":"

To create a new user in Argilla, you can define it in the User class and then call the create method. This method is inherited from the Resource base class and operates without modifications.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser_to_create = rg.User(\n    username=\"my_username\",\n    password=\"12345678\",\n)\n\ncreated_user = user_to_create.create()\n

Accessing attributes

Access the attributes of a user by calling them directly on the User object. For example, user.id or user.username.
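
For example, the following sketch (assuming a user named \"my_username\" exists) reads a few attributes directly:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser = client.users(\"my_username\")\n\nprint(user.id)\nprint(user.username)\nprint(user.role)\n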

"},{"location":"how_to_guides/user/#list-users","title":"List users","text":"

You can list all the existing users in Argilla by accessing the users attribute on the Argilla class and iterating over them. You can also use len(client.users) to get the number of users.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nusers = client.users\n\nfor user in users:\n    print(user)\n

Notebooks

When using a notebook, executing client.users will display a table with username, id, role, and the last update as updated_at.

"},{"location":"how_to_guides/user/#retrieve-a-user","title":"Retrieve a user","text":"

You can retrieve an existing user from Argilla by accessing the users attribute on the Argilla class and passing the username or id as an argument. If the user does not exist, a warning message will be raised and None will be returned.

By usernameBy id
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nretrieved_user = client.users(\"my_username\")\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nretrieved_user = client.users(id=\"<uuid-or-uuid-string>\")\n
"},{"location":"how_to_guides/user/#check-user-existence","title":"Check user existence","text":"

You can check if a user exists. The client.users method will return None if the user was not found.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser = client.users(\"my_username\")\n\nif user is not None:\n    pass\n
"},{"location":"how_to_guides/user/#list-users-in-a-workspace","title":"List users in a workspace","text":"

You can list all the users in a workspace by accessing the users attribute on the Workspace class and iterating over them. You can also use len(workspace.users) to get the number of users in the workspace.

For further information on how to manage workspaces, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces('my_workspace')\n\nfor user in workspace.users:\n    print(user)\n
"},{"location":"how_to_guides/user/#add-a-user-to-a-workspace","title":"Add a user to a workspace","text":"

You can add an existing user to a workspace in Argilla by calling the add_to_workspace method on the User class.

For further information on how to manage workspaces, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser = client.users('my_username')\nworkspace = client.workspaces('my_workspace')\n\nadded_user = user.add_to_workspace(workspace)\n
"},{"location":"how_to_guides/user/#remove-a-user-from-a-workspace","title":"Remove a user from a workspace","text":"

You can remove an existing user from a workspace in Argilla by calling the remove_from_workspace method on the User class.

For further information on how to manage workspaces, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser = client.users('my_username')\nworkspace = client.workspaces('my_workspace')\n\nremoved_user = user.remove_from_workspace(workspace)\n
"},{"location":"how_to_guides/user/#delete-a-user","title":"Delete a user","text":"

You can delete an existing user from Argilla by calling the delete method on the User class.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nuser_to_delete = client.users('my_username')\n\ndeleted_user = user_to_delete.delete()\n
"},{"location":"how_to_guides/workspace/","title":"Workspace management","text":"

This guide provides an overview of workspaces, explaining how to set up and manage workspaces in Argilla.

A workspace is a space inside your Argilla instance where authorized users can collaborate on datasets. It is accessible through the Python SDK and the UI.

Question: Who can manage workspaces?

Only users with the owner role can manage (create, read and delete) workspaces.

A user with the admin role can only read the workspace to which it belongs.

"},{"location":"how_to_guides/workspace/#initial-workspaces","title":"Initial workspaces","text":"

Depending on your Argilla deployment, the initial workspace will vary.

  • If you deploy on the Hugging Face Hub, the initial workspace will be the one indicated in the .oauth.yaml file. By default, argilla.
  • If you deploy with Docker, you will need to create a workspace as shown in the next section.

Main Class

rg.Workspace(\n    name = \"name\",\n    client=client\n)\n

Check the Workspace - Python Reference to see the attributes, arguments, and methods of the Workspace class in detail.

"},{"location":"how_to_guides/workspace/#create-a-new-workspace","title":"Create a new workspace","text":"

To create a new workspace in Argilla, you can define it in the Workspace class and then call the create method. This method is inherited from the Resource base class and operates without modifications.

When you create a new workspace, it will be empty. To create and add a new dataset, check these guides.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace_to_create = rg.Workspace(name=\"my_workspace\")\n\ncreated_workspace = workspace_to_create.create()\n

Accessing attributes

Access the attributes of a workspace by calling them directly on the Workspace object. For example, workspace.id or workspace.name.
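
For example, the following sketch (assuming a workspace named \"my_workspace\" exists) reads both attributes:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\nprint(workspace.id)\nprint(workspace.name)\n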

"},{"location":"how_to_guides/workspace/#list-workspaces","title":"List workspaces","text":"

You can list all the existing workspaces in Argilla by accessing the workspaces attribute on the Argilla class and iterating over them. You can also use len(client.workspaces) to get the number of workspaces.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspaces = client.workspaces\n\nfor workspace in workspaces:\n    print(workspace)\n

Notebooks

When using a notebook, executing client.workspaces will display a table with the number of datasets in each workspace, name, id, and the last update as updated_at.

"},{"location":"how_to_guides/workspace/#retrieve-a-workspace","title":"Retrieve a workspace","text":"

You can retrieve a workspace by accessing the workspaces method on the Argilla class and passing the name or id of the workspace as an argument. If the workspace does not exist, a warning message will be raised and None will be returned.

By nameBy id
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nretrieved_workspace = client.workspaces(\"my_workspace\")\n
import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nretrieved_workspace = client.workspaces(id=\"<uuid-or-uuid-string>\")\n
"},{"location":"how_to_guides/workspace/#check-workspace-existence","title":"Check workspace existence","text":"

You can check if a workspace exists. The client.workspaces method will return None if the workspace is not found.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\nif workspace is not None:\n    pass\n
"},{"location":"how_to_guides/workspace/#list-users-in-a-workspace","title":"List users in a workspace","text":"

You can list all the users in a workspace by accessing the users attribute on the Workspace class and iterating over them. You can also use len(workspace.users) to get the number of users in the workspace.

For further information on how to manage users, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces('my_workspace')\n\nfor user in workspace.users:\n    print(user)\n
"},{"location":"how_to_guides/workspace/#add-a-user-to-a-workspace","title":"Add a user to a workspace","text":"

You can also add a user to a workspace by calling the add_user method on the Workspace class.

For further information on how to manage users, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\nadded_user = workspace.add_user(\"my_username\")\n
"},{"location":"how_to_guides/workspace/#remove-a-user-from-workspace","title":"Remove a user from workspace","text":"

You can also remove a user from a workspace by calling the remove_user method on the Workspace class.

For further information on how to manage users, check this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\nremoved_user = workspace.remove_user(\"my_username\")\n
"},{"location":"how_to_guides/workspace/#delete-a-workspace","title":"Delete a workspace","text":"

To delete a workspace, it must not have any datasets associated with it; if the workspace contains any dataset, deletion will fail. You can delete a workspace by calling the delete method on the Workspace class.

To clear a workspace and delete all of its datasets, refer to this how-to guide.

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace_to_delete = client.workspaces(\"my_workspace\")\n\ndeleted_workspace = workspace_to_delete.delete()\n
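
If deletion fails because the workspace still contains datasets, you can delete them first. This is a sketch that assumes each Dataset object exposes the delete method inherited from the Resource base class:

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\n\nworkspace = client.workspaces(\"my_workspace\")\n\n# Delete every dataset in the workspace, then the workspace itself\nfor dataset in workspace.datasets:\n    dataset.delete()\n\nworkspace.delete()\n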
"},{"location":"reference/argilla/SUMMARY/","title":"SUMMARY","text":"
  • rg.Argilla
  • rg.Workspace
  • rg.User
  • rg.Dataset
    • rg.Dataset.records
  • rg.Settings
    • Fields
    • Questions
    • Metadata
    • Vectors
    • Distribution
  • rg.Record
    • rg.Response
    • rg.Suggestion
    • rg.Vector
    • rg.Metadata
  • rg.Query
  • rg.markdown
"},{"location":"reference/argilla/client/","title":"rg.Argilla","text":"

To interact with the Argilla server from Python you can use the Argilla class. The Argilla client is used to create, get, update, and delete all Argilla resources, such as workspaces, users, datasets, and records.

"},{"location":"reference/argilla/client/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/client/#connecting-to-an-argilla-server","title":"Connecting to an Argilla server","text":"

To connect to an Argilla server, instantiate the Argilla class and pass the api_url of the server and the api_key to authenticate.

import argilla as rg\n\nclient = rg.Argilla(\n    api_url=\"https://argilla.example.com\",\n    api_key=\"my_api_key\",\n)\n
"},{"location":"reference/argilla/client/#accessing-dataset-workspace-and-user-objects","title":"Accessing Dataset, Workspace, and User objects","text":"

The Argilla client provides access to the Dataset, Workspace, and User objects of the Argilla server.

my_dataset = client.datasets(\"my_dataset\")\n\nmy_workspace = client.workspaces(\"my_workspace\")\n\nmy_user = client.users(\"my_user\")\n

These resources can then be interacted with to access their properties and methods. For example, to list all datasets in a workspace:

for dataset in my_workspace.datasets:\n    print(dataset.name)\n
"},{"location":"reference/argilla/client/#src.argilla.client.Argilla","title":"Argilla","text":"

Bases: APIClient

Argilla API client. This is the main entry point to interact with the API.

Attributes:

Name Type Description workspaces Workspaces

A collection of workspaces.

datasets Datasets

A collection of datasets.

users Users

A collection of users.

me User

The current user.

Source code in src/argilla/client.py
class Argilla(_api.APIClient):\n    \"\"\"Argilla API client. This is the main entry point to interact with the API.\n\n    Attributes:\n        workspaces: A collection of workspaces.\n        datasets: A collection of datasets.\n        users: A collection of users.\n        me: The current user.\n    \"\"\"\n\n    # Default instance of Argilla\n    _default_client: Optional[\"Argilla\"] = None\n\n    def __init__(\n        self,\n        api_url: Optional[str] = DEFAULT_HTTP_CONFIG.api_url,\n        api_key: Optional[str] = DEFAULT_HTTP_CONFIG.api_key,\n        timeout: int = DEFAULT_HTTP_CONFIG.timeout,\n        retries: int = DEFAULT_HTTP_CONFIG.retries,\n        **http_client_args,\n    ) -> None:\n        \"\"\"Inits the `Argilla` client.\n\n        Args:\n            api_url: the URL of the Argilla API. If not provided, then the value will try\n                to be set from `ARGILLA_API_URL` environment variable. Defaults to\n                `\"http://localhost:6900\"`.\n            api_key: the key to be used to authenticate in the Argilla API. If not provided,\n                then the value will try to be set from `ARGILLA_API_KEY` environment variable.\n                Defaults to `None`.\n            timeout: the maximum time in seconds to wait for a request to the Argilla API\n                to be completed before raising an exception. Defaults to `60`.\n            retries: the number of times to retry the HTTP connection to the Argilla API\n                before raising an exception. Defaults to `5`.\n        \"\"\"\n        super().__init__(api_url=api_url, api_key=api_key, timeout=timeout, retries=retries, **http_client_args)\n\n        self._set_default(self)\n\n    @property\n    def workspaces(self) -> \"Workspaces\":\n        \"\"\"A collection of workspaces on the server.\"\"\"\n        return Workspaces(client=self)\n\n    @property\n    def datasets(self) -> \"Datasets\":\n        \"\"\"A collection of datasets on the server.\"\"\"\n        return Datasets(client=self)\n\n    @property\n    def users(self) -> \"Users\":\n        \"\"\"A collection of users on the server.\"\"\"\n        return Users(client=self)\n\n    @cached_property\n    def me(self) -> \"User\":\n        from argilla.users import User\n\n        return User(client=self, _model=self.api.users.get_me())\n\n    ############################\n    # Private methods\n    ############################\n\n    @classmethod\n    def _set_default(cls, client: \"Argilla\") -> None:\n        \"\"\"Set the default instance of Argilla.\"\"\"\n        cls._default_client = client\n\n    @classmethod\n    def _get_default(cls) -> \"Argilla\":\n        \"\"\"Get the default instance of Argilla. If it doesn't exist, create a new one.\"\"\"\n        if cls._default_client is None:\n            cls._default_client = Argilla()\n        return cls._default_client\n
"},{"location":"reference/argilla/client/#src.argilla.client.Argilla.workspaces","title":"workspaces: Workspaces property","text":"

A collection of workspaces on the server.

"},{"location":"reference/argilla/client/#src.argilla.client.Argilla.datasets","title":"datasets: Datasets property","text":"

A collection of datasets on the server.

"},{"location":"reference/argilla/client/#src.argilla.client.Argilla.users","title":"users: Users property","text":"

A collection of users on the server.

"},{"location":"reference/argilla/client/#src.argilla.client.Argilla.__init__","title":"__init__(api_url=DEFAULT_HTTP_CONFIG.api_url, api_key=DEFAULT_HTTP_CONFIG.api_key, timeout=DEFAULT_HTTP_CONFIG.timeout, retries=DEFAULT_HTTP_CONFIG.retries, **http_client_args)","text":"

Inits the Argilla client.

Parameters:

Name Type Description Default api_url Optional[str]

the URL of the Argilla API. If not provided, then the value will try to be set from ARGILLA_API_URL environment variable. Defaults to \"http://localhost:6900\".

api_url api_key Optional[str]

the key to be used to authenticate in the Argilla API. If not provided, then the value will try to be set from ARGILLA_API_KEY environment variable. Defaults to None.

api_key timeout int

the maximum time in seconds to wait for a request to the Argilla API to be completed before raising an exception. Defaults to 60.

timeout retries int

the number of times to retry the HTTP connection to the Argilla API before raising an exception. Defaults to 5.

retries Source code in src/argilla/client.py
def __init__(\n    self,\n    api_url: Optional[str] = DEFAULT_HTTP_CONFIG.api_url,\n    api_key: Optional[str] = DEFAULT_HTTP_CONFIG.api_key,\n    timeout: int = DEFAULT_HTTP_CONFIG.timeout,\n    retries: int = DEFAULT_HTTP_CONFIG.retries,\n    **http_client_args,\n) -> None:\n    \"\"\"Inits the `Argilla` client.\n\n    Args:\n        api_url: the URL of the Argilla API. If not provided, then the value will try\n            to be set from `ARGILLA_API_URL` environment variable. Defaults to\n            `\"http://localhost:6900\"`.\n        api_key: the key to be used to authenticate in the Argilla API. If not provided,\n            then the value will try to be set from `ARGILLA_API_KEY` environment variable.\n            Defaults to `None`.\n        timeout: the maximum time in seconds to wait for a request to the Argilla API\n            to be completed before raising an exception. Defaults to `60`.\n        retries: the number of times to retry the HTTP connection to the Argilla API\n            before raising an exception. Defaults to `5`.\n    \"\"\"\n    super().__init__(api_url=api_url, api_key=api_key, timeout=timeout, retries=retries, **http_client_args)\n\n    self._set_default(self)\n
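
Because api_url and api_key fall back to the ARGILLA_API_URL and ARGILLA_API_KEY environment variables, the client can also be instantiated without arguments. A sketch, assuming both variables are set before the client is created:

import os\n\nos.environ[\"ARGILLA_API_URL\"] = \"http://localhost:6900\"\nos.environ[\"ARGILLA_API_KEY\"] = \"argilla.apikey\"\n\nimport argilla as rg\n\n# No arguments needed: the values are read from the environment variables\nclient = rg.Argilla()\n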
"},{"location":"reference/argilla/markdown/","title":"rg.markdown","text":"

To support the usage of Markdown within Argilla, we've created some helper functions to ease the use of data URL conversions and chat message visualizations.
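
For instance, the HTML strings returned by these helpers can be used as the content of a markdown-enabled field in a record. A sketch, assuming a dataset whose \"content\" field renders markdown/HTML:

from argilla.markdown import image_to_html\n\nhtml = image_to_html(\"my_image.png\", width=\"300px\", height=\"300px\")\n\n# Use the returned HTML string as the value of a markdown-enabled field\nrecord = {\"content\": html}\n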

"},{"location":"reference/argilla/markdown/#src.argilla.markdown.media","title":"media","text":""},{"location":"reference/argilla/markdown/#src.argilla.markdown.media.video_to_html","title":"video_to_html(file_source, file_type=None, width=None, height=None, autoplay=False, loop=False)","text":"

Convert a video file to an HTML tag with embedded base64 data.

Parameters:

Name Type Description Default file_source Union[str, bytes]

The path to the media file or a non-b64 encoded byte string.

required file_type Optional[str]

The type of the video file. If not provided, it will be inferred from the file extension.

None width Optional[str]

Display width in HTML. Defaults to None.

None height Optional[str]

Display height in HTML. Defaults to None.

None autoplay bool

True to autoplay media. Defaults to False.

False loop bool

True to loop media. Defaults to False.

False

Returns:

Type Description str

The HTML tag with embedded base64 data.

Examples:

from argilla.markdown import video_to_html\nhtml = video_to_html(\"my_video.mp4\", width=\"300px\", height=\"300px\", autoplay=True, loop=True)\n
Source code in src/argilla/markdown/media.py
def video_to_html(\n    file_source: Union[str, bytes],\n    file_type: Optional[str] = None,\n    width: Optional[str] = None,\n    height: Optional[str] = None,\n    autoplay: bool = False,\n    loop: bool = False,\n) -> str:\n    \"\"\"\n    Convert a video file to an HTML tag with embedded base64 data.\n\n    Args:\n        file_source: The path to the media file or a non-b64 encoded byte string.\n        file_type: The type of the video file. If not provided, it will be inferred from the file extension.\n        width: Display width in HTML. Defaults to None.\n        height: Display height in HTML. Defaults to None.\n        autoplay: True to autoplay media. Defaults to False.\n        loop: True to loop media. Defaults to False.\n\n    Returns:\n        The HTML tag with embedded base64 data.\n\n    Examples:\n        ```python\n        from argilla.markdown import video_to_html\n        html = video_to_html(\"my_video.mp4\", width=\"300px\", height=\"300px\", autoplay=True, loop=True)\n        ```\n    \"\"\"\n    return _media_to_html(\"video\", file_source, file_type, width, height, autoplay, loop)\n
"},{"location":"reference/argilla/markdown/#src.argilla.markdown.media.audio_to_html","title":"audio_to_html(file_source, file_type=None, width=None, height=None, autoplay=False, loop=False)","text":"

Convert an audio file to an HTML tag with embedded base64 data.

Parameters:

Name Type Description Default file_source Union[str, bytes]

The path to the media file or a non-b64 encoded byte string.

required file_type Optional[str]

The type of the audio file. If not provided, it will be inferred from the file extension.

None width Optional[str]

Display width in HTML. Defaults to None.

None height Optional[str]

Display height in HTML. Defaults to None.

None autoplay bool

True to autoplay media. Defaults to False.

False loop bool

True to loop media. Defaults to False.

False

Returns:

Type Description str

The HTML tag with embedded base64 data.

Examples:

from argilla.markdown import audio_to_html\nhtml = audio_to_html(\"my_audio.mp3\", width=\"300px\", height=\"300px\", autoplay=True, loop=True)\n
Source code in src/argilla/markdown/media.py
def audio_to_html(\n    file_source: Union[str, bytes],\n    file_type: Optional[str] = None,\n    width: Optional[str] = None,\n    height: Optional[str] = None,\n    autoplay: bool = False,\n    loop: bool = False,\n) -> str:\n    \"\"\"\n    Convert an audio file to an HTML tag with embedded base64 data.\n\n    Args:\n        file_source: The path to the media file or a non-b64 encoded byte string.\n        file_type: The type of the audio file. If not provided, it will be inferred from the file extension.\n        width: Display width in HTML. Defaults to None.\n        height: Display height in HTML. Defaults to None.\n        autoplay: True to autoplay media. Defaults to False.\n        loop: True to loop media. Defaults to False.\n\n    Returns:\n        The HTML tag with embedded base64 data.\n\n    Examples:\n        ```python\n        from argilla.markdown import audio_to_html\n        html = audio_to_html(\"my_audio.mp3\", width=\"300px\", height=\"300px\", autoplay=True, loop=True)\n        ```\n    \"\"\"\n    return _media_to_html(\"audio\", file_source, file_type, width, height, autoplay, loop)\n
"},{"location":"reference/argilla/markdown/#src.argilla.markdown.media.image_to_html","title":"image_to_html(file_source, file_type=None, width=None, height=None)","text":"

Convert an image file to an HTML tag with embedded base64 data.

Parameters:

Name Type Description Default file_source Union[str, bytes]

The path to the media file or a non-b64 encoded byte string.

required file_type Optional[str]

The type of the image file. If not provided, it will be inferred from the file extension.

None width Optional[str]

Display width in HTML. Defaults to None.

None height Optional[str]

Display height in HTML. Defaults to None.

None

Returns:

Type Description str

The HTML tag with embedded base64 data.

Examples:

from argilla.markdown import image_to_html\nhtml = image_to_html(\"my_image.png\", width=\"300px\", height=\"300px\")\n
Source code in src/argilla/markdown/media.py
def image_to_html(\n    file_source: Union[str, bytes],\n    file_type: Optional[str] = None,\n    width: Optional[str] = None,\n    height: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Convert an image file to an HTML tag with embedded base64 data.\n\n    Args:\n        file_source: The path to the media file or a non-b64 encoded byte string.\n        file_type: The type of the image file. If not provided, it will be inferred from the file extension.\n        width: Display width in HTML. Defaults to None.\n        height: Display height in HTML. Defaults to None.\n\n    Returns:\n        The HTML tag with embedded base64 data.\n\n    Examples:\n        ```python\n        from argilla.markdown import image_to_html\n        html = image_to_html(\"my_image.png\", width=\"300px\", height=\"300px\")\n        ```\n    \"\"\"\n    return _media_to_html(\"image\", file_source, file_type, width, height)\n
"},{"location":"reference/argilla/markdown/#src.argilla.markdown.media.pdf_to_html","title":"pdf_to_html(file_source, width='1000px', height='1000px')","text":"

Convert a pdf file to an HTML tag with embedded data.

Parameters:

Name Type Description Default file_source Union[str, bytes]

The path to the PDF file, a bytes object with PDF data, or a URL.

required width Optional[str]

Display width in HTML. Defaults to \"1000px\".

'1000px' height Optional[str]

Display height in HTML. Defaults to \"1000px\".

'1000px'

Returns:

Type Description str

HTML string embedding the PDF.

Raises:

Type Description ValueError

If the width and height are not valid pixel or percentage values.

Examples:

from argilla.markdown import pdf_to_html\nhtml = pdf_to_html(\"my_pdf.pdf\", width=\"300px\", height=\"300px\")\n
Source code in src/argilla/markdown/media.py
def pdf_to_html(\n    file_source: Union[str, bytes], width: Optional[str] = \"1000px\", height: Optional[str] = \"1000px\"\n) -> str:\n    \"\"\"\n    Convert a pdf file to an HTML tag with embedded data.\n\n    Args:\n        file_source: The path to the PDF file, a bytes object with PDF data, or a URL.\n        width: Display width in HTML. Defaults to \"1000px\".\n        height: Display height in HTML. Defaults to \"1000px\".\n\n    Returns:\n        HTML string embedding the PDF.\n\n    Raises:\n        ValueError: If the width and height are not pixel or percentage.\n\n    Examples:\n        ```python\n        from argilla.markdown import pdf_to_html\n        html = pdf_to_html(\"my_pdf.pdf\", width=\"300px\", height=\"300px\")\n        ```\n    \"\"\"\n    if not _is_valid_dimension(width) or not _is_valid_dimension(height):\n        raise ValueError(\"Width and height must be valid pixel (e.g., '300px') or percentage (e.g., '50%') values.\")\n\n    if isinstance(file_source, str) and urlparse(file_source).scheme in [\"http\", \"https\"]:\n        return f'<embed src=\"{file_source}\" type=\"application/pdf\" width=\"{width}\" height=\"{height}\"></embed>'\n\n    file_data, _ = _get_file_data(file_source, \"pdf\")\n    pdf_base64 = base64.b64encode(file_data).decode(\"utf-8\")\n    data_url = f\"data:application/pdf;base64,{pdf_base64}\"\n    return f'<object id=\"pdf\" data=\"{data_url}\" type=\"application/pdf\" width=\"{width}\" height=\"{height}\"></object>'\n
"},{"location":"reference/argilla/markdown/#src.argilla.markdown.chat","title":"chat","text":""},{"location":"reference/argilla/markdown/#src.argilla.markdown.chat.chat_to_html","title":"chat_to_html(messages)","text":"

Converts a list of chat messages in the OpenAI format to HTML.

Parameters:

Name Type Description Default messages List[Dict[str, str]]

A list of dictionaries where each dictionary represents a chat message. Each dictionary should have the keys: - \"role\": A string indicating the role of the sender (e.g., \"user\", \"model\", \"assistant\", \"system\"). - \"content\": The content of the message.

required

Returns:

Name Type Description str str

An HTML string that represents the chat conversation.

Raises:

Type Description ValueError

If an invalid role is passed.

Examples:

from argilla.markdown import chat_to_html\nhtml = chat_to_html([\n    {\"role\": \"user\", \"content\": \"hello\"},\n    {\"role\": \"assistant\", \"content\": \"goodbye\"}\n])\n
Source code in src/argilla/markdown/chat.py
def chat_to_html(messages: List[Dict[str, str]]) -> str:\n    \"\"\"\n    Converts a list of chat messages in the OpenAI format to HTML.\n\n    Args:\n        messages (List[Dict[str, str]]): A list of dictionaries where each dictionary represents a chat message.\n            Each dictionary should have the keys:\n                - \"role\": A string indicating the role of the sender (e.g., \"user\", \"model\", \"assistant\", \"system\").\n                - \"content\": The content of the message.\n\n    Returns:\n        str: An HTML string that represents the chat conversation.\n\n    Raises:\n        ValueError: If the an invalid role is passed.\n\n    Examples:\n        ```python\n        from argilla.markdown import chat_to_html\n        html = chat_to_html([\n            {\"role\": \"user\", \"content\": \"hello\"},\n            {\"role\": \"assistant\", \"content\": \"goodbye\"}\n        ])\n        ```\n    \"\"\"\n    chat_html = \"\"\n    for message in messages:\n        role = message[\"role\"]\n        content = message[\"content\"]\n        content_html = markdown.markdown(content)\n\n        if role == \"user\":\n            html = '<div class=\"user-message\">' + '<div class=\"message-content\">'\n        elif role in [\"model\", \"assistant\", \"system\"]:\n            html = '<div class=\"system-message\">' + '<div class=\"message-content\">'\n        else:\n            raise ValueError(f\"Invalid role: {role}\")\n\n        html += f\"{content_html}\"\n        html += \"</div></div>\"\n        chat_html += html\n\n    return f\"<body>{CHAT_CSS_STYLE}{chat_html}</body>\"\n
"},{"location":"reference/argilla/search/","title":"rg.Query","text":"

To collect records based on searching criteria, you can use the Query and Filter classes. The Query class is used to define the search criteria, while the Filter class is used to filter the search results. Filter is passed to a Query object so you can combine multiple filters to create complex search queries. A Query object can also be passed to Dataset.records to fetch records based on the search criteria.

"},{"location":"reference/argilla/search/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/search/#searching-for-records-with-terms","title":"Searching for records with terms","text":"

To search for records with terms, you can use the Dataset.records attribute with a query string. The search terms are used to search for records that contain the terms in the text field.

for record in dataset.records(query=\"paris\"):\n    print(record)\n
"},{"location":"reference/argilla/search/#filtering-records-by-conditions","title":"Filtering records by conditions","text":"

Argilla allows you to filter records based on conditions. You can use the Filter class to define the conditions and pass them to the Dataset.records attribute to fetch records that match. Supported operators are \"==\", \">=\", \"<=\", and \"in\". Conditions target metadata, suggestions, or responses using dot notation (for example, \"metadata.count\"), and multiple conditions can be combined in a single filter.

# create a range filter from 10 to 20\nrange_filter = rg.Filter(\n    [\n        (\"metadata.count\", \">=\", 10),\n        (\"metadata.count\", \"<=\", 20)\n    ]\n)\n\n# query records with a metadata count between 10 and 20 (inclusive)\nquery = rg.Query(filter=range_filter, query=\"paris\")\n\n# iterate over the results\nfor record in dataset.records(query=query):\n    print(record)\n
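
The \"in\" operator works the same way. A sketch, assuming records carry a \"category\" metadata property:

# keep only records whose category is one of the listed values\nin_filter = rg.Filter((\"metadata.category\", \"in\", [\"news\", \"sports\"]))\n\nquery = rg.Query(filter=in_filter, query=\"paris\")\n\nfor record in dataset.records(query=query):\n    print(record)\n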
"},{"location":"reference/argilla/search/#src.argilla.records._search.Query","title":"Query","text":"

This class is used to map user queries to the internal query models

Source code in src/argilla/records/_search.py
class Query:\n    \"\"\"This class is used to map user queries to the internal query models\"\"\"\n\n    def __init__(\n        self,\n        *,\n        query: Union[str, None] = None,\n        similar: Union[Similar, None] = None,\n        filter: Union[Filter, Conditions, None] = None,\n    ):\n        \"\"\"Create a query object for use in Argilla search requests.add()\n\n        Parameters:\n            query (Union[str, None], optional): The query string that will be used to search.\n            similar (Union[Similar, None], optional): The similar object that will be used to search for similar records\n            filter (Union[Filter, None], optional): The filter object that will be used to filter the search results.\n        \"\"\"\n\n        if isinstance(filter, tuple):\n            filter = [filter]\n\n        if isinstance(filter, list):\n            filter = Filter(conditions=filter)\n\n        self.query = query\n        self.filter = filter\n        self.similar = similar\n\n    def has_search(self) -> bool:\n        return bool(self.query or self.similar or self.filter)\n\n    def api_model(self) -> SearchQueryModel:\n        model = SearchQueryModel()\n\n        if self.query or self.similar:\n            query = QueryModel()\n\n            if self.query is not None:\n                query.text = TextQueryModel(q=self.query)\n\n            if self.similar is not None:\n                query.vector = self.similar.api_model()\n\n            model.query = query\n\n        if self.filter is not None:\n            model.filters = self.filter.api_model()\n\n        return model\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Query.__init__","title":"__init__(*, query=None, similar=None, filter=None)","text":"

Create a query object for use in Argilla search requests.

Parameters:

Name Type Description Default query Union[str, None]

The query string that will be used to search.

None similar Union[Similar, None]

The similar object that will be used to search for similar records

None filter Union[Filter, None]

The filter object that will be used to filter the search results.

None Source code in src/argilla/records/_search.py
def __init__(\n    self,\n    *,\n    query: Union[str, None] = None,\n    similar: Union[Similar, None] = None,\n    filter: Union[Filter, Conditions, None] = None,\n):\n    \"\"\"Create a query object for use in Argilla search requests.add()\n\n    Parameters:\n        query (Union[str, None], optional): The query string that will be used to search.\n        similar (Union[Similar, None], optional): The similar object that will be used to search for similar records\n        filter (Union[Filter, None], optional): The filter object that will be used to filter the search results.\n    \"\"\"\n\n    if isinstance(filter, tuple):\n        filter = [filter]\n\n    if isinstance(filter, list):\n        filter = Filter(conditions=filter)\n\n    self.query = query\n    self.filter = filter\n    self.similar = similar\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Filter","title":"Filter","text":"

This class is used to map user filters to the internal filter models

Source code in src/argilla/records/_search.py
class Filter:\n    \"\"\"This class is used to map user filters to the internal filter models\"\"\"\n\n    def __init__(self, conditions: Union[Conditions, None] = None):\n        \"\"\" Create a filter object for use in Argilla search requests.\n\n        Parameters:\n            conditions (Union[List[Tuple[str, str, Any]], Tuple[str, str, Any], None], optional): \\\n                The conditions that will be used to filter the search results. \\\n                The conditions should be a list of tuples where each tuple contains \\\n                the field, operator, and value. For example `(\"label\", \"in\", [\"positive\",\"happy\"])`.\\\n        \"\"\"\n\n        if isinstance(conditions, tuple):\n            conditions = [conditions]\n        self.conditions = [Condition(condition) for condition in conditions]\n\n    def api_model(self) -> AndFilterModel:\n        return AndFilterModel.model_validate({\"and\": [condition.api_model() for condition in self.conditions]})\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Filter.__init__","title":"__init__(conditions=None)","text":"

Create a filter object for use in Argilla search requests.

Parameters:

Name Type Description Default conditions Union[List[Tuple[str, str, Any]], Tuple[str, str, Any], None]

The conditions that will be used to filter the search results. The conditions should be a list of tuples where each tuple contains the field, operator, and value. For example (\"label\", \"in\", [\"positive\",\"happy\"]).

None Source code in src/argilla/records/_search.py
def __init__(self, conditions: Union[Conditions, None] = None):\n    \"\"\" Create a filter object for use in Argilla search requests.\n\n    Parameters:\n        conditions (Union[List[Tuple[str, str, Any]], Tuple[str, str, Any], None], optional): \\\n            The conditions that will be used to filter the search results. \\\n            The conditions should be a list of tuples where each tuple contains \\\n            the field, operator, and value. For example `(\"label\", \"in\", [\"positive\",\"happy\"])`.\\\n    \"\"\"\n\n    if isinstance(conditions, tuple):\n        conditions = [conditions]\n    self.conditions = [Condition(condition) for condition in conditions]\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Similar","title":"Similar","text":"

This class is used to map user similar queries to the internal query models

Source code in src/argilla/records/_search.py
class Similar:\n    \"\"\"This class is used to map user similar queries to the internal query models\"\"\"\n\n    def __init__(self, name: str, value: Union[Iterable[float], \"Record\"], most_similar: bool = True):\n        \"\"\"\n        Create a similar object for use in Argilla search requests.\n\n        Parameters:\n            name: The name of the vector field\n            value: The vector value or the record to search for similar records\n            most_similar: Whether to search for the most similar records or the least similar records\n        \"\"\"\n\n        self.name = name\n        self.value = value\n        self.most_similar = most_similar if most_similar is not None else True\n\n    def api_model(self) -> VectorQueryModel:\n        from argilla.records import Record\n\n        order = \"most_similar\" if self.most_similar else \"least_similar\"\n\n        if isinstance(self.value, Record):\n            return VectorQueryModel(name=self.name, record_id=self.value._server_id, order=order)\n\n        return VectorQueryModel(name=self.name, value=self.value, order=order)\n
"},{"location":"reference/argilla/search/#src.argilla.records._search.Similar.__init__","title":"__init__(name, value, most_similar=True)","text":"

Create a similar object for use in Argilla search requests.

Parameters:

Name Type Description Default name str

The name of the vector field

required value Union[Iterable[float], Record]

The vector value or the record to search for similar records

required most_similar bool

Whether to search for the most similar records or the least similar records

True Source code in src/argilla/records/_search.py
def __init__(self, name: str, value: Union[Iterable[float], \"Record\"], most_similar: bool = True):\n    \"\"\"\n    Create a similar object for use in Argilla search requests.\n\n    Parameters:\n        name: The name of the vector field\n        value: The vector value or the record to search for similar records\n        most_similar: Whether to search for the most similar records or the least similar records\n    \"\"\"\n\n    self.name = name\n    self.value = value\n    self.most_similar = most_similar if most_similar is not None else True\n
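
Putting it together, a vector search can be issued by passing a Similar object to a Query. A sketch, assuming the dataset settings define a vector field named \"my_vector\" and that the class is exposed as rg.Similar:

similar = rg.Similar(name=\"my_vector\", value=[0.1, 0.2, 0.3])\n\nquery = rg.Query(similar=similar)\n\nfor record in dataset.records(query=query):\n    print(record)\n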
"},{"location":"reference/argilla/users/","title":"rg.User","text":"

A user in Argilla is a profile for a person who uses the SDK or UI. The profile can be used to track their feedback activity and to manage their access to the Argilla server.

"},{"location":"reference/argilla/users/#usage-examples","title":"Usage Examples","text":"

To create a new user, instantiate the User object with the client and the username:

user = rg.User(username=\"my_username\", password=\"my_password\")\nuser.create()\n

Existing users can be retrieved by their username:

user = client.users(\"my_username\")\n

The current user of the rg.Argilla client can be accessed using the me attribute:

client.me\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User","title":"User","text":"

Bases: Resource

Class for interacting with Argilla users in the Argilla server. User profiles are used to manage access to the Argilla server and track responses to records.

Attributes:

Name Type Description username str

The username of the user.

first_name str

The first name of the user.

last_name str

The last name of the user.

role str

The role of the user, either 'annotator', 'admin', or 'owner'.

password str

The password of the user.

id UUID

The ID of the user.

Source code in src/argilla/users/_resource.py
class User(Resource):\n    \"\"\"Class for interacting with Argilla users in the Argilla server. User profiles \\\n        are used to manage access to the Argilla server and track responses to records.\n\n    Attributes:\n        username (str): The username of the user.\n        first_name (str): The first name of the user.\n        last_name (str): The last name of the user.\n        role (str): The role of the user, either 'annotator' or 'admin'.\n        password (str): The password of the user.\n        id (UUID): The ID of the user.\n    \"\"\"\n\n    _model: UserModel\n    _api: UsersAPI\n\n    def __init__(\n        self,\n        username: Optional[str] = None,\n        first_name: Optional[str] = None,\n        last_name: Optional[str] = None,\n        role: Optional[str] = None,\n        password: Optional[str] = None,\n        client: Optional[\"Argilla\"] = None,\n        id: Optional[UUID] = None,\n        _model: Optional[UserModel] = None,\n    ) -> None:\n        \"\"\"Initializes a User object with a client and a username\n\n        Parameters:\n            username (str): The username of the user\n            first_name (str): The first name of the user\n            last_name (str): The last name of the user\n            role (str): The role of the user, either 'annotator', admin, or 'owner'\n            password (str): The password of the user\n            client (Argilla): The client used to interact with Argilla\n\n        Returns:\n            User: The initialized user object\n        \"\"\"\n        client = client or Argilla._get_default()\n        super().__init__(client=client, api=client.api.users)\n\n        if _model is None:\n            _model = UserModel(\n                username=username,\n                password=password,\n                first_name=first_name or username,\n                last_name=last_name,\n                role=role or Role.annotator,\n                id=id,\n            )\n            self._log_message(f\"Initialized user with username {username}\")\n        self._model = _model\n\n    def create(self) -> \"User\":\n        \"\"\"Creates the user in Argilla. After creating a user, it will be able to log in to the Argilla server.\n\n        Returns:\n            User: The user that was created in Argilla.\n        \"\"\"\n        model_create = self.api_model()\n        model = self._api.create(model_create)\n        # The password is not returned in the response\n        model.password = model_create.password\n        self._model = model\n        return self\n\n    def delete(self) -> None:\n        \"\"\"Deletes the user from Argilla. After deleting a user, it will no longer be able to log in to the Argilla server.\"\"\"\n        super().delete()\n        # exists relies on the id, so we need to set it to None\n        self._model = UserModel(username=self.username)\n\n    def add_to_workspace(self, workspace: \"Workspace\") -> \"User\":\n        \"\"\"Adds the user to a workspace. After adding a user to a workspace, it will have access to the datasets\n        in the workspace.\n\n        Args:\n            workspace (Workspace): The workspace to add the user to.\n\n        Returns:\n            User: The user that was added to the workspace.\n        \"\"\"\n        self._model = self._api.add_to_workspace(workspace.id, self.id)\n        return self\n\n    def remove_from_workspace(self, workspace: \"Workspace\") -> \"User\":\n        \"\"\"Removes the user from a workspace. After removing a user from a workspace, it will no longer have access to\n        the datasets in the workspace.\n\n        Args:\n            workspace (Workspace): The workspace to remove the user from.\n\n        Returns:\n            User: The user that was removed from the workspace.\n\n        \"\"\"\n        self._model = self._api.delete_from_workspace(workspace.id, self.id)\n        return self\n\n    ############################\n    # Properties\n    ############################\n    @property\n    def username(self) -> str:\n        return self._model.username\n\n    @username.setter\n    def username(self, value: str) -> None:\n        self._model.username = value\n\n    @property\n    def password(self) -> str:\n        return self._model.password\n\n    @password.setter\n    def password(self, value: str) -> None:\n        self._model.password = value\n\n    @property\n    def first_name(self) -> str:\n        return self._model.first_name\n\n    @first_name.setter\n    def first_name(self, value: str) -> None:\n        self._model.first_name = value\n\n    @property\n    def last_name(self) -> str:\n        return self._model.last_name\n\n    @last_name.setter\n    def last_name(self, value: str) -> None:\n        self._model.last_name = value\n\n    @property\n    def role(self) -> Role:\n        return self._model.role\n\n    @role.setter\n    def role(self, value: Role) -> None:\n        self._model.role = value\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.__init__","title":"__init__(username=None, first_name=None, last_name=None, role=None, password=None, client=None, id=None, _model=None)","text":"

Initializes a User object with a client and a username

Parameters:

Name Type Description Default username str

The username of the user

None first_name str

The first name of the user

None last_name str

The last name of the user

None role str

The role of the user, either 'annotator', 'admin', or 'owner'

None password str

The password of the user

None client Argilla

The client used to interact with Argilla

None

Returns:

Name Type Description User None

The initialized user object

Source code in src/argilla/users/_resource.py
def __init__(\n    self,\n    username: Optional[str] = None,\n    first_name: Optional[str] = None,\n    last_name: Optional[str] = None,\n    role: Optional[str] = None,\n    password: Optional[str] = None,\n    client: Optional[\"Argilla\"] = None,\n    id: Optional[UUID] = None,\n    _model: Optional[UserModel] = None,\n) -> None:\n    \"\"\"Initializes a User object with a client and a username\n\n    Parameters:\n        username (str): The username of the user\n        first_name (str): The first name of the user\n        last_name (str): The last name of the user\n        role (str): The role of the user, either 'annotator', admin, or 'owner'\n        password (str): The password of the user\n        client (Argilla): The client used to interact with Argilla\n\n    Returns:\n        User: The initialized user object\n    \"\"\"\n    client = client or Argilla._get_default()\n    super().__init__(client=client, api=client.api.users)\n\n    if _model is None:\n        _model = UserModel(\n            username=username,\n            password=password,\n            first_name=first_name or username,\n            last_name=last_name,\n            role=role or Role.annotator,\n            id=id,\n        )\n        self._log_message(f\"Initialized user with username {username}\")\n    self._model = _model\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.create","title":"create()","text":"

Creates the user in Argilla. After creating a user, it will be able to log in to the Argilla server.

Returns:

Name Type Description User User

The user that was created in Argilla.

Source code in src/argilla/users/_resource.py
def create(self) -> \"User\":\n    \"\"\"Creates the user in Argilla. After creating a user, it will be able to log in to the Argilla server.\n\n    Returns:\n        User: The user that was created in Argilla.\n    \"\"\"\n    model_create = self.api_model()\n    model = self._api.create(model_create)\n    # The password is not returned in the response\n    model.password = model_create.password\n    self._model = model\n    return self\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.delete","title":"delete()","text":"

Deletes the user from Argilla. After deleting a user, it will no longer be able to log in to the Argilla server.

Source code in src/argilla/users/_resource.py
def delete(self) -> None:\n    \"\"\"Deletes the user from Argilla. After deleting a user, it will no longer be able to log in to the Argilla server.\"\"\"\n    super().delete()\n    # exists relies on the id, so we need to set it to None\n    self._model = UserModel(username=self.username)\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.add_to_workspace","title":"add_to_workspace(workspace)","text":"

Adds the user to a workspace. After adding a user to a workspace, it will have access to the datasets in the workspace.

Parameters:

Name Type Description Default workspace Workspace

The workspace to add the user to.

required

Returns:

Name Type Description User User

The user that was added to the workspace.

Source code in src/argilla/users/_resource.py
def add_to_workspace(self, workspace: \"Workspace\") -> \"User\":\n    \"\"\"Adds the user to a workspace. After adding a user to a workspace, it will have access to the datasets\n    in the workspace.\n\n    Args:\n        workspace (Workspace): The workspace to add the user to.\n\n    Returns:\n        User: The user that was added to the workspace.\n    \"\"\"\n    self._model = self._api.add_to_workspace(workspace.id, self.id)\n    return self\n
"},{"location":"reference/argilla/users/#src.argilla.users._resource.User.remove_from_workspace","title":"remove_from_workspace(workspace)","text":"

Removes the user from a workspace. After removing a user from a workspace, it will no longer have access to the datasets in the workspace.

Parameters:

Name Type Description Default workspace Workspace

The workspace to remove the user from.

required

Returns:

Name Type Description User User

The user that was removed from the workspace.

Source code in src/argilla/users/_resource.py
def remove_from_workspace(self, workspace: \"Workspace\") -> \"User\":\n    \"\"\"Removes the user from a workspace. After removing a user from a workspace, it will no longer have access to\n    the datasets in the workspace.\n\n    Args:\n        workspace (Workspace): The workspace to remove the user from.\n\n    Returns:\n        User: The user that was removed from the workspace.\n\n    \"\"\"\n    self._model = self._api.delete_from_workspace(workspace.id, self.id)\n    return self\n
"},{"location":"reference/argilla/workspaces/","title":"rg.Workspace","text":"

In Argilla, workspaces are used to organize datasets into groups. For example, you might have a workspace for each project or team.

"},{"location":"reference/argilla/workspaces/#usage-examples","title":"Usage Examples","text":"

To create a new workspace, instantiate the Workspace object with the client and the name:

workspace = rg.Workspace(name=\"my_workspace\")\nworkspace.create()\n

To retrieve an existing workspace, use the client.workspaces attribute:

workspace = client.workspaces(\"my_workspace\")\n
"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace","title":"Workspace","text":"

Bases: Resource

Class for interacting with Argilla workspaces. Workspaces are used to organize datasets in the Argilla server.

Attributes:

Name Type Description name str

The name of the workspace.

id UUID

The ID of the workspace. This is a unique identifier for the workspace in the server.

datasets List[Dataset]

A list of all datasets in the workspace.

users WorkspaceUsers

A list of all users in the workspace.

Source code in src/argilla/workspaces/_resource.py
class Workspace(Resource):\n    \"\"\"Class for interacting with Argilla workspaces. Workspaces are used to organize datasets in the Argilla server.\n\n    Attributes:\n        name (str): The name of the workspace.\n        id (UUID): The ID of the workspace. This is a unique identifier for the workspace in the server.\n        datasets (List[Dataset]): A list of all datasets in the workspace.\n        users (WorkspaceUsers): A list of all users in the workspace.\n    \"\"\"\n\n    name: Optional[str]\n\n    _api: \"WorkspacesAPI\"\n\n    def __init__(\n        self,\n        name: Optional[str] = None,\n        id: Optional[UUID] = None,\n        client: Optional[\"Argilla\"] = None,\n    ) -> None:\n        \"\"\"Initializes a Workspace object with a client and a name or id\n\n        Parameters:\n            client (Argilla): The client used to interact with Argilla\n            name (str): The name of the workspace\n            id (UUID): The id of the workspace\n\n        Returns:\n            Workspace: The initialized workspace object\n        \"\"\"\n        client = client or Argilla._get_default()\n        super().__init__(client=client, api=client.api.workspaces)\n\n        self._model = WorkspaceModel(name=name, id=id)\n\n    def add_user(self, user: Union[\"User\", str]) -> \"User\":\n        \"\"\"Adds a user to the workspace. After adding a user to the workspace, it will have access to the datasets\n        in the workspace.\n\n        Args:\n            user (Union[User, str]): The user to add to the workspace. Can be a User object or a username.\n\n        Returns:\n            User: The user that was added to the workspace\n        \"\"\"\n        return self.users.add(user)\n\n    def remove_user(self, user: Union[\"User\", str]) -> \"User\":\n        \"\"\"Removes a user from the workspace. After removing a user from the workspace, it will no longer have access\n\n        Args:\n            user (Union[User, str]): The user to remove from the workspace. Can be a User object or a username.\n\n        Returns:\n            User: The user that was removed from the workspace.\n        \"\"\"\n        return self.users.delete(user)\n\n    # TODO: Make this method private\n    def list_datasets(self) -> List[\"Dataset\"]:\n        from argilla.datasets import Dataset\n\n        datasets = self._client.api.datasets.list(self.id)\n        self._log_message(f\"Got {len(datasets)} datasets for workspace {self.id}\")\n        return [Dataset.from_model(model=dataset, client=self._client) for dataset in datasets]\n\n    @classmethod\n    def from_model(cls, model: WorkspaceModel, client: Argilla) -> \"Workspace\":\n        instance = cls(name=model.name, id=model.id, client=client)\n        instance._model = model\n\n        return instance\n\n    ############################\n    # Properties\n    ############################\n\n    @property\n    def name(self) -> Optional[str]:\n        return self._model.name\n\n    @name.setter\n    def name(self, value: str) -> None:\n        self._model.name = value\n\n    @property\n    def datasets(self) -> List[\"Dataset\"]:\n        \"\"\"List all datasets in the workspace\n\n        Returns:\n            List[Dataset]: A list of all datasets in the workspace\n        \"\"\"\n        return self.list_datasets()\n\n    @property\n    def users(self) -> \"WorkspaceUsers\":\n        \"\"\"List all users in the workspace\n\n        Returns:\n            WorkspaceUsers: A list of all users in the workspace\n        \"\"\"\n        return WorkspaceUsers(workspace=self)\n
"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.datasets","title":"datasets: List[Dataset] property","text":"

List all datasets in the workspace

Returns:

Type Description List[Dataset]

List[Dataset]: A list of all datasets in the workspace

"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.users","title":"users: WorkspaceUsers property","text":"

List all users in the workspace

Returns:

Name Type Description WorkspaceUsers WorkspaceUsers

A list of all users in the workspace

"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.__init__","title":"__init__(name=None, id=None, client=None)","text":"

Initializes a Workspace object with a client and a name or id

Parameters:

client (Argilla): The client used to interact with Argilla. Default: None
name (str): The name of the workspace. Default: None
id (UUID): The id of the workspace. Default: None

Returns:

Workspace: The initialized workspace object

Source code in src/argilla/workspaces/_resource.py
def __init__(\n    self,\n    name: Optional[str] = None,\n    id: Optional[UUID] = None,\n    client: Optional[\"Argilla\"] = None,\n) -> None:\n    \"\"\"Initializes a Workspace object with a client and a name or id\n\n    Parameters:\n        client (Argilla): The client used to interact with Argilla\n        name (str): The name of the workspace\n        id (UUID): The id of the workspace\n\n    Returns:\n        Workspace: The initialized workspace object\n    \"\"\"\n    client = client or Argilla._get_default()\n    super().__init__(client=client, api=client.api.workspaces)\n\n    self._model = WorkspaceModel(name=name, id=id)\n
"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.add_user","title":"add_user(user)","text":"

Adds a user to the workspace. Once added, the user will have access to the datasets in the workspace.

Parameters:

user (Union[User, str]): The user to add to the workspace. Can be a User object or a username. Required.

Returns:

User: The user that was added to the workspace

Source code in src/argilla/workspaces/_resource.py
def add_user(self, user: Union[\"User\", str]) -> \"User\":\n    \"\"\"Adds a user to the workspace. After adding a user to the workspace, it will have access to the datasets\n    in the workspace.\n\n    Args:\n        user (Union[User, str]): The user to add to the workspace. Can be a User object or a username.\n\n    Returns:\n        User: The user that was added to the workspace\n    \"\"\"\n    return self.users.add(user)\n
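
For example (a sketch; the workspace and user names are illustrative):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"<api_url>\", api_key=\"<api_key>\")\nworkspace = client.workspaces(\"my_workspace\")\nuser = workspace.add_user(\"my_username\")  # a User object also works\n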
"},{"location":"reference/argilla/workspaces/#src.argilla.workspaces._resource.Workspace.remove_user","title":"remove_user(user)","text":"

Removes a user from the workspace. Once removed, the user will no longer have access to the datasets in the workspace.

Parameters:

user (Union[User, str]): The user to remove from the workspace. Can be a User object or a username. Required.

Returns:

User: The user that was removed from the workspace.

Source code in src/argilla/workspaces/_resource.py
def remove_user(self, user: Union[\"User\", str]) -> \"User\":\n    \"\"\"Removes a user from the workspace. After removing a user from the workspace, it will no longer have access\n\n    Args:\n        user (Union[User, str]): The user to remove from the workspace. Can be a User object or a username.\n\n    Returns:\n        User: The user that was removed from the workspace.\n    \"\"\"\n    return self.users.delete(user)\n
"},{"location":"reference/argilla/datasets/dataset_records/","title":"rg.Dataset.records","text":""},{"location":"reference/argilla/datasets/dataset_records/#usage-examples","title":"Usage Examples","text":"

In most cases, you will not need to create a DatasetRecords object directly. Instead, you can access it via the Dataset object:

dataset.records\n

For users familiar with legacy approaches

  1. The Dataset.records object is used to interact with the records in a dataset. It fetches records from the server in batches on demand, without keeping a local copy of the records.
  2. The log method of Dataset.records is used to both add and update records in a dataset. If a record includes a known id field, it will be updated; if not, it will be added as a new record, as sketched below.
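
A distilled sketch of both behaviors (the field names are illustrative):

dataset.records.log([{\"question\": \"Is water wet?\", \"answer\": \"Yes\"}])  # no id: added as a new record\ndataset.records.log([{\"id\": \"2\", \"answer\": \"No\"}])  # known id: the existing record is updated\n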
"},{"location":"reference/argilla/datasets/dataset_records/#adding-records-to-a-dataset","title":"Adding records to a dataset","text":"

To add records to a dataset, use the log method. Records can be added as dictionaries or as Record objects; a single record can likewise be passed as a dictionary or a Record object.

As a Record object · From a data structure · From a data structure with a mapping · From a Hugging Face dataset

You can also add records to a dataset by initializing a Record object directly.

records = [\n    rg.Record(\n        fields={\n            \"question\": \"Do you need oxygen to breathe?\",\n            \"answer\": \"Yes\"\n        },\n    ),\n    rg.Record(\n        fields={\n            \"question\": \"What is the boiling point of water?\",\n            \"answer\": \"100 degrees Celsius\"\n        },\n    ),\n] # (1)\n\ndataset.records.log(records)\n
  1. This is an illustrative definition. In a real-world scenario, you would iterate over a data structure and create a Record object for each item.
data = [\n    {\n        \"question\": \"Do you need oxygen to breathe?\",\n        \"answer\": \"Yes\",\n    },\n    {\n        \"question\": \"What is the boiling point of water?\",\n        \"answer\": \"100 degrees Celsius\",\n    },\n] # (1)\n\ndataset.records.log(data)\n
  1. The data structure's keys must match the fields or questions in the Argilla dataset. In this case, there are fields named question and answer.
data = [\n    {\n        \"query\": \"Do you need oxygen to breathe?\",\n        \"response\": \"Yes\",\n    },\n    {\n        \"query\": \"What is the boiling point of water?\",\n        \"response\": \"100 degrees Celsius\",\n    },\n] # (1)\ndataset.records.log(\n    records=data,\n    mapping={\"query\": \"question\", \"response\": \"answer\"} # (2)\n)\n
  1. The data structure's keys must match the fields or questions in the Argilla dataset. In this case, there are fields named question and answer.
  2. The data structure has keys query and response and the Argilla dataset has question and answer. You can use the mapping parameter to map the keys in the data structure to the fields in the Argilla dataset.

You can also add records to a dataset using a Hugging Face dataset. This is useful when you want to use a dataset from the Hugging Face Hub and add it to your Argilla dataset.

You can add the dataset directly when its column names correspond to the names of fields, questions, metadata, or vectors in the Argilla dataset.

If the dataset's schema does not correspond to your Argilla dataset names, you can use a mapping to indicate which columns in the dataset correspond to the Argilla dataset fields.

from datasets import load_dataset\n\nhf_dataset = load_dataset(\"imdb\", split=\"train[:100]\") # (1)\n\ndataset.records.log(records=hf_dataset)\n
  1. In this example, the Hugging Face dataset matches the Argilla dataset schema. If that is not the case, you could use the .map method of the datasets library to prepare the data before adding it to the Argilla dataset, as sketched below.
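
A sketch of that preparation step (the lowercase transform is illustrative):

from datasets import load_dataset\n\nhf_dataset = load_dataset(\"imdb\", split=\"train[:100]\")\nhf_dataset = hf_dataset.map(lambda row: {\"text\": row[\"text\"].lower()})  # normalize the text column\ndataset.records.log(records=hf_dataset)\n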

Here we use the mapping parameter to specify the relationship between the Hugging Face dataset and the Argilla dataset.

dataset.records.log(records=hf_dataset, mapping={\"txt\": \"text\", \"y\": \"label\"}) # (1)\n
  1. In this case, the txt key in the Hugging Face dataset corresponds to the text field in the Argilla dataset, and the y key in the Hugging Face dataset corresponds to the label field in the Argilla dataset.
"},{"location":"reference/argilla/datasets/dataset_records/#updating-records-in-a-dataset","title":"Updating records in a dataset","text":"

Records can also be updated using the log method with records that contain an id to identify the records to be updated. As above, records can be passed as dictionaries or as Record objects.

As a Record object · From a data structure · From a data structure with a mapping · From a Hugging Face dataset

You can update records in a dataset by initializing a Record object directly and providing the id field.

records = [\n    rg.Record(\n        metadata={\"department\": \"toys\"},\n        id=\"2\" # (1)\n    ),\n]\n\ndataset.records.log(records)\n
  1. The id field is required to identify the record to be updated. The id field must be unique for each record in the dataset. If the id field is not provided, the record will be added as a new record.

You can also update records in a dataset by providing the id field in the data structure.

data = [\n    {\n        \"metadata\": {\"department\": \"toys\"},\n        \"id\": \"2\" # (1)\n    },\n]\n\ndataset.records.log(data)\n
  1. The id field is required to identify the record to be updated. The id field must be unique for each record in the dataset. If the id field is not provided, the record will be added as a new record.

You can also update records in a dataset by providing the id field in the data structure and using a mapping to map the keys in the data structure to the fields in the dataset.

data = [\n    {\n        \"metadata\": {\"department\": \"toys\"},\n        \"my_id\": \"2\" # (1)\n    },\n]\n\ndataset.records.log(\n    records=data,\n    mapping={\"my_id\": \"id\"} # (2)\n)\n
  1. The id field is required to identify the record to be updated. The id field must be unique for each record in the dataset. If the id field is not provided, the record will be added as a new record.
  2. Let's say that your data structure has keys my_id instead of id. You can use the mapping parameter to map the keys in the data structure to the fields in the dataset.

You can also update records in an Argilla dataset using a Hugging Face dataset. To update records, the Hugging Face dataset must contain an id field to identify the records to be updated, or you can use a mapping to map the keys in the Hugging Face dataset to the fields in the Argilla dataset.

from datasets import load_dataset\n\nhf_dataset = load_dataset(\"imdb\", split=\"train[:100]\") # (1)\n\ndataset.records.log(records=hf_dataset, mapping={\"uuid\": \"id\"}) # (2)\n
  1. In this example, the Hugging Face dataset matches the Argilla dataset schema.
  2. The uuid key in the Hugging Face dataset corresponds to the id field in the Argilla dataset.
"},{"location":"reference/argilla/datasets/dataset_records/#adding-and-updating-records-with-images","title":"Adding and updating records with images","text":"

Argilla datasets can contain image fields. You can add images to a dataset by passing the image to the record object as a remote URL, a local path to an image file, or a PIL object. The field must be defined as an rg.ImageField in the dataset's Settings object to be accepted. Images are stored in the Argilla database and returned using the data URI scheme.
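
For instance, a dataset accepting such records could be configured with an rg.ImageField (a minimal sketch; the field and question names are illustrative):

settings = rg.Settings(\n    fields=[\n        rg.ImageField(name=\"image\"),  # accepts a remote URL, a local path, or a PIL object\n    ],\n    questions=[\n        rg.TextQuestion(name=\"caption\"),\n    ],\n)\ndataset = rg.Dataset(name=\"image_dataset\", settings=settings)\ndataset.create()\n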

As PIL objects

To retrieve the images as rescaled PIL objects, you can use the to_datasets method when exporting the records, as shown in this how-to guide.
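
A sketch of that export (assuming an image field named image):

exported = dataset.records.to_datasets()\npil_image = exported[0][\"image\"]  # image fields are returned as PIL objects\n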

From a data structure with remote URLs · From a data structure with local files or PIL objects · From a Hugging Face dataset
data = [\n    {\n        \"image\": \"https://example.com/image1.jpg\",\n    },\n    {\n        \"image\": \"https://example.com/image2.jpg\",\n    },\n]\n\ndataset.records.log(data)\n
import os\nfrom PIL import Image\n\nimage_dir = \"path/to/images\"\n\ndata = [\n    {\n        \"image\": os.path.join(image_dir, \"image1.jpg\"), # (1)\n    },\n    {\n        \"image\": Image.open(os.path.join(image_dir, \"image2.jpg\")), # (2)\n    },\n]\n\ndataset.records.log(data)\n
  1. The image is a local file path.
  2. The image is a PIL object.

Hugging Face datasets can be passed directly to the log method. The image field must be defined as an Image in the dataset's features.

hf_dataset = load_dataset(\"ylecun/mnist\", split=\"train[:100]\")\ndataset.records.log(records=hf_dataset)\n

If the image field is not defined as an Image in the dataset's features and the values are not one of the image types Argilla supports (a URL, local path, or PIL object), you can cast the dataset to the correct schema before adding it to the Argilla dataset.

hf_dataset = load_dataset(\"<my_custom_dataset>\") # (1)\nhf_dataset = hf_dataset.cast(\n    features=Features({\"image\": Image(), \"label\": Value(\"string\")}),\n)\ndataset.records.log(records=hf_dataset)\n
  1. In this example, the Hugging Face dataset matches the Argilla dataset schema but the image field is not defined as an Image in the dataset's features.
"},{"location":"reference/argilla/datasets/dataset_records/#iterating-over-records-in-a-dataset","title":"Iterating over records in a dataset","text":"

Dataset.records can be used to iterate over the records in a dataset on the server. The records are fetched from the server in batches:

for record in dataset.records:\n    print(record)\n\n# Fetch records with suggestions and responses\nfor record in dataset.records(with_suggestions=True, with_responses=True):\n    print(record.suggestions)\n    print(record.responses)\n\n# Filter records by a query and fetch records with vectors\nfor record in dataset.records(query=\"capital\", with_vectors=True):\n    print(record.vectors)\n

Check out the rg.Record class reference for more information on the properties and methods available on a record and the rg.Query class reference for more information on the query syntax.
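
The query string above is shorthand for a Query object; passing one explicitly is equivalent (a minimal sketch):

query = rg.Query(query=\"capital\")\nfor record in dataset.records(query=query):\n    print(record)\n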

"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords","title":"DatasetRecords","text":"

Bases: Iterable[Record], LoggingMixin

This class is used to work with records from a dataset and is accessed via Dataset.records. The responsibility of this class is to provide an interface to interact with records in a dataset, by adding, updating, fetching, querying, deleting, and exporting records.
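
For example, deleting the records matched by a query (a sketch; the query string is illustrative):

matched = list(dataset.records(query=\"spam\"))\ndataset.records.delete(records=matched)\n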

Attributes:

client (Argilla): The Argilla client object.
dataset (Dataset): The dataset object.

Source code in src/argilla/records/_dataset_records.py
class DatasetRecords(Iterable[Record], LoggingMixin):\n    \"\"\"This class is used to work with records from a dataset and is accessed via `Dataset.records`.\n    The responsibility of this class is to provide an interface to interact with records in a dataset,\n    by adding, updating, fetching, querying, deleting, and exporting records.\n\n    Attributes:\n        client (Argilla): The Argilla client object.\n        dataset (Dataset): The dataset object.\n    \"\"\"\n\n    _api: RecordsAPI\n\n    DEFAULT_BATCH_SIZE = 256\n    DEFAULT_DELETE_BATCH_SIZE = 64\n\n    def __init__(\n        self, client: \"Argilla\", dataset: \"Dataset\", mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None\n    ):\n        \"\"\"Initializes a DatasetRecords object with a client and a dataset.\n        Args:\n            client: An Argilla client object.\n            dataset: A Dataset object.\n        \"\"\"\n        self.__client = client\n        self.__dataset = dataset\n        self._mapping = mapping or {}\n        self._api = self.__client.api.records\n\n    def __iter__(self):\n        return DatasetRecordsIterator(self.__dataset, self.__client, with_suggestions=True, with_responses=True)\n\n    def __call__(\n        self,\n        query: Optional[Union[str, Query]] = None,\n        batch_size: Optional[int] = DEFAULT_BATCH_SIZE,\n        start_offset: int = 0,\n        with_suggestions: bool = True,\n        with_responses: bool = True,\n        with_vectors: Optional[Union[List, bool, str]] = None,\n        limit: Optional[int] = None,\n    ) -> DatasetRecordsIterator:\n        \"\"\"Returns an iterator over the records in the dataset on the server.\n\n        Parameters:\n            query: A string or a Query object to filter the records.\n            batch_size: The number of records to fetch in each batch. The default is 256.\n            start_offset: The offset from which to start fetching records. The default is 0.\n            with_suggestions: Whether to include suggestions in the records. The default is True.\n            with_responses: Whether to include responses in the records. The default is True.\n            with_vectors: A list of vector names to include in the records. The default is None.\n                If a list is provided, only the specified vectors will be included.\n                If True is provided, all vectors will be included.\n            limit: The maximum number of records to fetch. 
The default is None.\n\n        Returns:\n            An iterator over the records in the dataset on the server.\n\n        \"\"\"\n        if query and isinstance(query, str):\n            query = Query(query=query)\n\n        if with_vectors:\n            self._validate_vector_names(vector_names=with_vectors)\n\n        return DatasetRecordsIterator(\n            dataset=self.__dataset,\n            client=self.__client,\n            query=query,\n            batch_size=batch_size,\n            start_offset=start_offset,\n            with_suggestions=with_suggestions,\n            with_responses=with_responses,\n            with_vectors=with_vectors,\n            limit=limit,\n        )\n\n    def __repr__(self) -> str:\n        return f\"{self.__class__.__name__}({self.__dataset})\"\n\n    ############################\n    # Public methods\n    ############################\n\n    def log(\n        self,\n        records: Union[List[dict], List[Record], HFDataset],\n        mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n        user_id: Optional[UUID] = None,\n        batch_size: int = DEFAULT_BATCH_SIZE,\n        on_error: RecordErrorHandling = RecordErrorHandling.RAISE,\n    ) -> \"DatasetRecords\":\n        \"\"\"Add or update records in a dataset on the server using the provided records.\n        If the record includes a known `id` field, the record will be updated.\n        If the record does not include a known `id` field, the record will be added as a new record.\n        See `rg.Record` for more information on the record definition.\n\n        Parameters:\n            records: A list of `Record` objects, a Hugging Face Dataset, or a list of dictionaries representing the records.\n                     If records are defined as a dictionaries or a dataset, the keys/ column names should correspond to the\n                     fields in the Argilla dataset's fields and questions. `id` should be provided to identify the records when updating.\n            mapping: A dictionary that maps the keys/ column names in the records to the fields or questions in the Argilla dataset.\n                     To assign an incoming key or column to multiple fields or questions, provide a list or tuple of field or question names.\n            user_id: The user id to be associated with the records' response. If not provided, the current user id is used.\n            batch_size: The number of records to send in each batch. 
The default is 256.\n\n        Returns:\n            A list of Record objects representing the updated records.\n        \"\"\"\n        record_models = self._ingest_records(\n            records=records, mapping=mapping, user_id=user_id or self.__client.me.id, on_error=on_error\n        )\n        batch_size = self._normalize_batch_size(\n            batch_size=batch_size,\n            records_length=len(record_models),\n            max_value=self._api.MAX_RECORDS_PER_UPSERT_BULK,\n        )\n\n        created_or_updated = []\n        records_updated = 0\n\n        for batch in tqdm(\n            iterable=range(0, len(records), batch_size),\n            desc=\"Sending records...\",\n            total=len(records) // batch_size,\n            unit=\"batch\",\n        ):\n            self._log_message(message=f\"Sending records from {batch} to {batch + batch_size}.\")\n            batch_records = record_models[batch : batch + batch_size]\n            models, updated = self._api.bulk_upsert(dataset_id=self.__dataset.id, records=batch_records)\n            created_or_updated.extend([Record.from_model(model=model, dataset=self.__dataset) for model in models])\n            records_updated += updated\n\n        records_created = len(created_or_updated) - records_updated\n        self._log_message(\n            message=f\"Updated {records_updated} records and added {records_created} records to dataset {self.__dataset.name}\",\n            level=\"info\",\n        )\n\n        return self\n\n    def delete(\n        self,\n        records: List[Record],\n        batch_size: int = DEFAULT_DELETE_BATCH_SIZE,\n    ) -> List[Record]:\n        \"\"\"Delete records in a dataset on the server using the provided records\n            and matching based on the id.\n\n        Parameters:\n            records: A list of `Record` objects representing the records to be deleted.\n            batch_size: The number of records to send in each batch. The default is 64.\n\n        Returns:\n            A list of Record objects representing the deleted records.\n\n        \"\"\"\n        mapping = None\n        user_id = self.__client.me.id\n        record_models = self._ingest_records(records=records, mapping=mapping, user_id=user_id)\n        batch_size = self._normalize_batch_size(\n            batch_size=batch_size,\n            records_length=len(record_models),\n            max_value=self._api.MAX_RECORDS_PER_DELETE_BULK,\n        )\n\n        records_deleted = 0\n        for batch in tqdm(\n            iterable=range(0, len(records), batch_size),\n            desc=\"Sending records...\",\n            total=len(records) // batch_size,\n            unit=\"batch\",\n        ):\n            self._log_message(message=f\"Sending records from {batch} to {batch + batch_size}.\")\n            batch_records = record_models[batch : batch + batch_size]\n            self._api.delete_many(dataset_id=self.__dataset.id, records=batch_records)\n            records_deleted += len(batch_records)\n\n        self._log_message(\n            message=f\"Deleted {len(record_models)} records from dataset {self.__dataset.name}\",\n            level=\"info\",\n        )\n\n        return records\n\n    def to_dict(self, flatten: bool = False, orient: str = \"names\") -> Dict[str, Any]:\n        \"\"\"\n        Return the records as a dictionary. 
This is a convenient shortcut for dataset.records(...).to_dict().\n\n        Parameters:\n            flatten (bool): The structure of the exported dictionary.\n                - True: The record fields, metadata, suggestions and responses will be flattened.\n                - False: The record fields, metadata, suggestions and responses will be nested.\n            orient (str): The orientation of the exported dictionary.\n                - \"names\": The keys of the dictionary will be the names of the fields, metadata, suggestions and responses.\n                - \"index\": The keys of the dictionary will be the id of the records.\n        Returns:\n            A dictionary of records.\n\n        \"\"\"\n        return self().to_dict(flatten=flatten, orient=orient)\n\n    def to_list(self, flatten: bool = False) -> List[Dict[str, Any]]:\n        \"\"\"\n        Return the records as a list of dictionaries. This is a convenient shortcut for dataset.records(...).to_list().\n\n        Parameters:\n            flatten (bool): The structure of the exported dictionaries in the list.\n                - True: The record keys are flattened and a dot notation is used to record attributes and their attributes . For example, `label.suggestion` and `label.response`. Records responses are spread across multiple columns for values and users.\n                - False: The record fields, metadata, suggestions and responses will be nested dictionary with keys for record attributes.\n        Returns:\n            A list of dictionaries of records.\n        \"\"\"\n        data = self().to_list(flatten=flatten)\n        return data\n\n    def to_json(self, path: Union[Path, str]) -> Path:\n        \"\"\"\n        Export the records to a file on disk.\n\n        Parameters:\n            path (str): The path to the file to save the records.\n\n        Returns:\n            The path to the file where the records were saved.\n\n        \"\"\"\n        return self().to_json(path=path)\n\n    def from_json(self, path: Union[Path, str]) -> List[Record]:\n        \"\"\"Creates a DatasetRecords object from a disk path to a JSON file.\n            The JSON file should be defined by `DatasetRecords.to_json`.\n\n        Args:\n            path (str): The path to the file containing the records.\n\n        Returns:\n            DatasetRecords: The DatasetRecords object created from the disk path.\n\n        \"\"\"\n        records = JsonIO._records_from_json(path=path)\n        return self.log(records=records)\n\n    def to_datasets(self) -> HFDataset:\n        \"\"\"\n        Export the records to a HFDataset.\n\n        Returns:\n            The dataset containing the records.\n\n        \"\"\"\n\n        return self().to_datasets()\n\n    ############################\n    # Private methods\n    ############################\n\n    def _ingest_records(\n        self,\n        records: Union[List[Dict[str, Any]], List[Record], HFDataset],\n        mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n        user_id: Optional[UUID] = None,\n        on_error: RecordErrorHandling = RecordErrorHandling.RAISE,\n    ) -> List[RecordModel]:\n        \"\"\"Ingests records from a list of dictionaries, a Hugging Face Dataset, or a list of Record objects.\"\"\"\n\n        mapping = mapping or self._mapping\n        if len(records) == 0:\n            raise ValueError(\"No records provided to ingest.\")\n\n        if HFDatasetsIO._is_hf_dataset(dataset=records):\n            records = 
HFDatasetsIO._record_dicts_from_datasets(hf_dataset=records)\n\n        ingested_records = []\n        record_mapper = IngestedRecordMapper(mapping=mapping, dataset=self.__dataset, user_id=user_id)\n        for record in records:\n            try:\n                if isinstance(record, dict):\n                    record = record_mapper(data=record)\n                elif isinstance(record, Record):\n                    record.dataset = self.__dataset\n                else:\n                    raise ValueError(\n                        \"Records should be a a list Record instances, \"\n                        \"a Hugging Face Dataset, or a list of dictionaries representing the records.\"\n                        f\"Found a record of type {type(record)}: {record}.\"\n                    )\n            except Exception as e:\n                if on_error == RecordErrorHandling.IGNORE:\n                    self._log_message(\n                        message=f\"Failed to ingest record from dict {record}: {e}\",\n                        level=\"info\",\n                    )\n                    continue\n                elif on_error == RecordErrorHandling.WARN:\n                    warnings.warn(f\"Failed to ingest record from dict {record}: {e}\")\n                    continue\n                raise RecordsIngestionError(f\"Failed to ingest record from dict {record}\") from e\n            ingested_records.append(record.api_model())\n        return ingested_records\n\n    def _normalize_batch_size(self, batch_size: int, records_length, max_value: int):\n        norm_batch_size = min(batch_size, records_length, max_value)\n\n        if batch_size != norm_batch_size:\n            self._log_message(\n                message=f\"The provided batch size {batch_size} was normalized. Using value {norm_batch_size}.\",\n                level=\"warning\",\n            )\n\n        return norm_batch_size\n\n    def _validate_vector_names(self, vector_names: Union[List[str], str]) -> None:\n        if not isinstance(vector_names, list):\n            vector_names = [vector_names]\n        for vector_name in vector_names:\n            if isinstance(vector_name, bool):\n                continue\n            if vector_name not in self.__dataset.schema:\n                raise ValueError(f\"Vector field {vector_name} not found in dataset schema.\")\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.__init__","title":"__init__(client, dataset, mapping=None)","text":"

Initializes a DatasetRecords object with a client and a dataset.

Parameters:

client (Argilla): An Argilla client object.
dataset (Dataset): A Dataset object.

Source code in src/argilla/records/_dataset_records.py
def __init__(\n    self, client: \"Argilla\", dataset: \"Dataset\", mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None\n):\n    \"\"\"Initializes a DatasetRecords object with a client and a dataset.\n    Args:\n        client: An Argilla client object.\n        dataset: A Dataset object.\n    \"\"\"\n    self.__client = client\n    self.__dataset = dataset\n    self._mapping = mapping or {}\n    self._api = self.__client.api.records\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.__call__","title":"__call__(query=None, batch_size=DEFAULT_BATCH_SIZE, start_offset=0, with_suggestions=True, with_responses=True, with_vectors=None, limit=None)","text":"

Returns an iterator over the records in the dataset on the server.

Parameters:

query (Optional[Union[str, Query]]): A string or a Query object to filter the records. Default: None
batch_size (Optional[int]): The number of records to fetch in each batch. Default: DEFAULT_BATCH_SIZE (256)
start_offset (int): The offset from which to start fetching records. Default: 0
with_suggestions (bool): Whether to include suggestions in the records. Default: True
with_responses (bool): Whether to include responses in the records. Default: True
with_vectors (Optional[Union[List, bool, str]]): A list of vector names to include in the records. If a list is provided, only the specified vectors will be included. If True is provided, all vectors will be included. Default: None
limit (Optional[int]): The maximum number of records to fetch. Default: None

Returns:

DatasetRecordsIterator: An iterator over the records in the dataset on the server.

Source code in src/argilla/records/_dataset_records.py
def __call__(\n    self,\n    query: Optional[Union[str, Query]] = None,\n    batch_size: Optional[int] = DEFAULT_BATCH_SIZE,\n    start_offset: int = 0,\n    with_suggestions: bool = True,\n    with_responses: bool = True,\n    with_vectors: Optional[Union[List, bool, str]] = None,\n    limit: Optional[int] = None,\n) -> DatasetRecordsIterator:\n    \"\"\"Returns an iterator over the records in the dataset on the server.\n\n    Parameters:\n        query: A string or a Query object to filter the records.\n        batch_size: The number of records to fetch in each batch. The default is 256.\n        start_offset: The offset from which to start fetching records. The default is 0.\n        with_suggestions: Whether to include suggestions in the records. The default is True.\n        with_responses: Whether to include responses in the records. The default is True.\n        with_vectors: A list of vector names to include in the records. The default is None.\n            If a list is provided, only the specified vectors will be included.\n            If True is provided, all vectors will be included.\n        limit: The maximum number of records to fetch. The default is None.\n\n    Returns:\n        An iterator over the records in the dataset on the server.\n\n    \"\"\"\n    if query and isinstance(query, str):\n        query = Query(query=query)\n\n    if with_vectors:\n        self._validate_vector_names(vector_names=with_vectors)\n\n    return DatasetRecordsIterator(\n        dataset=self.__dataset,\n        client=self.__client,\n        query=query,\n        batch_size=batch_size,\n        start_offset=start_offset,\n        with_suggestions=with_suggestions,\n        with_responses=with_responses,\n        with_vectors=with_vectors,\n        limit=limit,\n    )\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.log","title":"log(records, mapping=None, user_id=None, batch_size=DEFAULT_BATCH_SIZE, on_error=RecordErrorHandling.RAISE)","text":"

Add or update records in a dataset on the server using the provided records. If the record includes a known id field, the record will be updated. If the record does not include a known id field, the record will be added as a new record. See rg.Record for more information on the record definition.

Parameters:

records (Union[List[dict], List[Record], HFDataset]): A list of Record objects, a Hugging Face Dataset, or a list of dictionaries representing the records. If records are defined as dictionaries or a dataset, the keys/column names should correspond to the fields and questions in the Argilla dataset. An id should be provided to identify the records when updating. Required.
mapping (Optional[Dict[str, Union[str, Sequence[str]]]]): A dictionary that maps the keys/column names in the records to the fields or questions in the Argilla dataset. To assign an incoming key or column to multiple fields or questions, provide a list or tuple of field or question names. Default: None
user_id (Optional[UUID]): The user id to be associated with the records' response. If not provided, the current user id is used. Default: None
batch_size (int): The number of records to send in each batch. Default: DEFAULT_BATCH_SIZE (256)
on_error (RecordErrorHandling): How records that fail ingestion are handled: RAISE raises the error, WARN warns and skips the record, IGNORE skips it (see the source code below). Default: RecordErrorHandling.RAISE

Returns:

DatasetRecords: The DatasetRecords object (the method returns self; the records are created or updated on the server).

Source code in src/argilla/records/_dataset_records.py
def log(\n    self,\n    records: Union[List[dict], List[Record], HFDataset],\n    mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n    user_id: Optional[UUID] = None,\n    batch_size: int = DEFAULT_BATCH_SIZE,\n    on_error: RecordErrorHandling = RecordErrorHandling.RAISE,\n) -> \"DatasetRecords\":\n    \"\"\"Add or update records in a dataset on the server using the provided records.\n    If the record includes a known `id` field, the record will be updated.\n    If the record does not include a known `id` field, the record will be added as a new record.\n    See `rg.Record` for more information on the record definition.\n\n    Parameters:\n        records: A list of `Record` objects, a Hugging Face Dataset, or a list of dictionaries representing the records.\n                 If records are defined as a dictionaries or a dataset, the keys/ column names should correspond to the\n                 fields in the Argilla dataset's fields and questions. `id` should be provided to identify the records when updating.\n        mapping: A dictionary that maps the keys/ column names in the records to the fields or questions in the Argilla dataset.\n                 To assign an incoming key or column to multiple fields or questions, provide a list or tuple of field or question names.\n        user_id: The user id to be associated with the records' response. If not provided, the current user id is used.\n        batch_size: The number of records to send in each batch. The default is 256.\n\n    Returns:\n        A list of Record objects representing the updated records.\n    \"\"\"\n    record_models = self._ingest_records(\n        records=records, mapping=mapping, user_id=user_id or self.__client.me.id, on_error=on_error\n    )\n    batch_size = self._normalize_batch_size(\n        batch_size=batch_size,\n        records_length=len(record_models),\n        max_value=self._api.MAX_RECORDS_PER_UPSERT_BULK,\n    )\n\n    created_or_updated = []\n    records_updated = 0\n\n    for batch in tqdm(\n        iterable=range(0, len(records), batch_size),\n        desc=\"Sending records...\",\n        total=len(records) // batch_size,\n        unit=\"batch\",\n    ):\n        self._log_message(message=f\"Sending records from {batch} to {batch + batch_size}.\")\n        batch_records = record_models[batch : batch + batch_size]\n        models, updated = self._api.bulk_upsert(dataset_id=self.__dataset.id, records=batch_records)\n        created_or_updated.extend([Record.from_model(model=model, dataset=self.__dataset) for model in models])\n        records_updated += updated\n\n    records_created = len(created_or_updated) - records_updated\n    self._log_message(\n        message=f\"Updated {records_updated} records and added {records_created} records to dataset {self.__dataset.name}\",\n        level=\"info\",\n    )\n\n    return self\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.delete","title":"delete(records, batch_size=DEFAULT_DELETE_BATCH_SIZE)","text":"

Delete records in a dataset on the server using the provided records and matching based on the id.

Parameters:

records (List[Record]): A list of Record objects representing the records to be deleted. Required.
batch_size (int): The number of records to send in each batch. Default: DEFAULT_DELETE_BATCH_SIZE (64)

Returns:

List[Record]: A list of Record objects representing the deleted records.

Source code in src/argilla/records/_dataset_records.py
def delete(\n    self,\n    records: List[Record],\n    batch_size: int = DEFAULT_DELETE_BATCH_SIZE,\n) -> List[Record]:\n    \"\"\"Delete records in a dataset on the server using the provided records\n        and matching based on the id.\n\n    Parameters:\n        records: A list of `Record` objects representing the records to be deleted.\n        batch_size: The number of records to send in each batch. The default is 64.\n\n    Returns:\n        A list of Record objects representing the deleted records.\n\n    \"\"\"\n    mapping = None\n    user_id = self.__client.me.id\n    record_models = self._ingest_records(records=records, mapping=mapping, user_id=user_id)\n    batch_size = self._normalize_batch_size(\n        batch_size=batch_size,\n        records_length=len(record_models),\n        max_value=self._api.MAX_RECORDS_PER_DELETE_BULK,\n    )\n\n    records_deleted = 0\n    for batch in tqdm(\n        iterable=range(0, len(records), batch_size),\n        desc=\"Sending records...\",\n        total=len(records) // batch_size,\n        unit=\"batch\",\n    ):\n        self._log_message(message=f\"Sending records from {batch} to {batch + batch_size}.\")\n        batch_records = record_models[batch : batch + batch_size]\n        self._api.delete_many(dataset_id=self.__dataset.id, records=batch_records)\n        records_deleted += len(batch_records)\n\n    self._log_message(\n        message=f\"Deleted {len(record_models)} records from dataset {self.__dataset.name}\",\n        level=\"info\",\n    )\n\n    return records\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.to_dict","title":"to_dict(flatten=False, orient='names')","text":"

Return the records as a dictionary. This is a convenient shortcut for dataset.records(...).to_dict().

Parameters:

flatten (bool): The structure of the exported dictionary. True: the record fields, metadata, suggestions, and responses are flattened. False: they are nested. Default: False
orient (str): The orientation of the exported dictionary. \"names\": the keys are the names of the fields, metadata, suggestions, and responses. \"index\": the keys are the ids of the records. Default: \"names\"

Returns: A dictionary of records.

Source code in src/argilla/records/_dataset_records.py
def to_dict(self, flatten: bool = False, orient: str = \"names\") -> Dict[str, Any]:\n    \"\"\"\n    Return the records as a dictionary. This is a convenient shortcut for dataset.records(...).to_dict().\n\n    Parameters:\n        flatten (bool): The structure of the exported dictionary.\n            - True: The record fields, metadata, suggestions and responses will be flattened.\n            - False: The record fields, metadata, suggestions and responses will be nested.\n        orient (str): The orientation of the exported dictionary.\n            - \"names\": The keys of the dictionary will be the names of the fields, metadata, suggestions and responses.\n            - \"index\": The keys of the dictionary will be the id of the records.\n    Returns:\n        A dictionary of records.\n\n    \"\"\"\n    return self().to_dict(flatten=flatten, orient=orient)\n
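
A usage sketch (the key names and shape shown are illustrative):

data = dataset.records.to_dict(orient=\"names\")\n# e.g. {\"question\": [\"Do you need oxygen to breathe?\"], \"answer\": [\"Yes\"]}\n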
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.to_list","title":"to_list(flatten=False)","text":"

Return the records as a list of dictionaries. This is a convenient shortcut for dataset.records(...).to_list().

Parameters:

flatten (bool): The structure of the exported dictionaries in the list. True: record keys are flattened, using dot notation for nested attributes (for example, label.suggestion and label.response); responses are spread across multiple columns for values and users. False: the record fields, metadata, suggestions, and responses are nested dictionaries keyed by record attribute. Default: False

Returns: A list of dictionaries of records.

Source code in src/argilla/records/_dataset_records.py
def to_list(self, flatten: bool = False) -> List[Dict[str, Any]]:\n    \"\"\"\n    Return the records as a list of dictionaries. This is a convenient shortcut for dataset.records(...).to_list().\n\n    Parameters:\n        flatten (bool): The structure of the exported dictionaries in the list.\n            - True: The record keys are flattened and a dot notation is used to record attributes and their attributes . For example, `label.suggestion` and `label.response`. Records responses are spread across multiple columns for values and users.\n            - False: The record fields, metadata, suggestions and responses will be nested dictionary with keys for record attributes.\n    Returns:\n        A list of dictionaries of records.\n    \"\"\"\n    data = self().to_list(flatten=flatten)\n    return data\n
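
A usage sketch of the flattened form (the column names are illustrative):

rows = dataset.records.to_list(flatten=True)\n# e.g. [{\"question\": \"Do you need oxygen to breathe?\", \"label.suggestion\": \"positive\"}]\n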
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.to_json","title":"to_json(path)","text":"

Export the records to a file on disk.

Parameters:

path (str): The path to the file to save the records. Required.

Returns:

Path: The path to the file where the records were saved.

Source code in src/argilla/records/_dataset_records.py
def to_json(self, path: Union[Path, str]) -> Path:\n    \"\"\"\n    Export the records to a file on disk.\n\n    Parameters:\n        path (str): The path to the file to save the records.\n\n    Returns:\n        The path to the file where the records were saved.\n\n    \"\"\"\n    return self().to_json(path=path)\n
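
A round-trip sketch combining to_json with from_json (described next):

path = dataset.records.to_json(\"records.json\")\ndataset.records.from_json(path)  # logs the saved records back into the dataset\n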
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.from_json","title":"from_json(path)","text":"

Creates a DatasetRecords object from a disk path to a JSON file. The JSON file should be defined by DatasetRecords.to_json.

Parameters:

path (str): The path to the file containing the records. Required.

Returns:

DatasetRecords: The DatasetRecords object created from the disk path.

Source code in src/argilla/records/_dataset_records.py
def from_json(self, path: Union[Path, str]) -> List[Record]:\n    \"\"\"Creates a DatasetRecords object from a disk path to a JSON file.\n        The JSON file should be defined by `DatasetRecords.to_json`.\n\n    Args:\n        path (str): The path to the file containing the records.\n\n    Returns:\n        DatasetRecords: The DatasetRecords object created from the disk path.\n\n    \"\"\"\n    records = JsonIO._records_from_json(path=path)\n    return self.log(records=records)\n
"},{"location":"reference/argilla/datasets/dataset_records/#src.argilla.records._dataset_records.DatasetRecords.to_datasets","title":"to_datasets()","text":"

Export the records to an HFDataset.

Returns:

HFDataset: The dataset containing the records.

Source code in src/argilla/records/_dataset_records.py
def to_datasets(self) -> HFDataset:\n    \"\"\"\n    Export the records to a HFDataset.\n\n    Returns:\n        The dataset containing the records.\n\n    \"\"\"\n\n    return self().to_datasets()\n
"},{"location":"reference/argilla/datasets/datasets/","title":"rg.Dataset","text":"

Dataset is a class that represents a collection of records. It is used to store and manage records in Argilla.

"},{"location":"reference/argilla/datasets/datasets/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/datasets/datasets/#creating-a-dataset","title":"Creating a Dataset","text":"

To create a new dataset you need to define its name and settings. Optional parameters are workspace and client, if you want to create the dataset in a specific workspace or on a specific Argilla instance.

dataset = rg.Dataset(\n    name=\"my_dataset\",\n    settings=rg.Settings(\n        fields=[\n            rg.TextField(name=\"text\"),\n        ],\n        questions=[\n            rg.TextQuestion(name=\"response\"),\n        ],\n    ),\n)\ndataset.create()\n

For a detailed guide to the dataset creation and publication process, see the Dataset how-to guide.

"},{"location":"reference/argilla/datasets/datasets/#retrieving-an-existing-dataset","title":"Retrieving an existing Dataset","text":"

To retrieve an existing dataset, use client.datasets(\"my_dataset\") instead of initializing a new Dataset object.

dataset = client.datasets(\"my_dataset\")\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset","title":"Dataset","text":"

Bases: Resource, HubImportExportMixin, DiskImportExportMixin

Class for interacting with Argilla Datasets

Attributes:

name (str): Name of the dataset.
records (DatasetRecords): The records object for the dataset. Used to interact with the records of the dataset by iterating, searching, etc.
settings (Settings): The settings object of the dataset. Used to configure the dataset with fields, questions, guidelines, etc.
fields (list): The fields of the dataset, for example the rg.TextField of the dataset. Defined in the settings.
questions (list): The questions of the dataset defined in the settings. For example, the rg.TextQuestion that you want labelers to answer.
guidelines (str): The guidelines of the dataset defined in the settings. Used to provide instructions to labelers.
allow_extra_metadata (bool): True if extra metadata is allowed, False otherwise.

Source code in src/argilla/datasets/_resource.py
class Dataset(Resource, HubImportExportMixin, DiskImportExportMixin):\n    \"\"\"Class for interacting with Argilla Datasets\n\n    Attributes:\n        name: Name of the dataset.\n        records (DatasetRecords): The records object for the dataset. Used to interact with the records of the dataset by iterating, searching, etc.\n        settings (Settings): The settings object of the dataset. Used to configure the dataset with fields, questions, guidelines, etc.\n        fields (list): The fields of the dataset, for example the `rg.TextField` of the dataset. Defined in the settings.\n        questions (list): The questions of the dataset defined in the settings. For example, the `rg.TextQuestion` that you want labelers to answer.\n        guidelines (str): The guidelines of the dataset defined in the settings. Used to provide instructions to labelers.\n        allow_extra_metadata (bool): True if extra metadata is allowed, False otherwise.\n    \"\"\"\n\n    name: str\n    id: Optional[UUID]\n\n    _api: \"DatasetsAPI\"\n    _model: \"DatasetModel\"\n\n    def __init__(\n        self,\n        name: Optional[str] = None,\n        workspace: Optional[Union[\"Workspace\", str, UUID]] = None,\n        settings: Optional[Settings] = None,\n        client: Optional[\"Argilla\"] = None,\n    ) -> None:\n        \"\"\"Initializes a new Argilla Dataset object with the given parameters.\n\n        Parameters:\n            name (str): Name of the dataset. Replaced by random UUID if not assigned.\n            workspace (UUID): Workspace of the dataset. Default is the first workspace found in the server.\n            settings (Settings): Settings class to be used to configure the dataset.\n            client (Argilla): Instance of Argilla to connect with the server. 
Default is the default client.\n        \"\"\"\n        client = client or Argilla._get_default()\n        super().__init__(client=client, api=client.api.datasets)\n        if name is None:\n            name = f\"dataset_{uuid4()}\"\n            self._log_message(f\"Settings dataset name to unique UUID: {name}\")\n\n        self._workspace = workspace\n        self._model = DatasetModel(name=name)\n        self._settings = settings._copy() if settings else Settings(_dataset=self)\n        self._settings.dataset = self\n        self.__records = DatasetRecords(client=self._client, dataset=self, mapping=self._settings.mapping)\n\n    #####################\n    #  Properties       #\n    #####################\n\n    @property\n    def name(self) -> str:\n        return self._model.name\n\n    @name.setter\n    def name(self, value: str) -> None:\n        self._model.name = value\n\n    @property\n    def records(self) -> \"DatasetRecords\":\n        return self.__records\n\n    @property\n    def settings(self) -> Settings:\n        return self._settings\n\n    @settings.setter\n    def settings(self, value: Settings) -> None:\n        settings_copy = value._copy()\n        settings_copy.dataset = self\n        self._settings = settings_copy\n\n    @property\n    def fields(self) -> list:\n        return self.settings.fields\n\n    @property\n    def questions(self) -> list:\n        return self.settings.questions\n\n    @property\n    def guidelines(self) -> str:\n        return self.settings.guidelines\n\n    @guidelines.setter\n    def guidelines(self, value: str) -> None:\n        self.settings.guidelines = value\n\n    @property\n    def allow_extra_metadata(self) -> bool:\n        return self.settings.allow_extra_metadata\n\n    @allow_extra_metadata.setter\n    def allow_extra_metadata(self, value: bool) -> None:\n        self.settings.allow_extra_metadata = value\n\n    @property\n    def schema(self) -> dict:\n        return self.settings.schema\n\n    @property\n    def workspace(self) -> Workspace:\n        self._workspace = self._resolve_workspace()\n        return self._workspace\n\n    @property\n    def distribution(self) -> TaskDistribution:\n        return self.settings.distribution\n\n    @distribution.setter\n    def distribution(self, value: TaskDistribution) -> None:\n        self.settings.distribution = value\n\n    #####################\n    #  Core methods     #\n    #####################\n\n    def get(self) -> \"Dataset\":\n        super().get()\n        self.settings.get()\n        return self\n\n    def create(self) -> \"Dataset\":\n        \"\"\"Creates the dataset on the server with the `Settings` configuration.\n\n        Returns:\n            Dataset: The created dataset object.\n        \"\"\"\n        try:\n            super().create()\n        except ForbiddenError as e:\n            settings_url = f\"{self._client.api_url}/user-settings\"\n            user_role = self._client.me.role.value\n            user_name = self._client.me.username\n            workspace_name = self.workspace.name\n            message = f\"\"\"User '{user_name}' is not authorized to create a dataset in workspace '{workspace_name}'\n            with role '{user_role}'. 
Go to {settings_url} to view your role.\"\"\"\n            raise ForbiddenError(message) from e\n        try:\n            return self._publish()\n        except Exception as e:\n            self._log_message(message=f\"Error creating dataset: {e}\", level=\"error\")\n            self._rollback_dataset_creation()\n            raise SettingsError from e\n\n    def update(self) -> \"Dataset\":\n        \"\"\"Updates the dataset on the server with the current settings.\n\n        Returns:\n            Dataset: The updated dataset object.\n        \"\"\"\n        self.settings.update()\n        return self\n\n    def progress(self, with_users_distribution: bool = False) -> dict:\n        \"\"\"Returns the team's progress on the dataset.\n\n        Parameters:\n            with_users_distribution (bool): If True, the progress of the dataset is returned\n                with users distribution. This includes the number of responses made by each user.\n\n        Returns:\n            dict: The team's progress on the dataset.\n\n        An example of a response when `with_users_distribution` is `True`:\n        ```json\n        {\n            \"total\": 100,\n            \"completed\": 50,\n            \"pending\": 50,\n            \"users\": {\n                \"user1\": {\n                   \"completed\": { \"submitted\": 10, \"draft\": 5, \"discarded\": 5},\n                   \"pending\": { \"submitted\": 5, \"draft\": 10, \"discarded\": 10},\n                },\n                \"user2\": {\n                   \"completed\": { \"submitted\": 20, \"draft\": 10, \"discarded\": 5},\n                   \"pending\": { \"submitted\": 2, \"draft\": 25, \"discarded\": 0},\n                },\n                ...\n        }\n        ```\n\n        \"\"\"\n\n        progress = self._api.get_progress(dataset_id=self._model.id).model_dump()\n\n        if with_users_distribution:\n            users_progress = self._api.list_users_progress(dataset_id=self._model.id)\n            users_distribution = {\n                user.username: {\n                    \"completed\": user.completed.model_dump(),\n                    \"pending\": user.pending.model_dump(),\n                }\n                for user in users_progress\n            }\n\n            progress.update({\"users\": users_distribution})\n\n        return progress\n\n    @classmethod\n    def from_model(cls, model: DatasetModel, client: \"Argilla\") -> \"Dataset\":\n        instance = cls(client=client, workspace=model.workspace_id, name=model.name)\n        instance._model = model\n\n        return instance\n\n    #####################\n    #  Utility methods  #\n    #####################\n\n    def api_model(self) -> DatasetModel:\n        self._model.workspace_id = self.workspace.id\n        return self._model\n\n    def _publish(self) -> \"Dataset\":\n        self._settings.create()\n        self._api.publish(dataset_id=self._model.id)\n\n        return self.get()\n\n    def _resolve_workspace(self) -> Workspace:\n        workspace = self._workspace\n\n        if workspace is None:\n            workspace = self._client.workspaces.default\n            warnings.warn(f\"Workspace not provided. 
Using default workspace: {workspace.name} id: {workspace.id}\")\n        elif isinstance(workspace, str):\n            workspace = self._client.workspaces(workspace)\n            if workspace is None:\n                available_workspace_names = [ws.name for ws in self._client.workspaces]\n                raise NotFoundError(\n                    f\"Workspace with name {workspace} not found. Available workspaces: {available_workspace_names}\"\n                )\n        elif isinstance(workspace, UUID):\n            ws_model = self._client.api.workspaces.get(workspace)\n            workspace = Workspace.from_model(ws_model, client=self._client)\n        elif not isinstance(workspace, Workspace):\n            raise ValueError(f\"Wrong workspace value found {workspace}\")\n\n        return workspace\n\n    def _rollback_dataset_creation(self):\n        if not self._is_published():\n            self.delete()\n\n    def _is_published(self) -> bool:\n        return self._model.status == \"ready\"\n\n    @classmethod\n    def _sanitize_name(cls, name: str):\n        name = name.replace(\" \", \"_\")\n\n        for character in [\"/\", \"\\\\\", \".\", \",\", \";\", \":\", \"-\", \"+\", \"=\"]:\n            name = name.replace(character, \"-\")\n        return name\n\n    def _with_client(self, client: Argilla) -> \"Self\":\n        return super()._with_client(client=client)\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset.__init__","title":"__init__(name=None, workspace=None, settings=None, client=None)","text":"

Initializes a new Argilla Dataset object with the given parameters.

Parameters:

name (str): Name of the dataset. Replaced by a random UUID if not assigned. Default: None
workspace (UUID): Workspace of the dataset. Default is the first workspace found on the server. Default: None
settings (Settings): Settings class to be used to configure the dataset. Default: None
client (Argilla): Instance of Argilla to connect with the server. Default is the default client. Default: None

Source code in src/argilla/datasets/_resource.py
def __init__(\n    self,\n    name: Optional[str] = None,\n    workspace: Optional[Union[\"Workspace\", str, UUID]] = None,\n    settings: Optional[Settings] = None,\n    client: Optional[\"Argilla\"] = None,\n) -> None:\n    \"\"\"Initializes a new Argilla Dataset object with the given parameters.\n\n    Parameters:\n        name (str): Name of the dataset. Replaced by random UUID if not assigned.\n        workspace (UUID): Workspace of the dataset. Default is the first workspace found in the server.\n        settings (Settings): Settings class to be used to configure the dataset.\n        client (Argilla): Instance of Argilla to connect with the server. Default is the default client.\n    \"\"\"\n    client = client or Argilla._get_default()\n    super().__init__(client=client, api=client.api.datasets)\n    if name is None:\n        name = f\"dataset_{uuid4()}\"\n        self._log_message(f\"Settings dataset name to unique UUID: {name}\")\n\n    self._workspace = workspace\n    self._model = DatasetModel(name=name)\n    self._settings = settings._copy() if settings else Settings(_dataset=self)\n    self._settings.dataset = self\n    self.__records = DatasetRecords(client=self._client, dataset=self, mapping=self._settings.mapping)\n
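A minimal construction sketch (the server URL, API key, workspace name, and settings below are illustrative):

import argilla as rg\n\nclient = rg.Argilla(api_url=\"http://localhost:6900\", api_key=\"argilla.apikey\")  # illustrative credentials\n\ndataset = rg.Dataset(\n    name=\"my_dataset\",  # omit to fall back to a random UUID-based name\n    workspace=\"my_workspace\",  # illustrative workspace name\n    settings=rg.Settings(\n        fields=[rg.TextField(name=\"text\")],\n        questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n    ),\n    client=client,\n)\n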
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset.create","title":"create()","text":"

Creates the dataset on the server with the Settings configuration.

Returns:

Name Type Description Dataset Dataset

The created dataset object.

Source code in src/argilla/datasets/_resource.py
def create(self) -> \"Dataset\":\n    \"\"\"Creates the dataset on the server with the `Settings` configuration.\n\n    Returns:\n        Dataset: The created dataset object.\n    \"\"\"\n    try:\n        super().create()\n    except ForbiddenError as e:\n        settings_url = f\"{self._client.api_url}/user-settings\"\n        user_role = self._client.me.role.value\n        user_name = self._client.me.username\n        workspace_name = self.workspace.name\n        message = f\"\"\"User '{user_name}' is not authorized to create a dataset in workspace '{workspace_name}'\n        with role '{user_role}'. Go to {settings_url} to view your role.\"\"\"\n        raise ForbiddenError(message) from e\n    try:\n        return self._publish()\n    except Exception as e:\n        self._log_message(message=f\"Error creating dataset: {e}\", level=\"error\")\n        self._rollback_dataset_creation()\n        raise SettingsError from e\n
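For example, a configured dataset can then be created on the server (assuming the dataset object built in the constructor example above):

dataset.create()  # creates the dataset on the server and publishes its settings\n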
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset.update","title":"update()","text":"

Updates the dataset on the server with the current settings.

Returns:

Name Type Description Dataset Dataset

The updated dataset object.

Source code in src/argilla/datasets/_resource.py
def update(self) -> \"Dataset\":\n    \"\"\"Updates the dataset on the server with the current settings.\n\n    Returns:\n        Dataset: The updated dataset object.\n    \"\"\"\n    self.settings.update()\n    return self\n
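A sketch of a settings change followed by an update (the guidelines text is illustrative):

dataset.settings.guidelines = \"Classify the sentiment of each text.\"  # illustrative guidelines\ndataset = dataset.update()  # pushes the modified settings to the server\n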
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._resource.Dataset.progress","title":"progress(with_users_distribution=False)","text":"

Returns the team's progress on the dataset.

Parameters:

Name Type Description Default with_users_distribution bool

If True, the progress of the dataset is returned with the users' distribution. This includes the number of responses made by each user.

False

Returns:

Name Type Description dict dict

The team's progress on the dataset.

An example of a response when with_users_distribution is True:

{\n    \"total\": 100,\n    \"completed\": 50,\n    \"pending\": 50,\n    \"users\": {\n        \"user1\": {\n           \"completed\": { \"submitted\": 10, \"draft\": 5, \"discarded\": 5},\n           \"pending\": { \"submitted\": 5, \"draft\": 10, \"discarded\": 10},\n        },\n        \"user2\": {\n           \"completed\": { \"submitted\": 20, \"draft\": 10, \"discarded\": 5},\n           \"pending\": { \"submitted\": 2, \"draft\": 25, \"discarded\": 0},\n        },\n        ...\n}\n

Source code in src/argilla/datasets/_resource.py
def progress(self, with_users_distribution: bool = False) -> dict:\n    \"\"\"Returns the team's progress on the dataset.\n\n    Parameters:\n        with_users_distribution (bool): If True, the progress of the dataset is returned\n            with users distribution. This includes the number of responses made by each user.\n\n    Returns:\n        dict: The team's progress on the dataset.\n\n    An example of a response when `with_users_distribution` is `True`:\n    ```json\n    {\n        \"total\": 100,\n        \"completed\": 50,\n        \"pending\": 50,\n        \"users\": {\n            \"user1\": {\n               \"completed\": { \"submitted\": 10, \"draft\": 5, \"discarded\": 5},\n               \"pending\": { \"submitted\": 5, \"draft\": 10, \"discarded\": 10},\n            },\n            \"user2\": {\n               \"completed\": { \"submitted\": 20, \"draft\": 10, \"discarded\": 5},\n               \"pending\": { \"submitted\": 2, \"draft\": 25, \"discarded\": 0},\n            },\n            ...\n    }\n    ```\n\n    \"\"\"\n\n    progress = self._api.get_progress(dataset_id=self._model.id).model_dump()\n\n    if with_users_distribution:\n        users_progress = self._api.list_users_progress(dataset_id=self._model.id)\n        users_distribution = {\n            user.username: {\n                \"completed\": user.completed.model_dump(),\n                \"pending\": user.pending.model_dump(),\n            }\n            for user in users_progress\n        }\n\n        progress.update({\"users\": users_distribution})\n\n    return progress\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._disk.DiskImportExportMixin","title":"DiskImportExportMixin","text":"

Bases: ABC

A mixin for exporting and importing datasets to and from disk.

Source code in src/argilla/datasets/_io/_disk.py
class DiskImportExportMixin(ABC):\n    \"\"\"A mixin for exporting and importing datasets to and from disk.\"\"\"\n\n    _model: DatasetModel\n    _DEFAULT_RECORDS_PATH = \"records.json\"\n    _DEFAULT_CONFIG_REPO_DIR = \".argilla\"\n    _DEFAULT_SETTINGS_PATH = f\"{_DEFAULT_CONFIG_REPO_DIR}/settings.json\"\n    _DEFAULT_DATASET_PATH = f\"{_DEFAULT_CONFIG_REPO_DIR}/dataset.json\"\n    _DEFAULT_CONFIGURATION_FILES = [_DEFAULT_SETTINGS_PATH, _DEFAULT_DATASET_PATH]\n\n    def to_disk(self: \"Dataset\", path: str, *, with_records: bool = True) -> str:\n        \"\"\"Exports the dataset to disk in the given path. The dataset is exported as a directory containing the dataset model, settings and records as json files.\n\n        Parameters:\n            path (str): The path to export the dataset to. Must be an empty directory.\n            with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n        \"\"\"\n        dataset_path, settings_path, records_path = self._define_child_paths(path=path)\n        logging.info(f\"Loading dataset from {dataset_path}\")\n        logging.info(f\"Loading settings from {settings_path}\")\n        logging.info(f\"Loading records from {records_path}\")\n        # Export the dataset model, settings and records\n        self._persist_dataset_model(path=dataset_path)\n        self.settings.to_json(path=settings_path)\n        if with_records:\n            self.records.to_json(path=records_path)\n\n        return path\n\n    @classmethod\n    def from_disk(\n        cls: Type[\"Dataset\"],\n        path: str,\n        *,\n        name: Optional[str] = None,\n        workspace: Optional[Union[\"Workspace\", str]] = None,\n        client: Optional[\"Argilla\"] = None,\n        with_records: bool = True,\n    ) -> \"Dataset\":\n        \"\"\"Imports a dataset from disk as a directory containing the dataset model, settings and records.\n        The directory should be defined using the `to_disk` method.\n\n        Parameters:\n            path (str): The path to the directory containing the dataset model, settings and records.\n            name (str, optional): The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.\n            workspace (Union[Workspace, str], optional): The workspace to import the dataset to. Defaults to None and default workspace is used.\n            client (Argilla, optional): The client to use for the import. Defaults to None and the default client is used.\n            with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n        \"\"\"\n\n        client = client or Argilla._get_default()\n\n        try:\n            dataset_path, settings_path, records_path = cls._define_child_paths(path=path)\n            logging.info(f\"Loading dataset from {dataset_path}\")\n            logging.info(f\"Loading settings from {settings_path}\")\n            logging.info(f\"Loading records from {records_path}\")\n\n            dataset_model = cls._load_dataset_model(path=dataset_path)\n        except (NotADirectoryError, FileNotFoundError) as e:\n            raise ImportDatasetError(f\"Error loading dataset from disk. 
{e}\") from e\n\n        # Get the relevant workspace_id of the incoming dataset\n        if isinstance(workspace, str):\n            workspace = client.workspaces(workspace)\n            if not workspace:\n                raise ArgillaError(f\"Workspace {workspace} not found on the server.\")\n        else:\n            warnings.warn(\"Workspace not provided. Using default workspace.\")\n            workspace = client.workspaces.default\n        dataset_model.workspace_id = workspace.id\n\n        if name and (name != dataset_model.name):\n            logging.info(f\"Changing dataset name from {dataset_model.name} to {name}\")\n            dataset_model.name = name\n\n        if client.api.datasets.name_exists(name=dataset_model.name, workspace_id=workspace.id):\n            warnings.warn(\n                f\"Loaded dataset name {dataset_model.name} already exists in the workspace {workspace.name} so using it. To create a new dataset, provide a unique name to the `name` parameter.\"\n            )\n            dataset_model = client.api.datasets.get_by_name_and_workspace_id(\n                name=dataset_model.name, workspace_id=workspace.id\n            )\n            dataset = cls.from_model(model=dataset_model, client=client)\n        else:\n            # Create a new dataset and load the settings and records\n            if not os.path.exists(settings_path):\n                raise ImportDatasetError(f\"Settings file not found at {settings_path}\")\n\n            dataset = cls.from_model(model=dataset_model, client=client)\n            dataset.settings = Settings.from_json(path=settings_path)\n            dataset.create()\n\n        if os.path.exists(records_path) and with_records:\n            try:\n                dataset.records.from_json(path=records_path)\n            except RecordsIngestionError as e:\n                raise RecordsIngestionError(\n                    message=\"Error importing dataset records from disk. \"\n                    \"Records and datasets settings are not compatible.\"\n                ) from e\n\n        return dataset\n\n    ############################\n    # Utility methods\n    ############################\n\n    def _persist_dataset_model(self, path: Path):\n        \"\"\"Persists the dataset model to disk.\"\"\"\n        if path.exists():\n            raise FileExistsError(f\"Dataset already exists at {path}\")\n        with open(file=path, mode=\"w\") as f:\n            json.dump(self.api_model().model_dump(), f)\n\n    @classmethod\n    def _load_dataset_model(cls, path: Path):\n        \"\"\"Loads the dataset model from disk.\"\"\"\n        if not os.path.exists(path):\n            raise FileNotFoundError(f\"Dataset model not found at {path}\")\n        with open(file=path, mode=\"r\") as f:\n            dataset_model = json.load(f)\n            dataset_model = DatasetModel(**dataset_model)\n        return dataset_model\n\n    @classmethod\n    def _define_child_paths(cls, path: Union[Path, str]) -> Tuple[Path, Path, Path]:\n        path = Path(path)\n        if not path.is_dir():\n            raise NotADirectoryError(f\"Path {path} is not a directory\")\n        main_path = path / cls._DEFAULT_CONFIG_REPO_DIR\n        main_path.mkdir(exist_ok=True)\n        dataset_path = path / cls._DEFAULT_DATASET_PATH\n        settings_path = path / cls._DEFAULT_SETTINGS_PATH\n        records_path = path / cls._DEFAULT_RECORDS_PATH\n        return dataset_path, settings_path, records_path\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._disk.DiskImportExportMixin.to_disk","title":"to_disk(path, *, with_records=True)","text":"

Exports the dataset to disk in the given path. The dataset is exported as a directory containing the dataset model, settings and records as json files.

Parameters:

Name Type Description Default path str

The path to export the dataset to. Must be an empty directory.

required with_records bool

whether to export the dataset records to disk. Defaults to True.

True Source code in src/argilla/datasets/_io/_disk.py
def to_disk(self: \"Dataset\", path: str, *, with_records: bool = True) -> str:\n    \"\"\"Exports the dataset to disk in the given path. The dataset is exported as a directory containing the dataset model, settings and records as json files.\n\n    Parameters:\n        path (str): The path to export the dataset to. Must be an empty directory.\n        with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n    \"\"\"\n    dataset_path, settings_path, records_path = self._define_child_paths(path=path)\n    logging.info(f\"Loading dataset from {dataset_path}\")\n    logging.info(f\"Loading settings from {settings_path}\")\n    logging.info(f\"Loading records from {records_path}\")\n    # Export the dataset model, settings and records\n    self._persist_dataset_model(path=dataset_path)\n    self.settings.to_json(path=settings_path)\n    if with_records:\n        self.records.to_json(path=records_path)\n\n    return path\n
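A usage sketch (the directory name is illustrative; it must already exist and should be empty):

export_path = dataset.to_disk(path=\"my_dataset_export\", with_records=True)\n# writes records.json plus .argilla/settings.json and .argilla/dataset.json\n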
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._disk.DiskImportExportMixin.from_disk","title":"from_disk(path, *, name=None, workspace=None, client=None, with_records=True) classmethod","text":"

Imports a dataset from disk as a directory containing the dataset model, settings and records. The directory should have been created using the to_disk method.

Parameters:

Name Type Description Default path str

The path to the directory containing the dataset model, settings and records.

required name str

The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.

None workspace Union[Workspace, str]

The workspace to import the dataset to. Defaults to None and default workspace is used.

None client Argilla

The client to use for the import. Defaults to None and the default client is used.

None with_records bool

whether to load the records from disk. Defaults to True.

True Source code in src/argilla/datasets/_io/_disk.py
@classmethod\ndef from_disk(\n    cls: Type[\"Dataset\"],\n    path: str,\n    *,\n    name: Optional[str] = None,\n    workspace: Optional[Union[\"Workspace\", str]] = None,\n    client: Optional[\"Argilla\"] = None,\n    with_records: bool = True,\n) -> \"Dataset\":\n    \"\"\"Imports a dataset from disk as a directory containing the dataset model, settings and records.\n    The directory should be defined using the `to_disk` method.\n\n    Parameters:\n        path (str): The path to the directory containing the dataset model, settings and records.\n        name (str, optional): The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.\n        workspace (Union[Workspace, str], optional): The workspace to import the dataset to. Defaults to None and default workspace is used.\n        client (Argilla, optional): The client to use for the import. Defaults to None and the default client is used.\n        with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n    \"\"\"\n\n    client = client or Argilla._get_default()\n\n    try:\n        dataset_path, settings_path, records_path = cls._define_child_paths(path=path)\n        logging.info(f\"Loading dataset from {dataset_path}\")\n        logging.info(f\"Loading settings from {settings_path}\")\n        logging.info(f\"Loading records from {records_path}\")\n\n        dataset_model = cls._load_dataset_model(path=dataset_path)\n    except (NotADirectoryError, FileNotFoundError) as e:\n        raise ImportDatasetError(f\"Error loading dataset from disk. {e}\") from e\n\n    # Get the relevant workspace_id of the incoming dataset\n    if isinstance(workspace, str):\n        workspace = client.workspaces(workspace)\n        if not workspace:\n            raise ArgillaError(f\"Workspace {workspace} not found on the server.\")\n    else:\n        warnings.warn(\"Workspace not provided. Using default workspace.\")\n        workspace = client.workspaces.default\n    dataset_model.workspace_id = workspace.id\n\n    if name and (name != dataset_model.name):\n        logging.info(f\"Changing dataset name from {dataset_model.name} to {name}\")\n        dataset_model.name = name\n\n    if client.api.datasets.name_exists(name=dataset_model.name, workspace_id=workspace.id):\n        warnings.warn(\n            f\"Loaded dataset name {dataset_model.name} already exists in the workspace {workspace.name} so using it. To create a new dataset, provide a unique name to the `name` parameter.\"\n        )\n        dataset_model = client.api.datasets.get_by_name_and_workspace_id(\n            name=dataset_model.name, workspace_id=workspace.id\n        )\n        dataset = cls.from_model(model=dataset_model, client=client)\n    else:\n        # Create a new dataset and load the settings and records\n        if not os.path.exists(settings_path):\n            raise ImportDatasetError(f\"Settings file not found at {settings_path}\")\n\n        dataset = cls.from_model(model=dataset_model, client=client)\n        dataset.settings = Settings.from_json(path=settings_path)\n        dataset.create()\n\n    if os.path.exists(records_path) and with_records:\n        try:\n            dataset.records.from_json(path=records_path)\n        except RecordsIngestionError as e:\n            raise RecordsIngestionError(\n                message=\"Error importing dataset records from disk. 
\"\n                \"Records and datasets settings are not compatible.\"\n            ) from e\n\n    return dataset\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._hub.HubImportExportMixin","title":"HubImportExportMixin","text":"

Bases: DiskImportExportMixin

Source code in src/argilla/datasets/_io/_hub.py
class HubImportExportMixin(DiskImportExportMixin):\n    def to_hub(\n        self: \"Dataset\",\n        repo_id: str,\n        *,\n        with_records: bool = True,\n        generate_card: Optional[bool] = True,\n        **kwargs: Any,\n    ) -> None:\n        \"\"\"Pushes the `Dataset` to the Hugging Face Hub. If the dataset has been previously pushed to the\n        Hugging Face Hub, it will be updated instead of creating a new dataset repo.\n\n        Parameters:\n            repo_id: the ID of the Hugging Face Hub repo to push the `Dataset` to.\n            with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n            generate_card: whether to generate a dataset card for the `Dataset` in the Hugging Face Hub. Defaults\n                to `True`.\n            **kwargs: the kwargs to pass to `datasets.Dataset.push_to_hub`.\n\n        Returns:\n            None\n        \"\"\"\n\n        from huggingface_hub import DatasetCardData, HfApi\n\n        from argilla.datasets._io.card import (\n            ArgillaDatasetCard,\n            size_categories_parser,\n        )\n\n        hf_api = HfApi(token=kwargs.get(\"token\"))\n\n        hfds = False\n        if with_records:\n            hfds = self.records(with_vectors=True, with_responses=True, with_suggestions=True).to_datasets()\n            hfds.push_to_hub(repo_id, **kwargs)\n        else:\n            hf_api.create_repo(repo_id=repo_id, repo_type=\"dataset\", exist_ok=kwargs.get(\"exist_ok\") or True)\n\n        with TemporaryDirectory() as tmpdirname:\n            config_dir = os.path.join(tmpdirname)\n\n            self.to_disk(path=config_dir, with_records=False)\n\n            if generate_card:\n                sample_argilla_record = next(iter(self.records(with_suggestions=True, with_responses=True)))\n                sample_huggingface_record = self._get_sample_hf_record(hfds) if with_records else None\n                dataset_size = len(hfds) if with_records else 0\n                card = ArgillaDatasetCard.from_template(\n                    card_data=DatasetCardData(\n                        size_categories=size_categories_parser(dataset_size),\n                        tags=[\"rlfh\", \"argilla\", \"human-feedback\"],\n                    ),\n                    repo_id=repo_id,\n                    argilla_fields=self.settings.fields,\n                    argilla_questions=self.settings.questions,\n                    argilla_guidelines=self.settings.guidelines or None,\n                    argilla_vectors_settings=self.settings.vectors or None,\n                    argilla_metadata_properties=self.settings.metadata,\n                    argilla_record=sample_argilla_record.to_dict(),\n                    huggingface_record=sample_huggingface_record,\n                )\n                card.save(filepath=os.path.join(tmpdirname, \"README.md\"))\n\n            hf_api.upload_folder(\n                folder_path=tmpdirname,\n                repo_id=repo_id,\n                repo_type=\"dataset\",\n            )\n\n    @classmethod\n    def from_hub(\n        cls: Type[\"Dataset\"],\n        repo_id: str,\n        *,\n        name: Optional[str] = None,\n        workspace: Optional[Union[\"Workspace\", str]] = None,\n        client: Optional[\"Argilla\"] = None,\n        with_records: bool = True,\n        settings: Optional[\"Settings\"] = None,\n        split: Optional[str] = None,\n        subset: Optional[str] = None,\n        **kwargs: Any,\n    ) -> \"Dataset\":\n        
\"\"\"Loads a `Dataset` from the Hugging Face Hub.\n\n        Parameters:\n            repo_id: the ID of the Hugging Face Hub repo to load the `Dataset` from.\n            name (str, optional): The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.\n            workspace (Union[Workspace, str], optional): The workspace to import the dataset to. Defaults to None and default workspace is used.\n            client: the client to use to load the `Dataset`. If not provided, the default client will be used.\n            with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n            settings: the settings to use to load the `Dataset`. If not provided, the settings will be loaded from the Hugging Face dataset.\n            split: the split to load from the Hugging Face dataset. If not provided, the first split will be loaded.\n            **kwargs: the kwargs to pass to `datasets.Dataset.load_from_hub`.\n\n        Returns:\n            A `Dataset` loaded from the Hugging Face Hub.\n        \"\"\"\n        from datasets import load_dataset\n        from huggingface_hub import snapshot_download\n        from argilla import Dataset\n\n        if name is None:\n            name = Dataset._sanitize_name(repo_id)\n\n        if settings is not None:\n            dataset = cls(name=name, settings=settings)\n            dataset.create()\n        else:\n            try:\n                # download configuration files from the hub\n                folder_path = snapshot_download(\n                    repo_id=repo_id,\n                    repo_type=\"dataset\",\n                    allow_patterns=cls._DEFAULT_CONFIGURATION_FILES,\n                    token=kwargs.get(\"token\"),\n                )\n\n                dataset = cls.from_disk(\n                    path=folder_path, workspace=workspace, name=name, client=client, with_records=with_records\n                )\n            except ImportDatasetError:\n                from argilla import Settings\n\n                settings = Settings.from_hub(repo_id=repo_id, subset=subset)\n                dataset = cls.from_hub(\n                    repo_id=repo_id,\n                    name=name,\n                    workspace=workspace,\n                    client=client,\n                    with_records=with_records,\n                    settings=settings,\n                    split=split,\n                    subset=subset,\n                    **kwargs,\n                )\n                return dataset\n\n        if with_records:\n            try:\n                hf_dataset = load_dataset(\n                    path=repo_id,\n                    split=split,\n                    name=subset,\n                    **kwargs,\n                )  # type: ignore\n                hf_dataset = cls._get_dataset_split(hf_dataset=hf_dataset, split=split, **kwargs)\n                cls._log_dataset_records(hf_dataset=hf_dataset, dataset=dataset)\n            except EmptyDatasetError:\n                warnings.warn(\n                    message=\"Trying to load a dataset `with_records=True` but dataset does not contain any records.\",\n                    category=UserWarning,\n                )\n\n        return dataset\n\n    @staticmethod\n    def _log_dataset_records(hf_dataset: \"HFDataset\", dataset: \"Dataset\"):\n        \"\"\"This method extracts the responses from a Hugging Face dataset and returns a list of 
`Record` objects\"\"\"\n        # THIS IS REQUIRED SINCE THE NAME RESTRICTION IN ARGILLA. HUGGING FACE DATASET COLUMNS ARE CASE SENSITIVE\n        # Also, there is a logic with column names including \".responses\" and \".suggestion\" in the name.\n        columns_map = {}\n        for column in hf_dataset.column_names:\n            if \".responses\" in column or \".suggestion\" in column:\n                columns_map[column] = column.lower()\n            else:\n                columns_map[column] = dataset.settings._sanitize_settings_name(column)\n\n        hf_dataset = hf_dataset.rename_columns(columns_map)\n\n        # Identify columns that columns that contain responses\n        responses_columns = [col for col in hf_dataset.column_names if \".responses\" in col]\n        response_questions = defaultdict(dict)\n        user_ids = {}\n        for col in responses_columns:\n            question_name = col.split(\".\")[0]\n            if col.endswith(\"users\"):\n                response_questions[question_name][\"users\"] = hf_dataset[col]\n                user_ids.update({UUID(user_id): UUID(user_id) for user_id in set(sum(hf_dataset[col], []))})\n            elif col.endswith(\"responses\"):\n                response_questions[question_name][\"responses\"] = hf_dataset[col]\n            elif col.endswith(\"status\"):\n                response_questions[question_name][\"status\"] = hf_dataset[col]\n\n        # Check if all user ids are known to this Argilla client\n        known_users_ids = [user.id for user in dataset._client.users]\n        unknown_user_ids = set(user_ids.keys()) - set(known_users_ids)\n        my_user = dataset._client.me\n        if len(unknown_user_ids) > 1:\n            warnings.warn(\n                message=f\"\"\"Found unknown user ids in dataset repo: {unknown_user_ids}.\n                    Assigning first response for each record to current user ({my_user.username}) and discarding the rest.\"\"\"\n            )\n        for unknown_user_id in unknown_user_ids:\n            user_ids[unknown_user_id] = my_user.id\n\n        # Create a mapper to map the Hugging Face dataset to a Record object\n        mapping = {col: col for col in hf_dataset.column_names if \".suggestion\" in col}\n        mapper = IngestedRecordMapper(dataset=dataset, mapping=mapping, user_id=my_user.id)\n\n        # Extract responses and create Record objects\n        records = []\n        hf_dataset = HFDatasetsIO.to_argilla(hf_dataset=hf_dataset)\n        for idx, row in enumerate(hf_dataset):\n            record = mapper(row)\n            for question_name, values in response_questions.items():\n                response_values = values[\"responses\"][idx]\n                response_users = values[\"users\"][idx]\n                response_status = values[\"status\"][idx]\n                for value, user_id, status in zip(response_values, response_users, response_status):\n                    user_id = user_ids[UUID(user_id)]\n                    if user_id in response_users:\n                        continue\n                    response_users[user_id] = True\n                    response = Response(\n                        user_id=user_id,\n                        question_name=question_name,\n                        value=value,\n                        status=status,\n                    )\n                    record.responses.add(response)\n            records.append(record)\n\n        try:\n            dataset.records.log(records=records)\n        except (RecordsIngestionError, 
UnprocessableEntityError) as e:\n            raise SettingsError(\n                message=f\"Failed to load records from Hugging Face dataset. Defined settings do not match dataset schema. Hugging face dataset features: {hf_dataset.features}. Argilla dataset settings : {dataset.settings}\"\n            ) from e\n\n    @staticmethod\n    def _get_dataset_split(hf_dataset: \"HFDataset\", split: Optional[str] = None, **kwargs: Dict) -> \"HFDataset\":\n        \"\"\"Get a single dataset from a Hugging Face dataset.\n\n        Parameters:\n            hf_dataset (HFDataset): The Hugging Face dataset to get a single dataset from.\n\n        Returns:\n            HFDataset: The single dataset.\n        \"\"\"\n\n        if isinstance(hf_dataset, DatasetDict) and split is None:\n            split = next(iter(hf_dataset.keys()))\n            if len(hf_dataset.keys()) > 1:\n                warnings.warn(\n                    message=f\"Multiple splits found in Hugging Face dataset. Using the first split: {split}. \"\n                    f\"Available splits are: {', '.join(hf_dataset.keys())}.\"\n                )\n            hf_dataset = hf_dataset[split]\n        return hf_dataset\n\n    @staticmethod\n    def _get_sample_hf_record(hf_dataset: \"HFDataset\") -> Dict:\n        \"\"\"Get a sample record from a Hugging Face dataset.\n\n        Parameters:\n            hf_dataset (HFDataset): The Hugging Face dataset to get a sample record from.\n\n        Returns:\n            Dict: The sample record.\n        \"\"\"\n\n        if hf_dataset:\n            sample_huggingface_record = {}\n            for key, value in hf_dataset[0].items():\n                try:\n                    json.dumps(value)\n                    sample_huggingface_record[key] = value\n                except TypeError:\n                    if isinstance(value, Image.Image):\n                        sample_huggingface_record[key] = pil_to_data_uri(value)\n                    else:\n                        sample_huggingface_record[key] = \"Record value is not serializable\"\n            return sample_huggingface_record\n
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._hub.HubImportExportMixin.to_hub","title":"to_hub(repo_id, *, with_records=True, generate_card=True, **kwargs)","text":"

Pushes the Dataset to the Hugging Face Hub. If the dataset has been previously pushed to the Hugging Face Hub, it will be updated instead of creating a new dataset repo.

Parameters:

Name Type Description Default repo_id str

the ID of the Hugging Face Hub repo to push the Dataset to.

required with_records bool

whether to push the dataset records to the Hub. Defaults to True.

True generate_card Optional[bool]

whether to generate a dataset card for the Dataset in the Hugging Face Hub. Defaults to True.

True **kwargs Any

the kwargs to pass to datasets.Dataset.push_to_hub.

{}

Returns:

Type Description None

None

Source code in src/argilla/datasets/_io/_hub.py
def to_hub(\n    self: \"Dataset\",\n    repo_id: str,\n    *,\n    with_records: bool = True,\n    generate_card: Optional[bool] = True,\n    **kwargs: Any,\n) -> None:\n    \"\"\"Pushes the `Dataset` to the Hugging Face Hub. If the dataset has been previously pushed to the\n    Hugging Face Hub, it will be updated instead of creating a new dataset repo.\n\n    Parameters:\n        repo_id: the ID of the Hugging Face Hub repo to push the `Dataset` to.\n        with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n        generate_card: whether to generate a dataset card for the `Dataset` in the Hugging Face Hub. Defaults\n            to `True`.\n        **kwargs: the kwargs to pass to `datasets.Dataset.push_to_hub`.\n\n    Returns:\n        None\n    \"\"\"\n\n    from huggingface_hub import DatasetCardData, HfApi\n\n    from argilla.datasets._io.card import (\n        ArgillaDatasetCard,\n        size_categories_parser,\n    )\n\n    hf_api = HfApi(token=kwargs.get(\"token\"))\n\n    hfds = False\n    if with_records:\n        hfds = self.records(with_vectors=True, with_responses=True, with_suggestions=True).to_datasets()\n        hfds.push_to_hub(repo_id, **kwargs)\n    else:\n        hf_api.create_repo(repo_id=repo_id, repo_type=\"dataset\", exist_ok=kwargs.get(\"exist_ok\") or True)\n\n    with TemporaryDirectory() as tmpdirname:\n        config_dir = os.path.join(tmpdirname)\n\n        self.to_disk(path=config_dir, with_records=False)\n\n        if generate_card:\n            sample_argilla_record = next(iter(self.records(with_suggestions=True, with_responses=True)))\n            sample_huggingface_record = self._get_sample_hf_record(hfds) if with_records else None\n            dataset_size = len(hfds) if with_records else 0\n            card = ArgillaDatasetCard.from_template(\n                card_data=DatasetCardData(\n                    size_categories=size_categories_parser(dataset_size),\n                    tags=[\"rlfh\", \"argilla\", \"human-feedback\"],\n                ),\n                repo_id=repo_id,\n                argilla_fields=self.settings.fields,\n                argilla_questions=self.settings.questions,\n                argilla_guidelines=self.settings.guidelines or None,\n                argilla_vectors_settings=self.settings.vectors or None,\n                argilla_metadata_properties=self.settings.metadata,\n                argilla_record=sample_argilla_record.to_dict(),\n                huggingface_record=sample_huggingface_record,\n            )\n            card.save(filepath=os.path.join(tmpdirname, \"README.md\"))\n\n        hf_api.upload_folder(\n            folder_path=tmpdirname,\n            repo_id=repo_id,\n            repo_type=\"dataset\",\n        )\n
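A usage sketch (the repo id and token are illustrative; the token is forwarded through **kwargs):

dataset.to_hub(\n    repo_id=\"my-org/my-argilla-dataset\",  # illustrative Hub repo id\n    with_records=True,\n    generate_card=True,\n    token=\"hf_xxx\",  # illustrative token, forwarded via **kwargs\n)\n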
"},{"location":"reference/argilla/datasets/datasets/#src.argilla.datasets._io._hub.HubImportExportMixin.from_hub","title":"from_hub(repo_id, *, name=None, workspace=None, client=None, with_records=True, settings=None, split=None, subset=None, **kwargs) classmethod","text":"

Loads a Dataset from the Hugging Face Hub.

Parameters:

Name Type Description Default repo_id str

the ID of the Hugging Face Hub repo to load the Dataset from.

required name str

The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.

None workspace Union[Workspace, str]

The workspace to import the dataset to. Defaults to None and default workspace is used.

None client Optional[Argilla]

the client to use to load the Dataset. If not provided, the default client will be used.

None with_records bool

whether to load the records from the Hugging Face dataset. Defaults to True.

True settings Optional[Settings]

the settings to use to load the Dataset. If not provided, the settings will be loaded from the Hugging Face dataset.

None split Optional[str]

the split to load from the Hugging Face dataset. If not provided, the first split will be loaded.

None subset Optional[str]

the subset (configuration name) of the Hugging Face dataset to load. If not provided, the default subset is used.

None **kwargs Any

the kwargs to pass to datasets.load_dataset.

{}

Returns:

Type Description Dataset

A Dataset loaded from the Hugging Face Hub.

Source code in src/argilla/datasets/_io/_hub.py
@classmethod\ndef from_hub(\n    cls: Type[\"Dataset\"],\n    repo_id: str,\n    *,\n    name: Optional[str] = None,\n    workspace: Optional[Union[\"Workspace\", str]] = None,\n    client: Optional[\"Argilla\"] = None,\n    with_records: bool = True,\n    settings: Optional[\"Settings\"] = None,\n    split: Optional[str] = None,\n    subset: Optional[str] = None,\n    **kwargs: Any,\n) -> \"Dataset\":\n    \"\"\"Loads a `Dataset` from the Hugging Face Hub.\n\n    Parameters:\n        repo_id: the ID of the Hugging Face Hub repo to load the `Dataset` from.\n        name (str, optional): The name to assign to the new dataset. Defaults to None and the dataset's source name is used, unless it already exists, in which case a unique UUID is appended.\n        workspace (Union[Workspace, str], optional): The workspace to import the dataset to. Defaults to None and default workspace is used.\n        client: the client to use to load the `Dataset`. If not provided, the default client will be used.\n        with_records: whether to load the records from the Hugging Face dataset. Defaults to `True`.\n        settings: the settings to use to load the `Dataset`. If not provided, the settings will be loaded from the Hugging Face dataset.\n        split: the split to load from the Hugging Face dataset. If not provided, the first split will be loaded.\n        **kwargs: the kwargs to pass to `datasets.Dataset.load_from_hub`.\n\n    Returns:\n        A `Dataset` loaded from the Hugging Face Hub.\n    \"\"\"\n    from datasets import load_dataset\n    from huggingface_hub import snapshot_download\n    from argilla import Dataset\n\n    if name is None:\n        name = Dataset._sanitize_name(repo_id)\n\n    if settings is not None:\n        dataset = cls(name=name, settings=settings)\n        dataset.create()\n    else:\n        try:\n            # download configuration files from the hub\n            folder_path = snapshot_download(\n                repo_id=repo_id,\n                repo_type=\"dataset\",\n                allow_patterns=cls._DEFAULT_CONFIGURATION_FILES,\n                token=kwargs.get(\"token\"),\n            )\n\n            dataset = cls.from_disk(\n                path=folder_path, workspace=workspace, name=name, client=client, with_records=with_records\n            )\n        except ImportDatasetError:\n            from argilla import Settings\n\n            settings = Settings.from_hub(repo_id=repo_id, subset=subset)\n            dataset = cls.from_hub(\n                repo_id=repo_id,\n                name=name,\n                workspace=workspace,\n                client=client,\n                with_records=with_records,\n                settings=settings,\n                split=split,\n                subset=subset,\n                **kwargs,\n            )\n            return dataset\n\n    if with_records:\n        try:\n            hf_dataset = load_dataset(\n                path=repo_id,\n                split=split,\n                name=subset,\n                **kwargs,\n            )  # type: ignore\n            hf_dataset = cls._get_dataset_split(hf_dataset=hf_dataset, split=split, **kwargs)\n            cls._log_dataset_records(hf_dataset=hf_dataset, dataset=dataset)\n        except EmptyDatasetError:\n            warnings.warn(\n                message=\"Trying to load a dataset `with_records=True` but dataset does not contain any records.\",\n                category=UserWarning,\n            )\n\n    return dataset\n
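And loading back from the Hub (a sketch; the repo id and split are illustrative):

dataset = rg.Dataset.from_hub(\n    repo_id=\"my-org/my-argilla-dataset\",  # illustrative\n    split=\"train\",  # optional; the first split is used when omitted\n    with_records=True,\n)\n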
"},{"location":"reference/argilla/records/metadata/","title":"metadata","text":"

Metadata in Argilla is a dictionary that can be attached to a record. It is used to store additional information about the record that is not part of the record's fields or responses. For example, the source of the record, the date it was created, or any other information that is relevant to the record. Metadata can be added to a record directly, or as values within a dictionary when records are logged.

"},{"location":"reference/argilla/records/metadata/#usage-examples","title":"Usage Examples","text":"

To use metadata within a dataset, you must define metadata properties in the dataset settings. The metadata setting is a list of metadata properties that can be attached to a record. The following example demonstrates how to add metadata to a dataset and how to access metadata from a record object:

import argilla as rg\n\ndataset = rg.Dataset(\n    name=\"dataset_with_metadata\",\n    settings=rg.Settings(\n        fields=[rg.TextField(name=\"text\")],\n        questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n        metadata=[\n            rg.TermsMetadataProperty(name=\"category\", options=[\"A\", \"B\", \"C\"]),\n        ],\n    ),\n)\ndataset.create()\n

Then, you can add records to the dataset with metadata that corresponds to the metadata property defined in the dataset settings:

dataset.records.log(\n    [\n        {\"text\": \"text\", \"label\": \"positive\", \"category\": \"A\"},\n        {\"text\": \"text\", \"label\": \"negative\", \"category\": \"B\"},\n    ]\n)\n
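Once logged, the metadata can be read back from the record objects (a sketch):

for record in dataset.records(with_metadata=True):\n    print(record.metadata)\n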
"},{"location":"reference/argilla/records/metadata/#format-per-metadataproperty-type","title":"Format per MetadataProperty type","text":"

Depending on the MetadataProperty type, metadata might need to be formatted in a slightly different way.

For TermsMetadataProperty:
rg.Record(\n    fields={\"text\": \"example\"},\n    metadata={\"category\": \"A\"}\n)\n\n# with multiple terms\n\nrg.Record(\n    fields={\"text\": \"example\"},\n    metadata={\"category\": [\"A\", \"B\"]}\n)\n
For FloatMetadataProperty:
rg.Record(\n    fields={\"text\": \"example\"},\n    metadata={\"category\": 2.1}\n)\n
For IntegerMetadataProperty:
rg.Record(\n    fields={\"text\": \"example\"},\n    metadata={\"category\": 42}\n)\n
"},{"location":"reference/argilla/records/records/","title":"rg.Record","text":"

The Record object is used to represent a single record in Argilla. It contains fields, suggestions, responses, metadata, and vectors.

"},{"location":"reference/argilla/records/records/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/records/records/#creating-a-record","title":"Creating a Record","text":"

To create records, you can instantiate the Record class and pass the records to the Dataset.records.log method. The Record class requires a fields parameter, which is a dictionary of field names and values. The field names must match the field names in the dataset's Settings object to be accepted.

dataset.records.log(\n    records=[\n        rg.Record(\n            fields={\"text\": \"Hello World, how are you?\"},\n        ),\n    ]\n) # (1)\n
  1. The Argilla dataset contains a field named text matching the key here.

To create records with image fields, pass the image to the record object as either a remote URL, a local path to an image file, or a PIL object. The field names must be defined as an rg.ImageField in the dataset's Settings object to be accepted. Images will be stored in the Argilla database and returned as rescaled PIL objects.

dataset.records.log(\n    records=[\n        rg.Record(\n            fields={\"image\": \"https://example.com/image.jpg\"}, # (1)\n        ),\n    ]\n)\n
  1. The image can be referenced as either a remote URL, a local file path, or a PIL object.

Note

The image will be stored in the Argilla database and can impact the dataset's storage usage. Images should be less than 5 MB in size, and datasets should contain fewer than 10,000 images.

"},{"location":"reference/argilla/records/records/#accessing-record-attributes","title":"Accessing Record Attributes","text":"

The Record object has suggestions, responses, metadata, and vectors attributes that can be accessed directly whilst iterating over records in a dataset.

for record in dataset.records(\n    with_suggestions=True,\n    with_responses=True,\n    with_metadata=True,\n    with_vectors=True\n    ):\n    print(record.suggestions)\n    print(record.responses)\n    print(record.metadata)\n    print(record.vectors)\n

Record properties can also be updated whilst iterating over records in a dataset.

for record in dataset.records(with_metadata=True):\n    record.metadata = {\"department\": \"toys\"}\n

For changes to take effect, the user must call the update method on the Dataset object, or pass the updated records to Dataset.records.log. All core record attributes can be updated in this way. Check their respective documentation for more information: Suggestions, Responses, Metadata, Vectors.
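A sketch of that flow (the metadata key and value are illustrative):

updated_records = []\nfor record in dataset.records(with_metadata=True):\n    record.metadata = {\"department\": \"toys\"}  # illustrative metadata\n    updated_records.append(record)\n\ndataset.records.log(records=updated_records)  # persists the changes\n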

"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record","title":"Record","text":"

Bases: Resource

The class for interacting with Argilla Records. A Record is a single sample in a dataset. Records receive feedback in the form of responses and suggestions. Records contain fields, metadata, and vectors.

Attributes:

Name Type Description id Union[str, UUID]

The id of the record.

fields RecordFields

The fields of the record.

metadata RecordMetadata

The metadata of the record.

vectors RecordVectors

The vectors of the record.

responses RecordResponses

The responses of the record.

suggestions RecordSuggestions

The suggestions of the record.

dataset Dataset

The dataset to which the record belongs.

_server_id UUID

An id for the record generated by the Argilla server.

Source code in src/argilla/records/_resource.py
class Record(Resource):\n    \"\"\"The class for interacting with Argilla Records. A `Record` is a single sample\n    in a dataset. Records receives feedback in the form of responses and suggestions.\n    Records contain fields, metadata, and vectors.\n\n    Attributes:\n        id (Union[str, UUID]): The id of the record.\n        fields (RecordFields): The fields of the record.\n        metadata (RecordMetadata): The metadata of the record.\n        vectors (RecordVectors): The vectors of the record.\n        responses (RecordResponses): The responses of the record.\n        suggestions (RecordSuggestions): The suggestions of the record.\n        dataset (Dataset): The dataset to which the record belongs.\n        _server_id (UUID): An id for the record generated by the Argilla server.\n    \"\"\"\n\n    _model: RecordModel\n\n    def __init__(\n        self,\n        id: Optional[Union[UUID, str]] = None,\n        fields: Optional[Dict[str, FieldValue]] = None,\n        metadata: Optional[Dict[str, MetadataValue]] = None,\n        vectors: Optional[Dict[str, VectorValue]] = None,\n        responses: Optional[List[Response]] = None,\n        suggestions: Optional[List[Suggestion]] = None,\n        _server_id: Optional[UUID] = None,\n        _dataset: Optional[\"Dataset\"] = None,\n    ):\n        \"\"\"Initializes a Record with fields, metadata, vectors, responses, suggestions, external_id, and id.\n        Records are typically defined as flat dictionary objects with fields, metadata, vectors, responses, and suggestions\n        and passed to Dataset.DatasetRecords.add() as a list of dictionaries.\n\n        Args:\n            id: An id for the record. If not provided, a UUID will be generated.\n            fields: A dictionary of fields for the record.\n            metadata: A dictionary of metadata for the record.\n            vectors: A dictionary of vectors for the record.\n            responses: A list of Response objects for the record.\n            suggestions: A list of Suggestion objects for the record.\n            _server_id: An id for the record. 
(Read-only and set by the server)\n            _dataset: The dataset object to which the record belongs.\n        \"\"\"\n\n        if fields is None and metadata is None and vectors is None and responses is None and suggestions is None:\n            raise ValueError(\"At least one of fields, metadata, vectors, responses, or suggestions must be provided.\")\n        if fields is None and id is None:\n            raise ValueError(\"If fields are not provided, an id must be provided.\")\n        if fields == {} and id is None:\n            raise ValueError(\"If fields are an empty dictionary, an id must be provided.\")\n\n        self._dataset = _dataset\n        self._model = RecordModel(external_id=id, id=_server_id)\n        self.__fields = RecordFields(fields=fields, record=self)\n        self.__vectors = RecordVectors(vectors=vectors)\n        self.__metadata = RecordMetadata(metadata=metadata)\n        self.__responses = RecordResponses(responses=responses, record=self)\n        self.__suggestions = RecordSuggestions(suggestions=suggestions, record=self)\n\n    def __repr__(self) -> str:\n        return (\n            f\"Record(id={self.id},status={self.status},fields={self.fields},metadata={self.metadata},\"\n            f\"suggestions={self.suggestions},responses={self.responses})\"\n        )\n\n    ############################\n    # Properties\n    ############################\n\n    @property\n    def id(self) -> str:\n        return self._model.external_id\n\n    @id.setter\n    def id(self, value: str) -> None:\n        self._model.external_id = value\n\n    @property\n    def dataset(self) -> \"Dataset\":\n        return self._dataset\n\n    @dataset.setter\n    def dataset(self, value: \"Dataset\") -> None:\n        self._dataset = value\n\n    @property\n    def fields(self) -> \"RecordFields\":\n        return self.__fields\n\n    @property\n    def responses(self) -> \"RecordResponses\":\n        return self.__responses\n\n    @property\n    def suggestions(self) -> \"RecordSuggestions\":\n        return self.__suggestions\n\n    @property\n    def metadata(self) -> \"RecordMetadata\":\n        return self.__metadata\n\n    @property\n    def vectors(self) -> \"RecordVectors\":\n        return self.__vectors\n\n    @property\n    def status(self) -> str:\n        return self._model.status\n\n    @property\n    def _server_id(self) -> Optional[UUID]:\n        return self._model.id\n\n    ############################\n    # Public methods\n    ############################\n\n    def get(self) -> \"Record\":\n        \"\"\"Retrieves the record from the server.\"\"\"\n        model = self._client.api.records.get(self._server_id)\n        instance = self.from_model(model, dataset=self.dataset)\n        self.__dict__ = instance.__dict__\n\n        return self\n\n    def api_model(self) -> RecordModel:\n        return RecordModel(\n            id=self._model.id,\n            external_id=self._model.external_id,\n            fields=self.fields.to_dict(),\n            metadata=self.metadata.api_models(),\n            vectors=self.vectors.api_models(),\n            responses=self.responses.api_models(),\n            suggestions=self.suggestions.api_models(),\n            status=self.status,\n        )\n\n    def serialize(self) -> Dict[str, Any]:\n        \"\"\"Serializes the Record to a dictionary for interaction with the API\"\"\"\n        serialized_model = self._model.model_dump()\n        serialized_suggestions = [suggestion.serialize() for suggestion in self.__suggestions]\n   
     serialized_responses = [response.serialize() for response in self.__responses]\n        serialized_model[\"responses\"] = serialized_responses\n        serialized_model[\"suggestions\"] = serialized_suggestions\n\n        return serialized_model\n\n    def to_dict(self) -> Dict[str, Dict]:\n        \"\"\"Converts a Record object to a dictionary for export.\n        Returns:\n            A dictionary representing the record where the keys are \"fields\",\n            \"metadata\", \"suggestions\", and \"responses\". Each field and question is\n            represented as a key-value pair in the dictionary of the respective key. i.e.\n            `{\"fields\": {\"prompt\": \"...\", \"response\": \"...\"}, \"responses\": {\"rating\": \"...\"},\n        \"\"\"\n        id = str(self.id) if self.id else None\n        server_id = str(self._model.id) if self._model.id else None\n        status = self.status\n        fields = self.fields.to_dict()\n        metadata = self.metadata.to_dict()\n        suggestions = self.suggestions.to_dict()\n        responses = self.responses.to_dict()\n        vectors = self.vectors.to_dict()\n\n        # TODO: Review model attributes when to_dict and serialize methods are unified\n        return {\n            \"id\": id,\n            \"fields\": fields,\n            \"metadata\": metadata,\n            \"suggestions\": suggestions,\n            \"responses\": responses,\n            \"vectors\": vectors,\n            \"status\": status,\n            \"_server_id\": server_id,\n        }\n\n    @classmethod\n    def from_dict(cls, data: Dict[str, Dict], dataset: Optional[\"Dataset\"] = None) -> \"Record\":\n        \"\"\"Converts a dictionary to a Record object.\n        Args:\n            data: A dictionary representing the record.\n            dataset: The dataset object to which the record belongs.\n        Returns:\n            A Record object.\n        \"\"\"\n        fields = data.get(\"fields\", {})\n        metadata = data.get(\"metadata\", {})\n        suggestions = data.get(\"suggestions\", {})\n        responses = data.get(\"responses\", {})\n        vectors = data.get(\"vectors\", {})\n        record_id = data.get(\"id\", None)\n        _server_id = data.get(\"_server_id\", None)\n\n        suggestions = [Suggestion(question_name=question_name, **value) for question_name, value in suggestions.items()]\n        responses = [\n            Response(question_name=question_name, **value)\n            for question_name, _responses in responses.items()\n            for value in _responses\n        ]\n\n        return cls(\n            id=record_id,\n            fields=fields,\n            suggestions=suggestions,\n            responses=responses,\n            vectors=vectors,\n            metadata=metadata,\n            _dataset=dataset,\n            _server_id=_server_id,\n        )\n\n    @classmethod\n    def from_model(cls, model: RecordModel, dataset: \"Dataset\") -> \"Record\":\n        \"\"\"Converts a RecordModel object to a Record object.\n        Args:\n            model: A RecordModel object.\n            dataset: The dataset object to which the record belongs.\n        Returns:\n            A Record object.\n        \"\"\"\n        instance = cls(\n            id=model.external_id,\n            fields=model.fields,\n            metadata={meta.name: meta.value for meta in model.metadata},\n            vectors={vector.name: vector.vector_values for vector in model.vectors},\n            _dataset=dataset,\n            responses=[],\n            
suggestions=[],\n        )\n\n        # set private attributes\n        instance._dataset = dataset\n        instance._model = model\n\n        # Responses and suggestions are computed separately based on the record model\n        instance.responses.from_models(model.responses)\n        instance.suggestions.from_models(model.suggestions)\n\n        return instance\n\n    @property\n    def _client(self) -> Optional[\"Argilla\"]:\n        if self._dataset:\n            return self.dataset._client\n\n    @property\n    def _api(self) -> Optional[\"RecordsAPI\"]:\n        if self._client:\n            return self._client.api.records\n
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.__init__","title":"__init__(id=None, fields=None, metadata=None, vectors=None, responses=None, suggestions=None, _server_id=None, _dataset=None)","text":"

Initializes a Record with fields, metadata, vectors, responses, suggestions, external_id, and id. Records are typically defined as flat dictionary objects with fields, metadata, vectors, responses, and suggestions and passed to Dataset.DatasetRecords.add() as a list of dictionaries.

Parameters:

Name Type Description Default id Optional[Union[UUID, str]]

An id for the record. If not provided, a UUID will be generated.

None fields Optional[Dict[str, FieldValue]]

A dictionary of fields for the record.

None metadata Optional[Dict[str, MetadataValue]]

A dictionary of metadata for the record.

None vectors Optional[Dict[str, VectorValue]]

A dictionary of vectors for the record.

None responses Optional[List[Response]]

A list of Response objects for the record.

None suggestions Optional[List[Suggestion]]

A list of Suggestion objects for the record.

None _server_id Optional[UUID]

An id for the record. (Read-only and set by the server)

None _dataset Optional[Dataset]

The dataset object to which the record belongs.

None Source code in src/argilla/records/_resource.py
def __init__(\n    self,\n    id: Optional[Union[UUID, str]] = None,\n    fields: Optional[Dict[str, FieldValue]] = None,\n    metadata: Optional[Dict[str, MetadataValue]] = None,\n    vectors: Optional[Dict[str, VectorValue]] = None,\n    responses: Optional[List[Response]] = None,\n    suggestions: Optional[List[Suggestion]] = None,\n    _server_id: Optional[UUID] = None,\n    _dataset: Optional[\"Dataset\"] = None,\n):\n    \"\"\"Initializes a Record with fields, metadata, vectors, responses, suggestions, external_id, and id.\n    Records are typically defined as flat dictionary objects with fields, metadata, vectors, responses, and suggestions\n    and passed to Dataset.DatasetRecords.add() as a list of dictionaries.\n\n    Args:\n        id: An id for the record. If not provided, a UUID will be generated.\n        fields: A dictionary of fields for the record.\n        metadata: A dictionary of metadata for the record.\n        vectors: A dictionary of vectors for the record.\n        responses: A list of Response objects for the record.\n        suggestions: A list of Suggestion objects for the record.\n        _server_id: An id for the record. (Read-only and set by the server)\n        _dataset: The dataset object to which the record belongs.\n    \"\"\"\n\n    if fields is None and metadata is None and vectors is None and responses is None and suggestions is None:\n        raise ValueError(\"At least one of fields, metadata, vectors, responses, or suggestions must be provided.\")\n    if fields is None and id is None:\n        raise ValueError(\"If fields are not provided, an id must be provided.\")\n    if fields == {} and id is None:\n        raise ValueError(\"If fields are an empty dictionary, an id must be provided.\")\n\n    self._dataset = _dataset\n    self._model = RecordModel(external_id=id, id=_server_id)\n    self.__fields = RecordFields(fields=fields, record=self)\n    self.__vectors = RecordVectors(vectors=vectors)\n    self.__metadata = RecordMetadata(metadata=metadata)\n    self.__responses = RecordResponses(responses=responses, record=self)\n    self.__suggestions = RecordSuggestions(suggestions=suggestions, record=self)\n
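
For orientation, a minimal sketch of constructing a Record directly; the field and metadata names here are hypothetical and assume matching dataset settings:

record = rg.Record(\n    fields={\"text\": \"Hello World, how are you?\"},  # field names must match the dataset settings\n    metadata={\"source\": \"demo\"},  # hypothetical metadata key\n)\n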
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.get","title":"get()","text":"

Retrieves the record from the server.

Source code in src/argilla/records/_resource.py
def get(self) -> \"Record\":\n    \"\"\"Retrieves the record from the server.\"\"\"\n    model = self._client.api.records.get(self._server_id)\n    instance = self.from_model(model, dataset=self.dataset)\n    self.__dict__ = instance.__dict__\n\n    return self\n
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.serialize","title":"serialize()","text":"

Serializes the Record to a dictionary for interaction with the API

Source code in src/argilla/records/_resource.py
def serialize(self) -> Dict[str, Any]:\n    \"\"\"Serializes the Record to a dictionary for interaction with the API\"\"\"\n    serialized_model = self._model.model_dump()\n    serialized_suggestions = [suggestion.serialize() for suggestion in self.__suggestions]\n    serialized_responses = [response.serialize() for response in self.__responses]\n    serialized_model[\"responses\"] = serialized_responses\n    serialized_model[\"suggestions\"] = serialized_suggestions\n\n    return serialized_model\n
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.to_dict","title":"to_dict()","text":"

Converts a Record object to a dictionary for export.

Returns:

A dictionary representing the record where the keys are \"fields\", \"metadata\", \"suggestions\", and \"responses\". Each field and question is represented as a key-value pair in the dictionary of the respective key, i.e. `{\"fields\": {\"prompt\": \"...\", \"response\": \"...\"}, \"responses\": {\"rating\": \"...\"}}`.

Source code in src/argilla/records/_resource.py
def to_dict(self) -> Dict[str, Dict]:\n    \"\"\"Converts a Record object to a dictionary for export.\n    Returns:\n        A dictionary representing the record where the keys are \"fields\",\n        \"metadata\", \"suggestions\", and \"responses\". Each field and question is\n        represented as a key-value pair in the dictionary of the respective key. i.e.\n        `{\"fields\": {\"prompt\": \"...\", \"response\": \"...\"}, \"responses\": {\"rating\": \"...\"},\n    \"\"\"\n    id = str(self.id) if self.id else None\n    server_id = str(self._model.id) if self._model.id else None\n    status = self.status\n    fields = self.fields.to_dict()\n    metadata = self.metadata.to_dict()\n    suggestions = self.suggestions.to_dict()\n    responses = self.responses.to_dict()\n    vectors = self.vectors.to_dict()\n\n    # TODO: Review model attributes when to_dict and serialize methods are unified\n    return {\n        \"id\": id,\n        \"fields\": fields,\n        \"metadata\": metadata,\n        \"suggestions\": suggestions,\n        \"responses\": responses,\n        \"vectors\": vectors,\n        \"status\": status,\n        \"_server_id\": server_id,\n    }\n
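
As a sketch, to_dict pairs with from_dict (documented below) for a simple export/import round trip, assuming record and dataset objects already exist:

exported = record.to_dict()  # plain dictionary, safe to store as JSON\nrestored = rg.Record.from_dict(exported, dataset=dataset)\n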
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.from_dict","title":"from_dict(data, dataset=None) classmethod","text":"

Converts a dictionary to a Record object.

Args:

data: A dictionary representing the record.
dataset: The dataset object to which the record belongs.

Returns:

A Record object.

Source code in src/argilla/records/_resource.py
@classmethod\ndef from_dict(cls, data: Dict[str, Dict], dataset: Optional[\"Dataset\"] = None) -> \"Record\":\n    \"\"\"Converts a dictionary to a Record object.\n    Args:\n        data: A dictionary representing the record.\n        dataset: The dataset object to which the record belongs.\n    Returns:\n        A Record object.\n    \"\"\"\n    fields = data.get(\"fields\", {})\n    metadata = data.get(\"metadata\", {})\n    suggestions = data.get(\"suggestions\", {})\n    responses = data.get(\"responses\", {})\n    vectors = data.get(\"vectors\", {})\n    record_id = data.get(\"id\", None)\n    _server_id = data.get(\"_server_id\", None)\n\n    suggestions = [Suggestion(question_name=question_name, **value) for question_name, value in suggestions.items()]\n    responses = [\n        Response(question_name=question_name, **value)\n        for question_name, _responses in responses.items()\n        for value in _responses\n    ]\n\n    return cls(\n        id=record_id,\n        fields=fields,\n        suggestions=suggestions,\n        responses=responses,\n        vectors=vectors,\n        metadata=metadata,\n        _dataset=dataset,\n        _server_id=_server_id,\n    )\n
"},{"location":"reference/argilla/records/records/#src.argilla.records._resource.Record.from_model","title":"from_model(model, dataset) classmethod","text":"

Converts a RecordModel object to a Record object.

Args:

model: A RecordModel object.
dataset: The dataset object to which the record belongs.

Returns:

A Record object.

Source code in src/argilla/records/_resource.py
@classmethod\ndef from_model(cls, model: RecordModel, dataset: \"Dataset\") -> \"Record\":\n    \"\"\"Converts a RecordModel object to a Record object.\n    Args:\n        model: A RecordModel object.\n        dataset: The dataset object to which the record belongs.\n    Returns:\n        A Record object.\n    \"\"\"\n    instance = cls(\n        id=model.external_id,\n        fields=model.fields,\n        metadata={meta.name: meta.value for meta in model.metadata},\n        vectors={vector.name: vector.vector_values for vector in model.vectors},\n        _dataset=dataset,\n        responses=[],\n        suggestions=[],\n    )\n\n    # set private attributes\n    instance._dataset = dataset\n    instance._model = model\n\n    # Responses and suggestions are computed separately based on the record model\n    instance.responses.from_models(model.responses)\n    instance.suggestions.from_models(model.suggestions)\n\n    return instance\n
"},{"location":"reference/argilla/records/responses/","title":"rg.Response","text":"

Class for interacting with Argilla Responses of records. Responses are answers to questions by a user. Therefore, a record question can have multiple responses, one for each user that has answered the question. A Response is typically created by a user in the UI or consumed from a data source as a label, unlike a Suggestion which is typically created by a model prediction.

"},{"location":"reference/argilla/records/responses/#usage-examples","title":"Usage Examples","text":"

Responses can be added to an instantiated Record directly or as a dictionary. The following examples demonstrate how to add responses to a record object and how to access responses from a record object:

Instantiate the Record and related Response objects:

dataset.records.log(\n    [\n        rg.Record(\n            fields={\"text\": \"Hello World, how are you?\"},\n            responses=[rg.Response(\"label\", \"negative\", user_id=user.id)],\n            id=str(uuid.uuid4()),\n        )\n    ]\n)\n

Or, add a response from a dictionary where key is the question name and value is the response:

dataset.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"label.response\": \"negative\",\n        },\n    ]\n)\n

Responses can be accessed from a Record via their question name. So if a question is named label, the responses can be accessed as record.responses[\"label\"]. The following example demonstrates how to access responses from a record object:

# iterate over the records and responses\n\nfor record in dataset.records:\n    for response in record.responses[\"label\"]: # (1)\n        print(response.value)\n        print(response.user_id)\n\n# validate that the record has a response\n\nfor record in dataset.records:\n    if record.responses[\"label\"]:\n        for response in record.responses[\"label\"]:\n            print(response.value)\n            print(response.user_id)\n    else:\n        record.responses.add(\n            rg.Response(\"label\", \"positive\", user_id=user.id)\n        ) # (2)\n
1. Access the responses for the question named label for each record like a dictionary containing a list of Response objects.
2. Add a response to the record if it does not already have one.

"},{"location":"reference/argilla/records/responses/#format-per-question-type","title":"Format per Question type","text":"

Depending on the Question type, responses might need to be formatted in a slightly different way.

For LabelQuestion | For MultiLabelQuestion | For RankingQuestion | For RatingQuestion | For SpanQuestion | For TextQuestion
rg.Response(\n    question_name=\"label\",\n    value=\"positive\",\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"multi-label\",\n    value=[\"positive\", \"negative\"],\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"rank\",\n    value=[\"1\", \"3\", \"2\"],\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"rating\",\n    value=4,\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"span\",\n    value=[{\"start\": 0, \"end\": 9, \"label\": \"MISC\"}],\n    user_id=user.id,\n    status=\"draft\"\n)\n
rg.Response(\n    question_name=\"text\",\n    value=\"value\",\n    user_id=user.id,\n    status=\"draft\"\n)\n
"},{"location":"reference/argilla/records/responses/#src.argilla.responses.Response","title":"Response","text":"

Class for interacting with Argilla Responses of records. Responses are answers to questions by a user. Therefore, a record question can have multiple responses, one for each user that has answered the question. A Response is typically created by a user in the UI or consumed from a data source as a label, unlike a Suggestion which is typically created by a model prediction.

Source code in src/argilla/responses.py
class Response:\n    \"\"\"Class for interacting with Argilla Responses of records. Responses are answers to questions by a user.\n    Therefore, a record question can have multiple responses, one for each user that has answered the question.\n    A `Response` is typically created by a user in the UI or consumed from a data source as a label,\n    unlike a `Suggestion` which is typically created by a model prediction.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        question_name: str,\n        value: Any,\n        user_id: UUID,\n        status: Optional[Union[ResponseStatus, str]] = None,\n        _record: Optional[\"Record\"] = None,\n    ) -> None:\n        \"\"\"Initializes a `Response` for a `Record` with a user_id and value\n\n        Attributes:\n            question_name (str): The name of the question that the suggestion is for.\n            value (str): The value of the response\n            user_id (UUID): The id of the user that submits the response\n            status (Union[ResponseStatus, str]): The status of the response as \"draft\", \"submitted\", \"discarded\".\n        \"\"\"\n\n        if question_name is None:\n            raise ValueError(\"question_name is required\")\n        if value is None:\n            raise ValueError(\"value is required\")\n        if user_id is None:\n            raise ValueError(\"user_id is required\")\n\n        if isinstance(status, str):\n            status = ResponseStatus(status)\n\n        self._record = _record\n        self.question_name = question_name\n        self.value = value\n        self.user_id = user_id\n        self.status = status\n\n    @property\n    def record(self) -> \"Record\":\n        \"\"\"Returns the record associated with the response\"\"\"\n        return self._record\n\n    @record.setter\n    def record(self, record: \"Record\") -> None:\n        \"\"\"Sets the record associated with the response\"\"\"\n        self._record = record\n\n    def serialize(self) -> dict[str, Any]:\n        \"\"\"Serializes the Response to a dictionary. This is principally used for sending the response to the API, \\\n            but can be used for data wrangling or manual export.\n\n        Returns:\n            dict[str, Any]: The serialized response as a dictionary with keys `question_name`, `value`, and `user_id`.\n\n        Examples:\n\n        ```python\n        response = rg.Response(\"label\", \"negative\", user_id=user.id)\n        response.serialize()\n        ```\n        \"\"\"\n        return {\n            \"question_name\": self.question_name,\n            \"value\": self.value,\n            \"user_id\": self.user_id,\n            \"status\": self.status,\n        }\n
"},{"location":"reference/argilla/records/responses/#src.argilla.responses.Response.record","title":"record: Record property writable","text":"

Returns the record associated with the response

"},{"location":"reference/argilla/records/responses/#src.argilla.responses.Response.__init__","title":"__init__(question_name, value, user_id, status=None, _record=None)","text":"

Initializes a Response for a Record with a user_id and value

Attributes:

Name Type Description question_name str

The name of the question that the response is for.

value str

The value of the response

user_id UUID

The id of the user that submits the response

status Union[ResponseStatus, str]

The status of the response as \"draft\", \"submitted\", \"discarded\".

Source code in src/argilla/responses.py
def __init__(\n    self,\n    question_name: str,\n    value: Any,\n    user_id: UUID,\n    status: Optional[Union[ResponseStatus, str]] = None,\n    _record: Optional[\"Record\"] = None,\n) -> None:\n    \"\"\"Initializes a `Response` for a `Record` with a user_id and value\n\n    Attributes:\n        question_name (str): The name of the question that the suggestion is for.\n        value (str): The value of the response\n        user_id (UUID): The id of the user that submits the response\n        status (Union[ResponseStatus, str]): The status of the response as \"draft\", \"submitted\", \"discarded\".\n    \"\"\"\n\n    if question_name is None:\n        raise ValueError(\"question_name is required\")\n    if value is None:\n        raise ValueError(\"value is required\")\n    if user_id is None:\n        raise ValueError(\"user_id is required\")\n\n    if isinstance(status, str):\n        status = ResponseStatus(status)\n\n    self._record = _record\n    self.question_name = question_name\n    self.value = value\n    self.user_id = user_id\n    self.status = status\n
"},{"location":"reference/argilla/records/responses/#src.argilla.responses.Response.serialize","title":"serialize()","text":"

Serializes the Response to a dictionary. This is principally used for sending the response to the API, but can be used for data wrangling or manual export.

Returns:

Type Description dict[str, Any]

dict[str, Any]: The serialized response as a dictionary with keys question_name, value, user_id, and status.

Examples:

response = rg.Response(\"label\", \"negative\", user_id=user.id)\nresponse.serialize()\n
Source code in src/argilla/responses.py
def serialize(self) -> dict[str, Any]:\n    \"\"\"Serializes the Response to a dictionary. This is principally used for sending the response to the API, \\\n        but can be used for data wrangling or manual export.\n\n    Returns:\n        dict[str, Any]: The serialized response as a dictionary with keys `question_name`, `value`, and `user_id`.\n\n    Examples:\n\n    ```python\n    response = rg.Response(\"label\", \"negative\", user_id=user.id)\n    response.serialize()\n    ```\n    \"\"\"\n    return {\n        \"question_name\": self.question_name,\n        \"value\": self.value,\n        \"user_id\": self.user_id,\n        \"status\": self.status,\n    }\n
"},{"location":"reference/argilla/records/suggestions/","title":"rg.Suggestion","text":"

Class for interacting with Argilla Suggestions of records. Suggestions are typically created by a model prediction, unlike a Response which is typically created by a user in the UI or consumed from a data source as a label.

"},{"location":"reference/argilla/records/suggestions/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/records/suggestions/#adding-records-with-suggestions","title":"Adding records with suggestions","text":"

Suggestions can be added to a record directly or via a dictionary structure. The following examples demonstrate how to add suggestions to a record object and how to access suggestions from a record object:

Add a response from a dictionary where key is the question name and value is the response:

dataset.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"label\": \"negative\", # this will be used as a suggestion\n        },\n    ]\n)\n

If your data contains scores for suggestions, you can add them as well via the mapping parameter. The following example demonstrates how to add a suggestion with a score to a record object:

dataset.records.log(\n    [\n        {\n            \"prompt\": \"Hello World, how are you?\",\n            \"label\": \"negative\",  # this will be used as a suggestion\n            \"score\": 0.9,  # this will be used as the suggestion score\n            \"model\": \"model_name\",  # this will be used as the suggestion agent\n        },\n    ],\n    mapping={\n        \"score\": \"label.suggestion.score\",\n        \"model\": \"label.suggestion.agent\",\n    },  # `label` is the question name in the dataset settings\n)\n

Or, instantiate the Record and related Suggestions objects directly, like this:

dataset.records.log(\n    [\n        rg.Record(\n            fields={\"text\": \"Hello World, how are you?\"},\n            suggestions=[rg.Suggestion(\"label\", \"negative\", score=0.9, agent=\"model_name\")],\n        )\n    ]\n)\n
"},{"location":"reference/argilla/records/suggestions/#iterating-over-records-with-suggestions","title":"Iterating over records with suggestions","text":"

Just like responses, suggestions can be accessed from a Record via their question name. So if a question is named label, the suggestion can be accessed as record.suggestions[\"label\"]. The following example demonstrates how to access suggestions from a record object:

for record in dataset.records(with_suggestions=True):\n    print(record.suggestions[\"label\"].value)\n
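
The other Suggestion properties documented below, such as score and agent, are available on the same object; a minimal sketch:

for record in dataset.records(with_suggestions=True):\n    suggestion = record.suggestions[\"label\"]\n    print(suggestion.value, suggestion.score, suggestion.agent)\n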

We can also add suggestions to records as we iterate over them using the add method:

for record in dataset.records(with_suggestions=True):\n    if not record.suggestions[\"label\"]: # (1)\n        record.suggestions.add(\n            rg.Suggestion(\"label\", \"positive\", score=0.9, agent=\"model_name\")\n        ) # (2)\n
  1. Validate that the record has a suggestion
  2. Add a suggestion to the record if it does not already have one
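
Note that adding a suggestion while iterating only modifies the local Record objects. A hedged sketch for persisting the changes, assuming that re-logging records updates them on the server:

updated_records = []\nfor record in dataset.records(with_suggestions=True):\n    if not record.suggestions[\"label\"]:\n        record.suggestions.add(\n            rg.Suggestion(\"label\", \"positive\", score=0.9, agent=\"model_name\")\n        )\n        updated_records.append(record)\n\ndataset.records.log(updated_records)  # assumption: logging existing records upserts them\n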
"},{"location":"reference/argilla/records/suggestions/#format-per-question-type","title":"Format per Question type","text":"

Depending on the Question type, suggestions might need to be formatted in a slightly different way.

For LabelQuestion | For MultiLabelQuestion | For RankingQuestion | For RatingQuestion | For SpanQuestion | For TextQuestion
rg.Suggestion(\n    question_name=\"label\",\n    value=\"positive\",\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"multi-label\",\n    value=[\"positive\", \"negative\"],\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"rank\",\n    value=[\"1\", \"3\", \"2\"],\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"rating\",\n    value=4,\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"span\",\n    value=[{\"start\": 0, \"end\": 9, \"label\": \"MISC\"}],\n    score=0.9,\n    agent=\"model_name\"\n)\n
rg.Suggestion(\n    question_name=\"text\",\n    value=\"value\",\n    score=0.9,\n    agent=\"model_name\"\n)\n
"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion","title":"Suggestion","text":"

Bases: Resource

Class for interacting with Argilla Suggestions. Suggestions are typically model predictions for records. Suggestions are rendered in the user interfaces as 'hints' or 'suggestions' for the user to review and accept or reject.

Attributes:

Name Type Description question_name str

The name of the question that the suggestion is for.

value str

The value of the suggestion

score float

The score of the suggestion. For example, the probability of the model prediction.

agent str

The agent that created the suggestion. For example, the model name.

type str

The type of suggestion, either 'model' or 'human'.

Source code in src/argilla/suggestions.py
class Suggestion(Resource):\n    \"\"\"Class for interacting with Argilla Suggestions. Suggestions are typically model predictions for records.\n    Suggestions are rendered in the user interfaces as 'hints' or 'suggestions' for the user to review and accept or reject.\n\n    Attributes:\n        question_name (str): The name of the question that the suggestion is for.\n        value (str): The value of the suggestion\n        score (float): The score of the suggestion. For example, the probability of the model prediction.\n        agent (str): The agent that created the suggestion. For example, the model name.\n        type (str): The type of suggestion, either 'model' or 'human'.\n    \"\"\"\n\n    _model: SuggestionModel\n\n    def __init__(\n        self,\n        question_name: str,\n        value: Any,\n        score: Union[float, List[float], None] = None,\n        agent: Optional[str] = None,\n        type: Optional[Literal[\"model\", \"human\"]] = None,\n        _record: Optional[\"Record\"] = None,\n    ) -> None:\n        super().__init__()\n\n        if question_name is None:\n            raise ValueError(\"question_name is required\")\n        if value is None:\n            raise ValueError(\"value is required\")\n\n        self._record = _record\n        self._model = SuggestionModel(\n            question_name=question_name,\n            value=value,\n            type=type,\n            score=score,\n            agent=agent,\n        )\n\n    ##############################\n    # Properties\n    ##############################\n\n    @property\n    def value(self) -> Any:\n        \"\"\"The value of the suggestion.\"\"\"\n        return self._model.value\n\n    @property\n    def question_name(self) -> Optional[str]:\n        \"\"\"The name of the question that the suggestion is for.\"\"\"\n        return self._model.question_name\n\n    @question_name.setter\n    def question_name(self, value: str) -> None:\n        self._model.question_name = value\n\n    @property\n    def type(self) -> Optional[Literal[\"model\", \"human\"]]:\n        \"\"\"The type of suggestion, either 'model' or 'human'.\"\"\"\n        return self._model.type\n\n    @property\n    def score(self) -> Optional[Union[float, List[float]]]:\n        \"\"\"The score of the suggestion.\"\"\"\n        return self._model.score\n\n    @score.setter\n    def score(self, value: float) -> None:\n        self._model.score = value\n\n    @property\n    def agent(self) -> Optional[str]:\n        \"\"\"The agent that created the suggestion.\"\"\"\n        return self._model.agent\n\n    @agent.setter\n    def agent(self, value: str) -> None:\n        self._model.agent = value\n\n    @property\n    def record(self) -> Optional[\"Record\"]:\n        \"\"\"The record that the suggestion is for.\"\"\"\n        return self._record\n\n    @record.setter\n    def record(self, value: \"Record\") -> None:\n        self._record = value\n\n    @classmethod\n    def from_model(cls, model: SuggestionModel, record: \"Record\") -> \"Suggestion\":\n        question = record.dataset.settings.questions[model.question_id]\n        model.question_name = question.name\n        model.value = cls.__from_model_value(model.value, question)\n\n        instance = cls(question.name, model.value, _record=record)\n        instance._model = model\n\n        return instance\n\n    def api_model(self) -> SuggestionModel:\n        if self.record is None or self.record.dataset is None:\n            return self._model\n\n        question = 
self.record.dataset.settings.questions[self.question_name]\n        if question:\n            return SuggestionModel(\n                value=self.__to_model_value(self.value, question),\n                question_name=None if not question else question.name,\n                question_id=None if not question else question.id,\n                type=self._model.type,\n                score=self._model.score,\n                agent=self._model.agent,\n                id=self._model.id,\n            )\n        else:\n            raise RecordSuggestionsError(\n                f\"Record suggestion is invalid because question with name={self.question_name} does not exist in the dataset ({self.record.dataset.name}). Available questions are: {list(self.record.dataset.settings.questions._properties_by_name.keys())}\"\n            )\n\n    @classmethod\n    def __to_model_value(cls, value: Any, question: \"QuestionType\") -> Any:\n        if isinstance(question, RankingQuestion):\n            return cls.__ranking_to_model_value(value)\n        return value\n\n    @classmethod\n    def __from_model_value(cls, value: Any, question: \"QuestionType\") -> Any:\n        if isinstance(question, RankingQuestion):\n            return cls.__ranking_from_model_value(value)\n        return value\n\n    @classmethod\n    def __ranking_from_model_value(cls, value: List[Dict[str, Any]]) -> List[str]:\n        return [v[\"value\"] for v in value]\n\n    @classmethod\n    def __ranking_to_model_value(cls, value: List[str]) -> List[Dict[str, str]]:\n        return [{\"value\": str(v)} for v in value]\n
"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.value","title":"value: Any property","text":"

The value of the suggestion.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.question_name","title":"question_name: Optional[str] property writable","text":"

The name of the question that the suggestion is for.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.type","title":"type: Optional[Literal['model', 'human']] property","text":"

The type of suggestion, either 'model' or 'human'.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.score","title":"score: Optional[Union[float, List[float]]] property writable","text":"

The score of the suggestion.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.agent","title":"agent: Optional[str] property writable","text":"

The agent that created the suggestion.

"},{"location":"reference/argilla/records/suggestions/#src.argilla.suggestions.Suggestion.record","title":"record: Optional[Record] property writable","text":"

The record that the suggestion is for.

"},{"location":"reference/argilla/records/vectors/","title":"rg.Vector","text":"

A vector is a numerical representation of a Record field or attribute, usually the record's text. Vectors can be used to search for similar records via the UI or SDK. Vectors can be added to a record directly or as a dictionary with a key that matches the rg.VectorField name.

"},{"location":"reference/argilla/records/vectors/#usage-examples","title":"Usage Examples","text":"

To use vectors within a dataset, you must define vector fields in the dataset settings. The vectors setting is a list of vector fields that can be attached to a record. The following example demonstrates how to add vectors to a dataset and how to access vectors from a record object:

import argilla as rg\n\ndataset = rg.Dataset(\n    name=\"dataset_with_vectors\",\n    settings=rg.Settings(\n        fields=[rg.TextField(name=\"text\")],\n        questions=[rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])],\n        vectors=[\n            rg.VectorField(name=\"vector_name\"),\n        ],\n    ),\n)\ndataset.create()\n

Then, you can add records to the dataset with vectors that correspond to the vector field defined in the dataset settings:

dataset.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"vector_name\": [0.1, 0.2, 0.3]\n        }\n    ]\n)\n

Vectors can be passed using a mapping, where the key is the key in the data source and the value is the name of the rg.VectorField object in the dataset's settings. For example, the following code adds a record with a vector using a mapping:

dataset.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"x\": [0.1, 0.2, 0.3]\n        }\n    ],\n    mapping={\"x\": \"vector_name\"}\n)\n

Or, vectors can be instantiated and added to a record directly, like this:

dataset.records.log(\n    [\n        rg.Record(\n            fields={\"text\": \"Hello World, how are you?\"},\n            vectors=[rg.Vector(\"vector_name\", [0.1, 0.2, 0.3])],\n        )\n    ]\n)\n
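
To read vectors back while iterating, a hedged sketch, assuming dataset.records accepts a with_vectors flag analogous to the with_suggestions flag used elsewhere in this reference:

for record in dataset.records(with_vectors=True):  # with_vectors is an assumption\n    print(record.vectors[\"vector_name\"])\n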
"},{"location":"reference/argilla/records/vectors/#src.argilla.vectors.Vector","title":"Vector","text":"

Bases: Resource

Class for interacting with Argilla Vectors. Vectors are typically used to represent embeddings or features of records. The Vector class is used to deliver vectors to the Argilla server.

Attributes:

Name Type Description name str

The name of the vector.

values list[float]

The values of the vector.

Source code in src/argilla/vectors.py
class Vector(Resource):\n    \"\"\" Class for interacting with Argilla Vectors. Vectors are typically used to represent \\\n        embeddings or features of records. The `Vector` class is used to deliver vectors to the Argilla server.\n\n    Attributes:\n        name (str): The name of the vector.\n        values (list[float]): The values of the vector.\n    \"\"\"\n\n    _model: VectorModel\n\n    def __init__(\n        self,\n        name: str,\n        values: list[float],\n    ) -> None:\n        \"\"\"Initializes a Vector with a name and values that can be used to search in the Argilla ui.\n\n        Parameters:\n            name (str): Name of the vector\n            values (list[float]): List of float values\n\n        \"\"\"\n        self._model = VectorModel(\n            name=name,\n            vector_values=values,\n        )\n\n    def __repr__(self) -> str:\n        return repr(f\"{self.__class__.__name__}({self._model})\")\n\n    ##############################\n    # Properties\n    ##############################\n\n    @property\n    def name(self) -> str:\n        \"\"\"Name of the vector that corresponds to the name of the vector in the dataset's `Settings`\"\"\"\n        return self._model.name\n\n    @property\n    def values(self) -> list[float]:\n        \"\"\"List of float values that represent the vector.\"\"\"\n        return self._model.vector_values\n\n    ##############################\n    # Methods\n    ##############################\n\n    @classmethod\n    def from_model(cls, model: VectorModel) -> \"Vector\":\n        return cls(\n            name=model.name,\n            values=model.vector_values,\n        )\n\n    def serialize(self) -> dict[str, Any]:\n        dumped_model = self._model.model_dump()\n        name = dumped_model.pop(\"name\")\n        values = dumped_model.pop(\"vector_values\")\n        return {name: values}\n
"},{"location":"reference/argilla/records/vectors/#src.argilla.vectors.Vector.name","title":"name: str property","text":"

Name of the vector that corresponds to the name of the vector in the dataset's Settings

"},{"location":"reference/argilla/records/vectors/#src.argilla.vectors.Vector.values","title":"values: list[float] property","text":"

List of float values that represent the vector.

"},{"location":"reference/argilla/records/vectors/#src.argilla.vectors.Vector.__init__","title":"__init__(name, values)","text":"

Initializes a Vector with a name and values that can be used to search in the Argilla UI.

Parameters:

Name Type Description Default name str

Name of the vector

required values list[float]

List of float values

required Source code in src/argilla/vectors.py
def __init__(\n    self,\n    name: str,\n    values: list[float],\n) -> None:\n    \"\"\"Initializes a Vector with a name and values that can be used to search in the Argilla ui.\n\n    Parameters:\n        name (str): Name of the vector\n        values (list[float]): List of float values\n\n    \"\"\"\n    self._model = VectorModel(\n        name=name,\n        vector_values=values,\n    )\n
"},{"location":"reference/argilla/settings/fields/","title":"Fields","text":"

Fields in Argilla define the content of a record that will be reviewed by a user.

"},{"location":"reference/argilla/settings/fields/#usage-examples","title":"Usage Examples","text":"

To define a field, instantiate the different field classes and pass them to the fields parameter of the Settings class.

text_field = rg.TextField(name=\"text\")\nmarkdown_field = rg.TextField(name=\"markdown\", use_markdown=True)\nimage_field = rg.ImageField(name=\"image\")\n

The fields parameter of the Settings class can accept a list of fields, like this:

settings = rg.Settings(\n    fields=[\n        text_field,\n        markdown_field,\n        image_field,\n    ],\n    questions=[\n        rg.TextQuestion(name=\"response\"),\n    ],\n)\n\ndata = rg.Dataset(\n    name=\"my_dataset\",\n    settings=settings,\n)\n
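
For instance, a hedged sketch of logging a record with values for these fields, assuming the dataset has been created with data.create() and that image fields accept a URL (the URL below is hypothetical):

data.create()\n\ndata.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"markdown\": \"**Hello World**, how are you?\",\n            \"image\": \"https://example.com/image.png\",  # hypothetical URL\n        },\n    ]\n)\n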

To add records with values for fields, refer to the rg.Dataset.records documentation.

"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.TextField","title":"TextField","text":"

Bases: AbstractField

Text field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_field.py
class TextField(AbstractField):\n    \"\"\"Text field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        use_markdown: Optional[bool] = False,\n        required: bool = True,\n        description: Optional[str] = None,\n        client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"Text field for use in Argilla `Dataset` `Settings`\n        Parameters:\n            name (str): The name of the field\n            title (Optional[str], optional): The title of the field. Defaults to None.\n            use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to False.\n            required (bool): Whether the field is required. Defaults to True.\n            description (Optional[str], optional): The description of the field. Defaults to None.\n\n        \"\"\"\n\n        super().__init__(\n            name=name,\n            title=title,\n            required=required,\n            description=description,\n            settings=TextFieldSettings(use_markdown=use_markdown),\n            _client=client,\n        )\n\n    @property\n    def use_markdown(self) -> Optional[bool]:\n        return self._model.settings.use_markdown\n\n    @use_markdown.setter\n    def use_markdown(self, value: bool) -> None:\n        self._model.settings.use_markdown = value\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.TextField.__init__","title":"__init__(name, title=None, use_markdown=False, required=True, description=None, client=None)","text":"

Text field for use in Argilla Dataset Settings

Parameters:

name (str): The name of the field
title (Optional[str], optional): The title of the field. Defaults to None.
use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to False.
required (bool): Whether the field is required. Defaults to True.
description (Optional[str], optional): The description of the field. Defaults to None.

Source code in src/argilla/settings/_field.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    use_markdown: Optional[bool] = False,\n    required: bool = True,\n    description: Optional[str] = None,\n    client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"Text field for use in Argilla `Dataset` `Settings`\n    Parameters:\n        name (str): The name of the field\n        title (Optional[str], optional): The title of the field. Defaults to None.\n        use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to False.\n        required (bool): Whether the field is required. Defaults to True.\n        description (Optional[str], optional): The description of the field. Defaults to None.\n\n    \"\"\"\n\n    super().__init__(\n        name=name,\n        title=title,\n        required=required,\n        description=description,\n        settings=TextFieldSettings(use_markdown=use_markdown),\n        _client=client,\n    )\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.ImageField","title":"ImageField","text":"

Bases: AbstractField

Image field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_field.py
class ImageField(AbstractField):\n    \"\"\"Image field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        required: Optional[bool] = True,\n        description: Optional[str] = None,\n        _client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"\n        Text field for use in Argilla `Dataset` `Settings`\n\n        Parameters:\n            name (str): The name of the field\n            title (Optional[str], optional): The title of the field. Defaults to None.\n            required (Optional[bool], optional): Whether the field is required. Defaults to True.\n            description (Optional[str], optional): The description of the field. Defaults to None.\n        \"\"\"\n\n        super().__init__(\n            name=name,\n            title=title,\n            required=required,\n            description=description,\n            settings=ImageFieldSettings(),\n            _client=_client,\n        )\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.ImageField.__init__","title":"__init__(name, title=None, required=True, description=None, _client=None)","text":"

Image field for use in Argilla Dataset Settings

Parameters:

Name Type Description Default name str

The name of the field

required title Optional[str]

The title of the field. Defaults to None.

None required Optional[bool]

Whether the field is required. Defaults to True.

True description Optional[str]

The description of the field. Defaults to None.

None Source code in src/argilla/settings/_field.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    required: Optional[bool] = True,\n    description: Optional[str] = None,\n    _client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"\n    Text field for use in Argilla `Dataset` `Settings`\n\n    Parameters:\n        name (str): The name of the field\n        title (Optional[str], optional): The title of the field. Defaults to None.\n        required (Optional[bool], optional): Whether the field is required. Defaults to True.\n        description (Optional[str], optional): The description of the field. Defaults to None.\n    \"\"\"\n\n    super().__init__(\n        name=name,\n        title=title,\n        required=required,\n        description=description,\n        settings=ImageFieldSettings(),\n        _client=_client,\n    )\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.ChatField","title":"ChatField","text":"

Bases: AbstractField

Chat field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_field.py
class ChatField(AbstractField):\n    \"\"\"Chat field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        use_markdown: Optional[bool] = True,\n        required: bool = True,\n        description: Optional[str] = None,\n        _client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"\n        Chat field for use in Argilla `Dataset` `Settings`\n\n        Parameters:\n            name (str): The name of the field\n            title (Optional[str], optional): The title of the field. Defaults to None.\n            use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to True.\n            required (bool): Whether the field is required. Defaults to True.\n            description (Optional[str], optional): The description of the field. Defaults to None.\n        \"\"\"\n\n        super().__init__(\n            name=name,\n            title=title,\n            required=required,\n            description=description,\n            settings=ChatFieldSettings(use_markdown=use_markdown),\n            _client=_client,\n        )\n\n    @property\n    def use_markdown(self) -> Optional[bool]:\n        return self._model.settings.use_markdown\n\n    @use_markdown.setter\n    def use_markdown(self, value: bool) -> None:\n        self._model.settings.use_markdown = value\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.ChatField.__init__","title":"__init__(name, title=None, use_markdown=True, required=True, description=None, _client=None)","text":"

Chat field for use in Argilla Dataset Settings

Parameters:

Name Type Description Default name str

The name of the field

required title Optional[str]

The title of the field. Defaults to None.

None use_markdown Optional[bool]

Whether to use markdown. Defaults to True.

True required bool

Whether the field is required. Defaults to True.

True description Optional[str]

The description of the field. Defaults to None.

None Source code in src/argilla/settings/_field.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    use_markdown: Optional[bool] = True,\n    required: bool = True,\n    description: Optional[str] = None,\n    _client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"\n    Chat field for use in Argilla `Dataset` `Settings`\n\n    Parameters:\n        name (str): The name of the field\n        title (Optional[str], optional): The title of the field. Defaults to None.\n        use_markdown (Optional[bool], optional): Whether to use markdown. Defaults to True.\n        required (bool): Whether the field is required. Defaults to True.\n        description (Optional[str], optional): The description of the field. Defaults to None.\n    \"\"\"\n\n    super().__init__(\n        name=name,\n        title=title,\n        required=required,\n        description=description,\n        settings=ChatFieldSettings(use_markdown=use_markdown),\n        _client=_client,\n    )\n
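
A short usage sketch for ChatField, assuming a dataset whose settings include the field; the list-of-messages value format shown here is an assumption, not taken from this reference:

chat_field = rg.ChatField(name=\"chat\")\n\ndataset.records.log(\n    [\n        {\n            \"chat\": [  # assumed message schema with role/content keys\n                {\"role\": \"user\", \"content\": \"Hello World, how are you?\"},\n                {\"role\": \"assistant\", \"content\": \"I'm fine, thank you!\"},\n            ],\n        },\n    ]\n)\n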
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.CustomField","title":"CustomField","text":"

Bases: AbstractField

Custom field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_field.py
class CustomField(AbstractField):\n    \"\"\"Custom field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        template: Optional[str] = \"\",\n        advanced_mode: Optional[bool] = False,\n        required: bool = True,\n        description: Optional[str] = None,\n        _client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"\n        Custom field for use in Argilla `Dataset` `Settings` for working with custom HTML and CSS templates.\n        By default argilla will use a brackets syntax engine for the templates, which converts\n        `{{ field.key }}` to the values of record's field's object.\n\n        Parameters:\n            name (str): The name of the field\n            title (Optional[str], optional): The title of the field. Defaults to None.\n            template (str): The template of the field (HTML and CSS)\n            advanced_mode (Optional[bool], optional): Whether to use advanced mode. Defaults to False.\n                Deactivate the brackets syntax engine and use custom javascript to render the field.\n            required (Optional[bool], optional): Whether the field is required. Defaults to True.\n            required (bool): Whether the field is required. Defaults to True.\n            description (Optional[str], optional): The description of the field. Defaults to None.\n        \"\"\"\n        template = self._load_template(template)\n        super().__init__(\n            name=name,\n            title=title,\n            required=required,\n            description=description,\n            settings=CustomFieldSettings(template=template, advanced_mode=advanced_mode),\n            _client=_client,\n        )\n\n    @property\n    def template(self) -> Optional[str]:\n        return self._model.settings.template\n\n    @template.setter\n    def template(self, value: str) -> None:\n        self._model.settings.template = self._load_template(value)\n\n    @property\n    def advanced_mode(self) -> Optional[bool]:\n        return self._model.settings.advanced_mode\n\n    @advanced_mode.setter\n    def advanced_mode(self, value: bool) -> None:\n        self._model.settings.advanced_mode = value\n\n    def validate(self):\n        if self.template is None or self.template.strip() == \"\":\n            raise SettingsError(\"A valid template is required for CustomField\")\n\n    @classmethod\n    def _load_template(cls, template: str) -> str:\n        if template.endswith(\".html\") and os.path.exists(template):\n            with open(template, \"r\") as f:\n                return f.read()\n        if template.startswith(\"http\") or template.startswith(\"https\"):\n            return requests.get(template).text\n        if isinstance(template, str):\n            return template\n        raise ArgillaError(\n            \"Invalid template. Please provide 1: a valid path or URL to a HTML file. 2: a valid HTML string.\"\n        )\n
"},{"location":"reference/argilla/settings/fields/#src.argilla.settings._field.CustomField.__init__","title":"__init__(name, title=None, template='', advanced_mode=False, required=True, description=None, _client=None)","text":"

Custom field for use in Argilla Dataset Settings for working with custom HTML and CSS templates. By default, Argilla will use a brackets syntax engine for the templates, which converts {{ field.key }} into the corresponding values of the record's fields.

Parameters:

Name Type Description Default name str

The name of the field

required title Optional[str]

The title of the field. Defaults to None.

None template str

The template of the field (HTML and CSS)

'' advanced_mode Optional[bool]

Whether to use advanced mode. Defaults to False. Deactivates the brackets syntax engine so that custom JavaScript can be used to render the field.

False required bool

Whether the field is required. Defaults to True.

True description Optional[str]

The description of the field. Defaults to None.

None Source code in src/argilla/settings/_field.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    template: Optional[str] = \"\",\n    advanced_mode: Optional[bool] = False,\n    required: bool = True,\n    description: Optional[str] = None,\n    _client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"\n    Custom field for use in Argilla `Dataset` `Settings` for working with custom HTML and CSS templates.\n    By default argilla will use a brackets syntax engine for the templates, which converts\n    `{{ field.key }}` to the values of record's field's object.\n\n    Parameters:\n        name (str): The name of the field\n        title (Optional[str], optional): The title of the field. Defaults to None.\n        template (str): The template of the field (HTML and CSS)\n        advanced_mode (Optional[bool], optional): Whether to use advanced mode. Defaults to False.\n            Deactivate the brackets syntax engine and use custom javascript to render the field.\n        required (Optional[bool], optional): Whether the field is required. Defaults to True.\n        required (bool): Whether the field is required. Defaults to True.\n        description (Optional[str], optional): The description of the field. Defaults to None.\n    \"\"\"\n    template = self._load_template(template)\n    super().__init__(\n        name=name,\n        title=title,\n        required=required,\n        description=description,\n        settings=CustomFieldSettings(template=template, advanced_mode=advanced_mode),\n        _client=_client,\n    )\n
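
A minimal sketch of the default (non-advanced) mode, following the {{ field.key }} placeholder pattern described above; the field name and key are hypothetical:

custom_field = rg.CustomField(\n    name=\"profile\",  # hypothetical field name\n    template=\"<div>{{ profile.name }}</div>\",  # hypothetical key on the field's value object\n)\n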
"},{"location":"reference/argilla/settings/metadata_property/","title":"Metadata Properties","text":"

Metadata properties are used to define metadata fields in a dataset. Metadata fields are used to store additional information about the records in the dataset. For example, the category of a record, the price of a product, or any other information that is relevant to the record.

"},{"location":"reference/argilla/settings/metadata_property/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/settings/metadata_property/#defining-metadata-property-for-a-dataset","title":"Defining Metadata Property for a dataset","text":"

We define metadata properties via type-specific classes. The following example demonstrates how to define metadata properties as either a float, integer, or terms metadata property and pass them to the Settings.

TermsMetadataProperty is used to define a metadata field with a list of options. For example, a color field with options red, blue, and green. FloatMetadataProperty and IntegerMetadataProperty are used to define metadata fields with float or integer values. For example, a price field with a minimum value of 0.0 and a maximum value of 100.0.

metadata_field = rg.TermsMetadataProperty(\n    name=\"color\",\n    options=[\"red\", \"blue\", \"green\"],\n    title=\"Color\",\n)\n\nfloat_metadata_field = rg.FloatMetadataProperty(\n    name=\"price\",\n    min=0.0,\n    max=100.0,\n    title=\"Price\",\n)\n\nint_metadata_field = rg.IntegerMetadataProperty(\n    name=\"quantity\",\n    min=0,\n    max=100,\n    title=\"Quantity\",\n)\n\ndataset = rg.Dataset(\n    name=\"my_dataset\",\n    settings=rg.Settings(\n        fields=[\n            rg.TextField(name=\"text\"),\n        ],\n        questions=[\n            rg.TextQuestion(name=\"response\"),\n        ],\n        metadata=[\n            metadata_field,\n            float_metadata_field,\n            int_metadata_field,\n        ],\n    ),\n)\n
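
As a sketch, records with values for these metadata properties can then be logged as flat dictionaries, following the pattern used elsewhere in this reference (after creating the dataset with dataset.create()):

dataset.records.log(\n    [\n        {\n            \"text\": \"Hello World, how are you?\",\n            \"color\": \"red\",\n            \"price\": 19.99,\n            \"quantity\": 3,\n        },\n    ]\n)\n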

To add records with metadata, refer to the rg.Metadata class documentation.

"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.FloatMetadataProperty","title":"FloatMetadataProperty","text":"

Bases: MetadataPropertyBase

Source code in src/argilla/settings/_metadata.py
class FloatMetadataProperty(MetadataPropertyBase):\n    def __init__(\n        self,\n        name: str,\n        min: Optional[float] = None,\n        max: Optional[float] = None,\n        title: Optional[str] = None,\n        visible_for_annotators: Optional[bool] = True,\n        client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"Create a metadata field with float settings.\n\n        Parameters:\n            name (str): The name of the metadata field\n            min (Optional[float]): The minimum valid value. If none is provided, it will be computed from the values provided in the records.\n            max (Optional[float]): The maximum valid value. If none is provided, it will be computed from the values provided in the records.\n            title (Optional[str]): The title of the metadata to be shown in the UI\n            visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n        Raises:\n            MetadataError: If an error occurs while defining metadata settings.\n        \"\"\"\n\n        super().__init__(client=client)\n\n        try:\n            settings = FloatMetadataPropertySettings(min=min, max=max, type=MetadataPropertyType.float)\n        except ValueError as e:\n            raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n        self._model = MetadataFieldModel(\n            name=name,\n            type=MetadataPropertyType.float,\n            title=title,\n            settings=settings,\n            visible_for_annotators=visible_for_annotators,\n        )\n\n    @property\n    def min(self) -> Optional[int]:\n        return self._model.settings.min\n\n    @min.setter\n    def min(self, value: Optional[int]) -> None:\n        self._model.settings.min = value\n\n    @property\n    def max(self) -> Optional[int]:\n        return self._model.settings.max\n\n    @max.setter\n    def max(self, value: Optional[int]) -> None:\n        self._model.settings.max = value\n\n    @classmethod\n    def from_model(cls, model: MetadataFieldModel) -> \"FloatMetadataProperty\":\n        instance = FloatMetadataProperty(name=model.name)\n        instance._model = model\n\n        return instance\n
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.FloatMetadataProperty.__init__","title":"__init__(name, min=None, max=None, title=None, visible_for_annotators=True, client=None)","text":"

Create a metadata field with float settings.

Parameters:

name (str, required): The name of the metadata field.
min (Optional[float], default None): The minimum valid value. If none is provided, it will be computed from the values provided in the records.
max (Optional[float], default None): The maximum valid value. If none is provided, it will be computed from the values provided in the records.
title (Optional[str], default None): The title of the metadata to be shown in the UI.
visible_for_annotators (Optional[bool], default True): Whether the metadata field is visible for annotators.

Raises:

MetadataError: If an error occurs while defining metadata settings.

Source code in src/argilla/settings/_metadata.py
def __init__(\n    self,\n    name: str,\n    min: Optional[float] = None,\n    max: Optional[float] = None,\n    title: Optional[str] = None,\n    visible_for_annotators: Optional[bool] = True,\n    client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"Create a metadata field with float settings.\n\n    Parameters:\n        name (str): The name of the metadata field\n        min (Optional[float]): The minimum valid value. If none is provided, it will be computed from the values provided in the records.\n        max (Optional[float]): The maximum valid value. If none is provided, it will be computed from the values provided in the records.\n        title (Optional[str]): The title of the metadata to be shown in the UI\n        visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n    Raises:\n        MetadataError: If an error occurs while defining metadata settings.\n    \"\"\"\n\n    super().__init__(client=client)\n\n    try:\n        settings = FloatMetadataPropertySettings(min=min, max=max, type=MetadataPropertyType.float)\n    except ValueError as e:\n        raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n    self._model = MetadataFieldModel(\n        name=name,\n        type=MetadataPropertyType.float,\n        title=title,\n        settings=settings,\n        visible_for_annotators=visible_for_annotators,\n    )\n
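
Since min and max are exposed as property setters (see the class source above), the bounds can also be adjusted after instantiation; a minimal sketch with illustrative values:

price = rg.FloatMetadataProperty(name=\"price\")  # unset bounds are computed from record values\nprice.min = 0.0  # tighten the lower bound via the property setter\nprice.max = 500.0  # tighten the upper bound\n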
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.IntegerMetadataProperty","title":"IntegerMetadataProperty","text":"

Bases: MetadataPropertyBase

Source code in src/argilla/settings/_metadata.py
class IntegerMetadataProperty(MetadataPropertyBase):\n    def __init__(\n        self,\n        name: str,\n        min: Optional[int] = None,\n        max: Optional[int] = None,\n        title: Optional[str] = None,\n        visible_for_annotators: Optional[bool] = True,\n        client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"Create a metadata field with integer settings.\n\n        Parameters:\n            name (str): The name of the metadata field\n            min (Optional[int]): The minimum valid value. If none is provided, it will be computed from the values provided in the records.\n            max (Optional[int]): The maximum  valid value. If none is provided, it will be computed from the values provided in the records.\n            title (Optional[str]): The title of the metadata to be shown in the UI\n            visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n        Raises:\n            MetadataError: If an error occurs while defining metadata settings.\n        \"\"\"\n        super().__init__(client=client)\n\n        try:\n            settings = IntegerMetadataPropertySettings(min=min, max=max, type=MetadataPropertyType.integer)\n        except ValueError as e:\n            raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n        self._model = MetadataFieldModel(\n            name=name,\n            type=MetadataPropertyType.integer,\n            title=title,\n            settings=settings,\n            visible_for_annotators=visible_for_annotators,\n        )\n\n    @property\n    def min(self) -> Optional[int]:\n        return self._model.settings.min\n\n    @min.setter\n    def min(self, value: Optional[int]) -> None:\n        self._model.settings.min = value\n\n    @property\n    def max(self) -> Optional[int]:\n        return self._model.settings.max\n\n    @max.setter\n    def max(self, value: Optional[int]) -> None:\n        self._model.settings.max = value\n\n    @classmethod\n    def from_model(cls, model: MetadataFieldModel) -> \"IntegerMetadataProperty\":\n        instance = IntegerMetadataProperty(name=model.name)\n        instance._model = model\n\n        return instance\n
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.IntegerMetadataProperty.__init__","title":"__init__(name, min=None, max=None, title=None, visible_for_annotators=True, client=None)","text":"

Create a metadata field with integer settings.

Parameters:

name (str, required): The name of the metadata field.
min (Optional[int], default None): The minimum valid value. If none is provided, it will be computed from the values provided in the records.
max (Optional[int], default None): The maximum valid value. If none is provided, it will be computed from the values provided in the records.
title (Optional[str], default None): The title of the metadata to be shown in the UI.
visible_for_annotators (Optional[bool], default True): Whether the metadata field is visible for annotators.

Raises:

MetadataError: If an error occurs while defining metadata settings.

Source code in src/argilla/settings/_metadata.py
def __init__(\n    self,\n    name: str,\n    min: Optional[int] = None,\n    max: Optional[int] = None,\n    title: Optional[str] = None,\n    visible_for_annotators: Optional[bool] = True,\n    client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"Create a metadata field with integer settings.\n\n    Parameters:\n        name (str): The name of the metadata field\n        min (Optional[int]): The minimum valid value. If none is provided, it will be computed from the values provided in the records.\n        max (Optional[int]): The maximum  valid value. If none is provided, it will be computed from the values provided in the records.\n        title (Optional[str]): The title of the metadata to be shown in the UI\n        visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n    Raises:\n        MetadataError: If an error occurs while defining metadata settings.\n    \"\"\"\n    super().__init__(client=client)\n\n    try:\n        settings = IntegerMetadataPropertySettings(min=min, max=max, type=MetadataPropertyType.integer)\n    except ValueError as e:\n        raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n    self._model = MetadataFieldModel(\n        name=name,\n        type=MetadataPropertyType.integer,\n        title=title,\n        settings=settings,\n        visible_for_annotators=visible_for_annotators,\n    )\n
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.TermsMetadataProperty","title":"TermsMetadataProperty","text":"

Bases: MetadataPropertyBase

Source code in src/argilla/settings/_metadata.py
class TermsMetadataProperty(MetadataPropertyBase):\n    def __init__(\n        self,\n        name: str,\n        options: Optional[List[str]] = None,\n        title: Optional[str] = None,\n        visible_for_annotators: Optional[bool] = True,\n        client: Optional[Argilla] = None,\n    ) -> None:\n        \"\"\"Create a metadata field with terms settings.\n\n        Parameters:\n            name (str): The name of the metadata field\n            options (Optional[List[str]]): The list of options\n            title (Optional[str]): The title of the metadata to be shown in the UI\n            visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n        Raises:\n            MetadataError: If an error occurs while defining metadata settings\n        \"\"\"\n        super().__init__(client=client)\n\n        try:\n            settings = TermsMetadataPropertySettings(values=options, type=MetadataPropertyType.terms)\n        except ValueError as e:\n            raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n        self._model = MetadataFieldModel(\n            name=name,\n            type=MetadataPropertyType.terms,\n            title=title,\n            settings=settings,\n            visible_for_annotators=visible_for_annotators,\n        )\n\n    @property\n    def options(self) -> Optional[List[str]]:\n        return self._model.settings.values\n\n    @options.setter\n    def options(self, value: list[str]) -> None:\n        self._model.settings.values = value\n\n    @classmethod\n    def from_model(cls, model: MetadataFieldModel) -> \"TermsMetadataProperty\":\n        instance = TermsMetadataProperty(name=model.name)\n        instance._model = model\n\n        return instance\n
"},{"location":"reference/argilla/settings/metadata_property/#src.argilla.settings._metadata.TermsMetadataProperty.__init__","title":"__init__(name, options=None, title=None, visible_for_annotators=True, client=None)","text":"

Create a metadata field with terms settings.

Parameters:

name (str, required): The name of the metadata field.
options (Optional[List[str]], default None): The list of options.
title (Optional[str], default None): The title of the metadata to be shown in the UI.
visible_for_annotators (Optional[bool], default True): Whether the metadata field is visible for annotators.

Raises:

MetadataError: If an error occurs while defining metadata settings.

Source code in src/argilla/settings/_metadata.py
def __init__(\n    self,\n    name: str,\n    options: Optional[List[str]] = None,\n    title: Optional[str] = None,\n    visible_for_annotators: Optional[bool] = True,\n    client: Optional[Argilla] = None,\n) -> None:\n    \"\"\"Create a metadata field with terms settings.\n\n    Parameters:\n        name (str): The name of the metadata field\n        options (Optional[List[str]]): The list of options\n        title (Optional[str]): The title of the metadata to be shown in the UI\n        visible_for_annotators (Optional[bool]): Whether the metadata field is visible for annotators.\n\n    Raises:\n        MetadataError: If an error occurs while defining metadata settings\n    \"\"\"\n    super().__init__(client=client)\n\n    try:\n        settings = TermsMetadataPropertySettings(values=options, type=MetadataPropertyType.terms)\n    except ValueError as e:\n        raise MetadataError(f\"Error defining metadata settings for {name}\") from e\n\n    self._model = MetadataFieldModel(\n        name=name,\n        type=MetadataPropertyType.terms,\n        title=title,\n        settings=settings,\n        visible_for_annotators=visible_for_annotators,\n    )\n
"},{"location":"reference/argilla/settings/questions/","title":"Questions","text":"

Argilla uses questions to gather feedback. Questions are answered by users or models.

"},{"location":"reference/argilla/settings/questions/#usage-examples","title":"Usage Examples","text":"

To define a label question, for example, instantiate the LabelQuestion class and pass it to the Settings class.

label_question = rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])\n\nsettings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    questions=[\n        label_question,\n    ],\n)\n

Questions can be combined to match the type of feedback you want to collect. For example, you can combine a label question with a text question to collect both a label and a free-text response.

label_question = rg.LabelQuestion(name=\"label\", labels=[\"positive\", \"negative\"])\ntext_question = rg.TextQuestion(name=\"response\")\n\nsettings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    questions=[\n        label_question,\n        text_question,\n    ],\n)\n\ndataset = rg.Dataset(\n    name=\"my_dataset\",\n    settings=settings,\n)\n

To add records with responses to questions, refer to the rg.Response class documentation.
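
As a hedged sketch (the username and record content are illustrative, and an authenticated rg.Argilla client is assumed), a response ties an answer value to a question name and a user id:

user = client.users(\"my_username\")  # look up the responding user\nrecord = rg.Record(\n    fields={\"text\": \"I love this product.\"},\n    responses=[rg.Response(\"label\", \"positive\", user_id=user.id)],  # question name, value, user\n)\n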

"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.LabelQuestion","title":"LabelQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class LabelQuestion(QuestionPropertyBase):\n    _model: LabelQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        labels: Union[List[str], Dict[str, str]],\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n        visible_labels: Optional[int] = None,\n    ) -> None:\n        \"\"\" Define a new label question for `Settings` of a `Dataset`. A label \\\n            question is a question where the user can select one label from \\\n            a list of available labels.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a\n                dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n            title (Optional[str]): The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n            visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n                Setting it to None show all options.\n        \"\"\"\n        self._model = LabelQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=LabelQuestionSettings(\n                options=self._render_values_as_options(labels), visible_options=visible_labels\n            ),\n        )\n\n    @classmethod\n    def from_model(cls, model: LabelQuestionModel) -> \"LabelQuestion\":\n        instance = cls(name=model.name, labels=cls._render_options_as_values(model.settings.options))\n        instance._model = model\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"LabelQuestion\":\n        model = LabelQuestionModel(**data)\n        return cls.from_model(model=model)\n\n    ##############################\n    # Public properties\n    ##############################\n\n    @property\n    def labels(self) -> List[str]:\n        return self._render_options_as_labels(self._model.settings.options)\n\n    @labels.setter\n    def labels(self, labels: List[str]) -> None:\n        self._model.settings.options = self._render_values_as_options(labels)\n\n    @property\n    def visible_labels(self) -> Optional[int]:\n        return self._model.settings.visible_options\n\n    @visible_labels.setter\n    def visible_labels(self, visible_labels: Optional[int]) -> None:\n        self._model.settings.visible_options = visible_labels\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.LabelQuestion.__init__","title":"__init__(name, labels, title=None, description=None, required=True, visible_labels=None)","text":"

Define a new label question for Settings of a Dataset. A label question is a question where the user can select one label from a list of available labels.

Parameters:

name (str, required): The name of the question to be used as a reference.
labels (Union[List[str], Dict[str, str]], required): The list of available labels for the question, or a dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.
title (Optional[str], default None): The title of the question to be shown in the UI.
description (Optional[str], default None): The description of the question to be shown in the UI.
required (bool, default True): If the question is required for a record to be valid. At least one question must be required.
visible_labels (Optional[int], default None): The number of visible labels for the question to be shown in the UI. Setting it to None shows all options.

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    labels: Union[List[str], Dict[str, str]],\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n    visible_labels: Optional[int] = None,\n) -> None:\n    \"\"\" Define a new label question for `Settings` of a `Dataset`. A label \\\n        question is a question where the user can select one label from \\\n        a list of available labels.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a\n            dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n        title (Optional[str]): The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n        visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n            Setting it to None show all options.\n    \"\"\"\n    self._model = LabelQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=LabelQuestionSettings(\n            options=self._render_values_as_options(labels), visible_options=visible_labels\n        ),\n    )\n
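
As an illustrative sketch, labels can also be given as a dictionary to decouple the stored value from the text shown in the UI:

label_question = rg.LabelQuestion(\n    name=\"sentiment\",\n    labels={\"POS\": \"Positive\", \"NEG\": \"Negative\"},  # stored value -> display name in the UI\n    visible_labels=None,  # None shows all options\n)\n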
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.MultiLabelQuestion","title":"MultiLabelQuestion","text":"

Bases: LabelQuestion

Source code in src/argilla/settings/_question.py
class MultiLabelQuestion(LabelQuestion):\n    _model: MultiLabelQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        labels: Union[List[str], Dict[str, str]],\n        visible_labels: Optional[int] = None,\n        labels_order: Literal[\"natural\", \"suggestion\"] = \"natural\",\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n    ) -> None:\n        \"\"\"Create a new multi-label question for `Settings` of a `Dataset`. A \\\n            multi-label question is a question where the user can select multiple \\\n            labels from a list of available labels.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a \\\n                dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n            visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n                Setting it to None show all options.\n            labels_order (Literal[\"natural\", \"suggestion\"]): The order of the labels in the UI. \\\n                Can be either \"natural\" (order in which they were specified) or \"suggestion\" (order prioritizing those associated with a suggestion). \\\n                The score of the suggestion will be taken into account for ordering if available.\n            title (Optional[str]: The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n        \"\"\"\n        self._model = MultiLabelQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=MultiLabelQuestionSettings(\n                options=self._render_values_as_options(labels),\n                visible_options=visible_labels,\n                options_order=labels_order,\n            ),\n        )\n\n    @classmethod\n    def from_model(cls, model: MultiLabelQuestionModel) -> \"MultiLabelQuestion\":\n        instance = cls(\n            name=model.name,\n            labels=cls._render_options_as_values(model.settings.options),\n            labels_order=model.settings.options_order,\n        )\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"MultiLabelQuestion\":\n        model = MultiLabelQuestionModel(**data)\n        return cls.from_model(model=model)\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.MultiLabelQuestion.__init__","title":"__init__(name, labels, visible_labels=None, labels_order='natural', title=None, description=None, required=True)","text":"

Create a new multi-label question for Settings of a Dataset. A multi-label question is a question where the user can select multiple labels from a list of available labels.

Parameters:

name (str, required): The name of the question to be used as a reference.
labels (Union[List[str], Dict[str, str]], required): The list of available labels for the question, or a dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.
visible_labels (Optional[int], default None): The number of visible labels for the question to be shown in the UI. Setting it to None shows all options.
labels_order (Literal["natural", "suggestion"], default "natural"): The order of the labels in the UI. Can be either "natural" (order in which they were specified) or "suggestion" (order prioritizing those associated with a suggestion). The score of the suggestion will be taken into account for ordering if available.
title (Optional[str], default None): The title of the question to be shown in the UI.
description (Optional[str], default None): The description of the question to be shown in the UI.
required (bool, default True): If the question is required for a record to be valid. At least one question must be required.

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    labels: Union[List[str], Dict[str, str]],\n    visible_labels: Optional[int] = None,\n    labels_order: Literal[\"natural\", \"suggestion\"] = \"natural\",\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n) -> None:\n    \"\"\"Create a new multi-label question for `Settings` of a `Dataset`. A \\\n        multi-label question is a question where the user can select multiple \\\n        labels from a list of available labels.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a \\\n            dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n        visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n            Setting it to None show all options.\n        labels_order (Literal[\"natural\", \"suggestion\"]): The order of the labels in the UI. \\\n            Can be either \"natural\" (order in which they were specified) or \"suggestion\" (order prioritizing those associated with a suggestion). \\\n            The score of the suggestion will be taken into account for ordering if available.\n        title (Optional[str]: The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n    \"\"\"\n    self._model = MultiLabelQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=MultiLabelQuestionSettings(\n            options=self._render_values_as_options(labels),\n            visible_options=visible_labels,\n            options_order=labels_order,\n        ),\n    )\n
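
A brief sketch (the label names are illustrative) showing labels_order, which can surface suggested labels first:

topics_question = rg.MultiLabelQuestion(\n    name=\"topics\",\n    labels=[\"politics\", \"sports\", \"technology\", \"economy\"],\n    visible_labels=3,  # collapse the label list after three entries\n    labels_order=\"suggestion\",  # show labels with suggestions (and higher scores) first\n)\n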
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.RankingQuestion","title":"RankingQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class RankingQuestion(QuestionPropertyBase):\n    _model: RankingQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        values: Union[List[str], Dict[str, str]],\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n    ) -> None:\n        \"\"\"Create a new ranking question for `Settings` of a `Dataset`. A ranking question \\\n            is a question where the user can rank a list of options.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            values (Union[List[str], Dict[str, str]]): The list of options to be ranked, or a \\\n                dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n            title (Optional[str]:) The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n        \"\"\"\n        self._model = RankingQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=RankingQuestionSettings(options=self._render_values_as_options(values)),\n        )\n\n    @classmethod\n    def from_model(cls, model: RankingQuestionModel) -> \"RankingQuestion\":\n        instance = cls(name=model.name, values=cls._render_options_as_values(model.settings.options))\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"RankingQuestion\":\n        model = RankingQuestionModel(**data)\n        return cls.from_model(model=model)\n\n    @property\n    def values(self) -> List[str]:\n        return self._render_options_as_labels(self._model.settings.options)\n\n    @values.setter\n    def values(self, values: List[int]) -> None:\n        self._model.settings.options = self._render_values_as_options(values)\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.RankingQuestion.__init__","title":"__init__(name, values, title=None, description=None, required=True)","text":"

Create a new ranking question for Settings of a Dataset. A ranking question is a question where the user can rank a list of options.

Parameters:

name (str, required): The name of the question to be used as a reference.
values (Union[List[str], Dict[str, str]], required): The list of options to be ranked, or a dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.
title (Optional[str], default None): The title of the question to be shown in the UI.
description (Optional[str], default None): The description of the question to be shown in the UI.
required (bool, default True): If the question is required for a record to be valid. At least one question must be required.

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    values: Union[List[str], Dict[str, str]],\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n) -> None:\n    \"\"\"Create a new ranking question for `Settings` of a `Dataset`. A ranking question \\\n        is a question where the user can rank a list of options.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        values (Union[List[str], Dict[str, str]]): The list of options to be ranked, or a \\\n            dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n        title (Optional[str]:) The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n    \"\"\"\n    self._model = RankingQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=RankingQuestionSettings(options=self._render_values_as_options(values)),\n    )\n
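
For example (a minimal sketch with illustrative names), the options to rank can be passed as a dictionary mapping stored values to UI names:

ranking_question = rg.RankingQuestion(\n    name=\"preference\",\n    values={\"response-1\": \"Response 1\", \"response-2\": \"Response 2\"},  # stored value -> display name\n)\n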
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.TextQuestion","title":"TextQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class TextQuestion(QuestionPropertyBase):\n    _model: TextQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n        use_markdown: bool = False,\n    ) -> None:\n        \"\"\"Create a new text question for `Settings` of a `Dataset`. A text question \\\n            is a question where the user can input text.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            title (Optional[str]): The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n            use_markdown (Optional[bool]): Whether to render the markdown in the UI. When True, you will be able \\\n                to use all the Markdown features for text formatting, including LaTex formulas and embedding multimedia content and PDFs.\n        \"\"\"\n        self._model = TextQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=TextQuestionSettings(use_markdown=use_markdown),\n        )\n\n    @classmethod\n    def from_model(cls, model: TextQuestionModel) -> \"TextQuestion\":\n        instance = cls(name=model.name)\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"TextQuestion\":\n        model = TextQuestionModel(**data)\n        return cls.from_model(model=model)\n\n    @property\n    def use_markdown(self) -> bool:\n        return self._model.settings.use_markdown\n\n    @use_markdown.setter\n    def use_markdown(self, use_markdown: bool) -> None:\n        self._model.settings.use_markdown = use_markdown\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.TextQuestion.__init__","title":"__init__(name, title=None, description=None, required=True, use_markdown=False)","text":"

Create a new text question for Settings of a Dataset. A text question is a question where the user can input text.

Parameters:

name (str, required): The name of the question to be used as a reference.
title (Optional[str], default None): The title of the question to be shown in the UI.
description (Optional[str], default None): The description of the question to be shown in the UI.
required (bool, default True): If the question is required for a record to be valid. At least one question must be required.
use_markdown (Optional[bool], default False): Whether to render the markdown in the UI. When True, you will be able to use all the Markdown features for text formatting, including LaTeX formulas and embedding multimedia content and PDFs.

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n    use_markdown: bool = False,\n) -> None:\n    \"\"\"Create a new text question for `Settings` of a `Dataset`. A text question \\\n        is a question where the user can input text.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        title (Optional[str]): The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n        use_markdown (Optional[bool]): Whether to render the markdown in the UI. When True, you will be able \\\n            to use all the Markdown features for text formatting, including LaTex formulas and embedding multimedia content and PDFs.\n    \"\"\"\n    self._model = TextQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=TextQuestionSettings(use_markdown=use_markdown),\n    )\n
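
A minimal sketch (the question name and description are illustrative) enabling Markdown rendering for free-text answers:

text_question = rg.TextQuestion(\n    name=\"corrected_text\",\n    description=\"Provide a corrected version of the text.\",\n    use_markdown=True,  # render Markdown (including LaTeX formulas) in the UI\n)\n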
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.RatingQuestion","title":"RatingQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class RatingQuestion(QuestionPropertyBase):\n    _model: RatingQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        values: List[int],\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n    ) -> None:\n        \"\"\"Create a new rating question for `Settings` of a `Dataset`. A rating question \\\n            is a question where the user can select a value from a sequential list of options.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            values (List[int]): The list of selectable values. It should be defined in the range [0, 10].\n            title (Optional[str]:) The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n        \"\"\"\n        self._model = RatingQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            values=values,\n            settings=RatingQuestionSettings(options=self._render_values_as_options(values)),\n        )\n\n    @classmethod\n    def from_model(cls, model: RatingQuestionModel) -> \"RatingQuestion\":\n        instance = cls(name=model.name, values=cls._render_options_as_values(model.settings.options))\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"RatingQuestion\":\n        model = RatingQuestionModel(**data)\n        return cls.from_model(model=model)\n\n    @property\n    def values(self) -> List[int]:\n        return self._render_options_as_labels(self._model.settings.options)\n\n    @values.setter\n    def values(self, values: List[int]) -> None:\n        self._model.values = self._render_values_as_options(values)\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.RatingQuestion.__init__","title":"__init__(name, values, title=None, description=None, required=True)","text":"

Create a new rating question for Settings of a Dataset. A rating question is a question where the user can select a value from a sequential list of options.

Parameters:

name (str, required): The name of the question to be used as a reference.
values (List[int], required): The list of selectable values. It should be defined in the range [0, 10].
title (Optional[str], default None): The title of the question to be shown in the UI.
description (Optional[str], default None): The description of the question to be shown in the UI.
required (bool, default True): If the question is required for a record to be valid. At least one question must be required.

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    values: List[int],\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n) -> None:\n    \"\"\"Create a new rating question for `Settings` of a `Dataset`. A rating question \\\n        is a question where the user can select a value from a sequential list of options.\n\n    Parameters:\n        name (str): The name of the question to be used as a reference.\n        values (List[int]): The list of selectable values. It should be defined in the range [0, 10].\n        title (Optional[str]:) The title of the question to be shown in the UI.\n        description (Optional[str]): The description of the question to be shown in the UI.\n        required (bool): If the question is required for a record to be valid. At least one question must be required.\n    \"\"\"\n    self._model = RatingQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        values=values,\n        settings=RatingQuestionSettings(options=self._render_values_as_options(values)),\n    )\n
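
A minimal sketch; the values must stay within the supported [0, 10] range:

rating_question = rg.RatingQuestion(\n    name=\"quality\",\n    values=[1, 2, 3, 4, 5],  # sequential options shown to the annotator\n)\n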
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.SpanQuestion","title":"SpanQuestion","text":"

Bases: QuestionPropertyBase

Source code in src/argilla/settings/_question.py
class SpanQuestion(QuestionPropertyBase):\n    _model: SpanQuestionModel\n\n    def __init__(\n        self,\n        name: str,\n        field: str,\n        labels: Union[List[str], Dict[str, str]],\n        allow_overlapping: bool = False,\n        visible_labels: Optional[int] = None,\n        title: Optional[str] = None,\n        description: Optional[str] = None,\n        required: bool = True,\n    ):\n        \"\"\" Create a new span question for `Settings` of a `Dataset`. A span question \\\n            is a question where the user can select a section of text within a text field \\\n            and assign it a label.\n\n            Parameters:\n                name (str): The name of the question to be used as a reference.\n                field (str): The name of the text field where the span question will be applied.\n                labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a \\\n                    dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n                allow_overlapping (bool): This value specifies whether overlapped spans are allowed or not.\n                visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n                    Setting it to None show all options.\n                title (Optional[str]:) The title of the question to be shown in the UI.\n                description (Optional[str]): The description of the question to be shown in the UI.\n                required (bool): If the question is required for a record to be valid. At least one question must be required.\n            \"\"\"\n        self._model = SpanQuestionModel(\n            name=name,\n            title=title,\n            description=description,\n            required=required,\n            settings=SpanQuestionSettings(\n                field=field,\n                allow_overlapping=allow_overlapping,\n                visible_options=visible_labels,\n                options=self._render_values_as_options(labels),\n            ),\n        )\n\n    @property\n    def name(self):\n        return self._model.name\n\n    @property\n    def field(self):\n        return self._model.settings.field\n\n    @field.setter\n    def field(self, field: str):\n        self._model.settings.field = field\n\n    @property\n    def allow_overlapping(self):\n        return self._model.settings.allow_overlapping\n\n    @allow_overlapping.setter\n    def allow_overlapping(self, allow_overlapping: bool):\n        self._model.settings.allow_overlapping = allow_overlapping\n\n    @property\n    def visible_labels(self) -> Optional[int]:\n        return self._model.settings.visible_options\n\n    @visible_labels.setter\n    def visible_labels(self, visible_labels: Optional[int]) -> None:\n        self._model.settings.visible_options = visible_labels\n\n    @property\n    def labels(self) -> List[str]:\n        return self._render_options_as_labels(self._model.settings.options)\n\n    @labels.setter\n    def labels(self, labels: List[str]) -> None:\n        self._model.settings.options = self._render_values_as_options(labels)\n\n    @classmethod\n    def from_model(cls, model: SpanQuestionModel) -> \"SpanQuestion\":\n        instance = cls(\n            name=model.name,\n            field=model.settings.field,\n            labels=cls._render_options_as_values(model.settings.options),\n        )\n        instance._model = model\n\n        return 
instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"SpanQuestion\":\n        model = SpanQuestionModel(**data)\n        return cls.from_model(model=model)\n
"},{"location":"reference/argilla/settings/questions/#src.argilla.settings._question.SpanQuestion.__init__","title":"__init__(name, field, labels, allow_overlapping=False, visible_labels=None, title=None, description=None, required=True)","text":"

Create a new span question for Settings of a Dataset. A span question is a question where the user can select a section of text within a text field and assign it a label.

Parameters:

name (str, required): The name of the question to be used as a reference.
field (str, required): The name of the text field where the span question will be applied.
labels (Union[List[str], Dict[str, str]], required): The list of available labels for the question, or a dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.
allow_overlapping (bool, default False): Whether overlapping spans are allowed.
visible_labels (Optional[int], default None): The number of visible labels for the question to be shown in the UI. Setting it to None shows all options.
title (Optional[str], default None): The title of the question to be shown in the UI.
description (Optional[str], default None): The description of the question to be shown in the UI.
required (bool, default True): If the question is required for a record to be valid. At least one question must be required.

Source code in src/argilla/settings/_question.py
def __init__(\n    self,\n    name: str,\n    field: str,\n    labels: Union[List[str], Dict[str, str]],\n    allow_overlapping: bool = False,\n    visible_labels: Optional[int] = None,\n    title: Optional[str] = None,\n    description: Optional[str] = None,\n    required: bool = True,\n):\n    \"\"\" Create a new span question for `Settings` of a `Dataset`. A span question \\\n        is a question where the user can select a section of text within a text field \\\n        and assign it a label.\n\n        Parameters:\n            name (str): The name of the question to be used as a reference.\n            field (str): The name of the text field where the span question will be applied.\n            labels (Union[List[str], Dict[str, str]]): The list of available labels for the question, or a \\\n                dictionary of key-value pairs where the key is the label and the value is the label name displayed in the UI.\n            allow_overlapping (bool): This value specifies whether overlapped spans are allowed or not.\n            visible_labels (Optional[int]): The number of visible labels for the question to be shown in the UI. \\\n                Setting it to None show all options.\n            title (Optional[str]:) The title of the question to be shown in the UI.\n            description (Optional[str]): The description of the question to be shown in the UI.\n            required (bool): If the question is required for a record to be valid. At least one question must be required.\n        \"\"\"\n    self._model = SpanQuestionModel(\n        name=name,\n        title=title,\n        description=description,\n        required=required,\n        settings=SpanQuestionSettings(\n            field=field,\n            allow_overlapping=allow_overlapping,\n            visible_options=visible_labels,\n            options=self._render_values_as_options(labels),\n        ),\n    )\n
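
A minimal sketch (the field and label names are illustrative) for a span question over a text field named "text":

span_question = rg.SpanQuestion(\n    name=\"entities\",\n    field=\"text\",  # the text field spans are selected from\n    labels={\"PER\": \"Person\", \"ORG\": \"Organization\"},  # stored value -> display name\n    allow_overlapping=False,  # disallow overlapping spans\n)\n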
"},{"location":"reference/argilla/settings/settings/","title":"rg.Settings","text":"

rg.Settings is used to define the settings of an Argilla Dataset. The settings configure the behavior of the dataset, such as its fields, questions, guidelines, metadata, and vectors. The Settings class is passed to the Dataset class and used to create the dataset on the server. Once created, the settings of a dataset cannot be changed.

"},{"location":"reference/argilla/settings/settings/#usage-examples","title":"Usage Examples","text":""},{"location":"reference/argilla/settings/settings/#creating-a-new-dataset-with-settings","title":"Creating a new dataset with settings","text":"

To create a new dataset with settings, instantiate the Settings class and pass it to the Dataset class.

import argilla as rg\n\nsettings = rg.Settings(\n    guidelines=\"Select the sentiment of the prompt.\",\n    fields=[rg.TextField(name=\"prompt\", use_markdown=True)],\n    questions=[rg.LabelQuestion(name=\"sentiment\", labels=[\"positive\", \"negative\"])],\n)\n\ndataset = rg.Dataset(name=\"sentiment_analysis\", settings=settings)\n\n# Create the dataset on the server\ndataset.create()\n

To define the settings for fields, questions, metadata, vectors, or distribution, refer to the rg.TextField, rg.LabelQuestion, rg.TermsMetadataProperty, rg.VectorField, and rg.TaskDistribution class documentation.
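
For example (a hedged sketch; the names, options, and vector dimension are illustrative), a single Settings object can combine these building blocks:

settings = rg.Settings(\n    guidelines=\"Classify the prompt and rate the response.\",\n    fields=[rg.TextField(name=\"prompt\")],\n    questions=[rg.LabelQuestion(name=\"sentiment\", labels=[\"positive\", \"negative\"])],\n    metadata=[rg.TermsMetadataProperty(name=\"source\", options=[\"web\", \"api\"])],\n    vectors=[rg.VectorField(name=\"embedding\", dimensions=384)],\n    distribution=rg.TaskDistribution(min_submitted=2),  # require two submissions per record\n)\n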

"},{"location":"reference/argilla/settings/settings/#creating-settings-using-built-in-templates","title":"Creating settings using built in templates","text":"

Argilla provides built-in templates for creating settings for common dataset types. To use a template, call the corresponding class method of the Settings class. There are three built-in templates, covering classification, ranking, and rating tasks. Template settings also include default guidelines and mappings.

"},{"location":"reference/argilla/settings/settings/#classification-task","title":"Classification Task","text":"

You can define a classification task using the rg.Settings.for_classification class method. This will create settings with a text field and a label question. You can select the field type with the field_type parameter, which accepts text or image (see the sketch after the settings listing below).

settings = rg.Settings.for_classification(labels=[\"positive\", \"negative\"]) # (1)\n

This will return a Settings object with the following settings:

settings = Settings(\n    guidelines=\"Select a label for the document.\",\n    fields=[rg.TextField(name=\"text\")],  # an image field is used instead when field_type=\"image\"\n    questions=[LabelQuestion(name=\"label\", labels=labels)],\n    mapping={\"input\": \"text\", \"output\": \"label\", \"document\": \"text\"},\n)\n
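
As a brief sketch, passing field_type="image" swaps the default text field for an image field (the labels are illustrative):

settings = rg.Settings.for_classification(\n    labels=[\"cat\", \"dog\"],\n    field_type=\"image\",  # accepts \"text\" (default) or \"image\"\n)\n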
"},{"location":"reference/argilla/settings/settings/#ranking-task","title":"Ranking Task","text":"

You can define a ranking task using the rg.Settings.for_ranking class method. This will create settings with text fields for an instruction and two responses, plus a ranking question.

settings = rg.Settings.for_ranking()\n

This will return a Settings object with the following settings:

settings = Settings(\n    guidelines=\"Rank the responses.\",\n    fields=[\n        rg.TextField(name=\"instruction\"),\n        rg.TextField(name=\"response1\"),\n        rg.TextField(name=\"response2\"),\n    ],\n    questions=[RankingQuestion(name=\"ranking\", values=[\"response1\", \"response2\"])],\n    mapping={\n        \"input\": \"instruction\",\n        \"prompt\": \"instruction\",\n        \"chosen\": \"response1\",\n        \"rejected\": \"response2\",\n    },\n)\n
"},{"location":"reference/argilla/settings/settings/#rating-task","title":"Rating Task","text":"

You can define a rating task using the rg.Settings.for_rating class method. This will create settings with text fields for an instruction and a response, plus a rating question.

settings = rg.Settings.for_rating()\n

This will return a Settings object with the following settings:

settings = Settings(\n    guidelines=\"Rate the response.\",\n    fields=[\n        rg.TextField(name=\"instruction\"),\n        rg.TextField(name=\"response\"),\n    ],\n    questions=[RatingQuestion(name=\"rating\", values=[1, 2, 3, 4, 5])],\n    mapping={\n        \"input\": \"instruction\",\n        \"prompt\": \"instruction\",\n        \"output\": \"response\",\n        \"score\": \"rating\",\n    },\n)\n
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings","title":"Settings","text":"

Bases: DefaultSettingsMixin, Resource

Settings class for Argilla Datasets.

This class is used to define the representation of a Dataset within the UI.

Source code in src/argilla/settings/_resource.py
class Settings(DefaultSettingsMixin, Resource):\n    \"\"\"\n    Settings class for Argilla Datasets.\n\n    This class is used to define the representation of a Dataset within the UI.\n    \"\"\"\n\n    def __init__(\n        self,\n        fields: Optional[List[Field]] = None,\n        questions: Optional[List[QuestionType]] = None,\n        vectors: Optional[List[VectorField]] = None,\n        metadata: Optional[List[MetadataType]] = None,\n        guidelines: Optional[str] = None,\n        allow_extra_metadata: bool = False,\n        distribution: Optional[TaskDistribution] = None,\n        mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n        _dataset: Optional[\"Dataset\"] = None,\n    ) -> None:\n        \"\"\"\n        Args:\n            fields (List[Field]): A list of Field objects that represent the fields in the Dataset.\n            questions (List[Union[LabelQuestion, MultiLabelQuestion, RankingQuestion, TextQuestion, RatingQuestion]]):\n                A list of Question objects that represent the questions in the Dataset.\n            vectors (List[VectorField]): A list of VectorField objects that represent the vectors in the Dataset.\n            metadata (List[MetadataField]): A list of MetadataField objects that represent the metadata in the Dataset.\n            guidelines (str): A string containing the guidelines for the Dataset.\n            allow_extra_metadata (bool): A boolean that determines whether or not extra metadata is allowed in the\n                Dataset. Defaults to False.\n            distribution (TaskDistribution): The annotation task distribution configuration.\n                Default to DEFAULT_TASK_DISTRIBUTION\n            mapping (Dict[str, Union[str, Sequence[str]]]): A dictionary that maps incoming data names to Argilla dataset attributes in DatasetRecords.\n        \"\"\"\n        super().__init__(client=_dataset._client if _dataset else None)\n\n        self._dataset = _dataset\n        self._distribution = distribution\n        self._mapping = mapping\n        self.__guidelines = self.__process_guidelines(guidelines)\n        self.__allow_extra_metadata = allow_extra_metadata\n\n        self.__questions = QuestionsProperties(self, questions)\n        self.__fields = SettingsProperties(self, fields)\n        self.__vectors = SettingsProperties(self, vectors)\n        self.__metadata = SettingsProperties(self, metadata)\n\n    #####################\n    # Properties        #\n    #####################\n\n    @property\n    def fields(self) -> \"SettingsProperties\":\n        return self.__fields\n\n    @fields.setter\n    def fields(self, fields: List[Field]):\n        self.__fields = SettingsProperties(self, fields)\n\n    @property\n    def questions(self) -> \"SettingsProperties\":\n        return self.__questions\n\n    @questions.setter\n    def questions(self, questions: List[QuestionType]):\n        self.__questions = QuestionsProperties(self, questions)\n\n    @property\n    def vectors(self) -> \"SettingsProperties\":\n        return self.__vectors\n\n    @vectors.setter\n    def vectors(self, vectors: List[VectorField]):\n        self.__vectors = SettingsProperties(self, vectors)\n\n    @property\n    def metadata(self) -> \"SettingsProperties\":\n        return self.__metadata\n\n    @metadata.setter\n    def metadata(self, metadata: List[MetadataType]):\n        self.__metadata = SettingsProperties(self, metadata)\n\n    @property\n    def guidelines(self) -> str:\n        return self.__guidelines\n\n    
@guidelines.setter\n    def guidelines(self, guidelines: str):\n        self.__guidelines = self.__process_guidelines(guidelines)\n\n    @property\n    def allow_extra_metadata(self) -> bool:\n        return self.__allow_extra_metadata\n\n    @allow_extra_metadata.setter\n    def allow_extra_metadata(self, value: bool):\n        self.__allow_extra_metadata = value\n\n    @property\n    def distribution(self) -> TaskDistribution:\n        return self._distribution or TaskDistribution.default()\n\n    @distribution.setter\n    def distribution(self, value: TaskDistribution) -> None:\n        self._distribution = value\n\n    @property\n    def mapping(self) -> Dict[str, Union[str, Sequence[str]]]:\n        return self._mapping\n\n    @mapping.setter\n    def mapping(self, value: Dict[str, Union[str, Sequence[str]]]):\n        self._mapping = value\n\n    @property\n    def dataset(self) -> \"Dataset\":\n        return self._dataset\n\n    @dataset.setter\n    def dataset(self, dataset: \"Dataset\"):\n        self._dataset = dataset\n        self._client = dataset._client\n\n    @cached_property\n    def schema(self) -> dict:\n        schema_dict = {}\n\n        for field in self.fields:\n            schema_dict[field.name] = field\n\n        for question in self.questions:\n            schema_dict[question.name] = question\n\n        for vector in self.vectors:\n            schema_dict[vector.name] = vector\n\n        for metadata in self.metadata:\n            schema_dict[metadata.name] = metadata\n\n        return schema_dict\n\n    @cached_property\n    def schema_by_id(self) -> Dict[UUID, Union[Field, QuestionType, MetadataType, VectorField]]:\n        return {v.id: v for v in self.schema.values()}\n\n    def validate(self) -> None:\n        self._validate_empty_settings()\n        self._validate_duplicate_names()\n\n        for field in self.fields:\n            field.validate()\n\n    #####################\n    #  Public methods   #\n    #####################\n\n    def get(self) -> \"Settings\":\n        self.fields = self._fetch_fields()\n        self.questions = self._fetch_questions()\n        self.vectors = self._fetch_vectors()\n        self.metadata = self._fetch_metadata()\n        self.__fetch_dataset_related_attributes()\n\n        self._update_last_api_call()\n        return self\n\n    def create(self) -> \"Settings\":\n        self.validate()\n\n        self._update_dataset_related_attributes()\n        self.__fields.create()\n        self.__questions.create()\n        self.__vectors.create()\n        self.__metadata.create()\n\n        self._update_last_api_call()\n        return self\n\n    def update(self) -> \"Resource\":\n        self.validate()\n\n        self._update_dataset_related_attributes()\n        self.__fields.update()\n        self.__vectors.update()\n        self.__metadata.update()\n        # self.questions.update()\n\n        self._update_last_api_call()\n        return self\n\n    def serialize(self):\n        try:\n            return {\n                \"guidelines\": self.guidelines,\n                \"questions\": self.__questions.serialize(),\n                \"fields\": self.__fields.serialize(),\n                \"vectors\": self.vectors.serialize(),\n                \"metadata\": self.metadata.serialize(),\n                \"allow_extra_metadata\": self.allow_extra_metadata,\n                \"distribution\": self.distribution.to_dict(),\n                \"mapping\": self.mapping,\n            }\n        except Exception as e:\n            raise 
ArgillaSerializeError(f\"Failed to serialize the settings. {e.__class__.__name__}\") from e\n\n    def to_json(self, path: Union[Path, str]) -> None:\n        \"\"\"Save the settings to a file on disk\n\n        Parameters:\n            path (str): The path to save the settings to\n        \"\"\"\n        if not isinstance(path, Path):\n            path = Path(path)\n        if path.exists():\n            raise FileExistsError(f\"File {path} already exists\")\n        with open(path, \"w\") as file:\n            json.dump(self.serialize(), file)\n\n    @classmethod\n    def from_json(cls, path: Union[Path, str]) -> \"Settings\":\n        \"\"\"Load the settings from a file on disk\"\"\"\n\n        with open(path, \"r\") as file:\n            settings_dict = json.load(file)\n            return cls._from_dict(settings_dict)\n\n    @classmethod\n    def from_hub(\n        cls,\n        repo_id: str,\n        subset: Optional[str] = None,\n        feature_mapping: Optional[Dict[str, Literal[\"question\", \"field\", \"metadata\"]]] = None,\n        **kwargs,\n    ) -> \"Settings\":\n        \"\"\"Load the settings from the Hub\n\n        Parameters:\n            repo_id (str): The ID of the repository to load the settings from on the Hub.\n            subset (Optional[str]): The subset of the repository to load the settings from.\n            feature_mapping (Dict[str, Literal[\"question\", \"field\", \"metadata\"]]): A dictionary that maps incoming column names to Argilla attributes.\n        \"\"\"\n\n        settings = build_settings_from_repo_id(repo_id=repo_id, feature_mapping=feature_mapping, subset=subset)\n        return settings\n\n    def __eq__(self, other: \"Settings\") -> bool:\n        return self.serialize() == other.serialize()  # TODO: Create proper __eq__ methods for fields and questions\n\n    #####################\n    #  Repr Methods     #\n    #####################\n\n    def __repr__(self) -> str:\n        return (\n            f\"Settings(guidelines={self.guidelines}, allow_extra_metadata={self.allow_extra_metadata}, \"\n            f\"distribution={self.distribution}, \"\n            f\"fields={self.fields}, questions={self.questions}, vectors={self.vectors}, metadata={self.metadata})\"\n        )\n\n    #####################\n    #  Private methods  #\n    #####################\n\n    @classmethod\n    def _from_dict(cls, settings_dict: dict) -> \"Settings\":\n        fields = settings_dict.get(\"fields\", [])\n        vectors = settings_dict.get(\"vectors\", [])\n        metadata = settings_dict.get(\"metadata\", [])\n        guidelines = settings_dict.get(\"guidelines\")\n        distribution = settings_dict.get(\"distribution\")\n        allow_extra_metadata = settings_dict.get(\"allow_extra_metadata\")\n        mapping = settings_dict.get(\"mapping\")\n\n        questions = [question_from_dict(question) for question in settings_dict.get(\"questions\", [])]\n        fields = [_field_from_dict(field) for field in fields]\n        vectors = [VectorField.from_dict(vector) for vector in vectors]\n        metadata = [MetadataField.from_dict(metadata) for metadata in metadata]\n\n        if distribution:\n            distribution = TaskDistribution.from_dict(distribution)\n\n        if mapping:\n            mapping = cls._validate_mapping(mapping)\n\n        return cls(\n            questions=questions,\n            fields=fields,\n            vectors=vectors,\n            metadata=metadata,\n            guidelines=guidelines,\n            
allow_extra_metadata=allow_extra_metadata,\n            distribution=distribution,\n            mapping=mapping,\n        )\n\n    def _copy(self) -> \"Settings\":\n        instance = self.__class__._from_dict(self.serialize())\n        return instance\n\n    def _fetch_fields(self) -> List[Field]:\n        models = self._client.api.fields.list(dataset_id=self._dataset.id)\n        return [_field_from_model(model) for model in models]\n\n    def _fetch_questions(self) -> List[QuestionType]:\n        models = self._client.api.questions.list(dataset_id=self._dataset.id)\n        return [question_from_model(model) for model in models]\n\n    def _fetch_vectors(self) -> List[VectorField]:\n        models = self.dataset._client.api.vectors.list(self.dataset.id)\n        return [VectorField.from_model(model) for model in models]\n\n    def _fetch_metadata(self) -> List[MetadataType]:\n        models = self._client.api.metadata.list(dataset_id=self._dataset.id)\n        return [MetadataField.from_model(model) for model in models]\n\n    def __fetch_dataset_related_attributes(self):\n        # This flow may be a bit weird, but it's the only way to update the dataset related attributes\n        # Everything points to the fact that we should have several settings-related endpoints in the API to handle this.\n        # POST /api/v1/datasets/{dataset_id}/settings\n        # {\n        #   \"guidelines\": ....,\n        #   \"allow_extra_metadata\": ....,\n        # }\n        # But this is not implemented yet, so we need to update the dataset model directly\n        dataset_model = self._client.api.datasets.get(self._dataset.id)\n\n        self.guidelines = dataset_model.guidelines\n        self.allow_extra_metadata = dataset_model.allow_extra_metadata\n\n        if dataset_model.distribution:\n            self.distribution = TaskDistribution.from_model(dataset_model.distribution)\n\n    def _update_dataset_related_attributes(self):\n        # This flow may be a bit weird, but it's the only way to update the dataset related attributes\n        # Everything points to the fact that we should have several settings-related endpoints in the API to handle this.\n        # POST /api/v1/datasets/{dataset_id}/settings\n        # {\n        #   \"guidelines\": ....,\n        #   \"allow_extra_metadata\": ....,\n        # }\n        # But this is not implemented yet, so we need to update the dataset model directly\n        dataset_model = DatasetModel(\n            id=self._dataset.id,\n            name=self._dataset.name,\n            guidelines=self.guidelines,\n            allow_extra_metadata=self.allow_extra_metadata,\n            distribution=self.distribution._api_model(),\n        )\n        self._client.api.datasets.update(dataset_model)\n\n    def _validate_empty_settings(self):\n        if not all([self.fields, self.questions]):\n            message = \"Fields and questions are required\"\n            raise SettingsError(message=message)\n\n    def _validate_duplicate_names(self) -> None:\n        dataset_properties_by_name = {}\n\n        for properties in [self.fields, self.questions, self.vectors, self.metadata]:\n            for property in properties:\n                if property.name in dataset_properties_by_name:\n                    raise SettingsError(\n                        f\"names of dataset settings must be unique, \"\n                        f\"but the name {property.name!r} is used by {type(property).__name__!r} and {type(dataset_properties_by_name[property.name]).__name__!r} \"\n                    )\n
                dataset_properties_by_name[property.name] = property\n\n    @classmethod\n    def _validate_mapping(cls, mapping: Dict[str, Union[str, Sequence[str]]]) -> dict:\n        validate_mapping = {}\n        for key, value in mapping.items():\n            if isinstance(value, str):\n                validate_mapping[key] = value\n            elif isinstance(value, list) or isinstance(value, tuple):\n                validate_mapping[key] = tuple(value)\n            else:\n                raise SettingsError(f\"Invalid mapping value for key {key!r}: {value}\")\n\n        return validate_mapping\n\n    @classmethod\n    def _sanitize_settings_name(cls, name: str) -> str:\n        \"\"\"Sanitize the name for the settings\"\"\"\n\n        for char in [\" \", \":\", \".\", \"&\", \"?\", \"!\"]:\n            name = name.replace(char, \"_\")\n\n        return name.lower()\n\n    def __process_guidelines(self, guidelines):\n        if guidelines is None:\n            return guidelines\n\n        if not isinstance(guidelines, str):\n            raise SettingsError(\"Guidelines must be a string or a path to a file\")\n\n        if os.path.exists(guidelines):\n            with open(guidelines, \"r\") as file:\n                return file.read()\n\n        return guidelines\n\n    @classmethod\n    def _is_valid_name(cls, name: str) -> bool:\n        \"\"\"Check if the name is valid\"\"\"\n        return bool(re.match(r\"^(?=.*[a-z0-9])[a-z0-9_-]+$\", name))\n
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings.__init__","title":"__init__(fields=None, questions=None, vectors=None, metadata=None, guidelines=None, allow_extra_metadata=False, distribution=None, mapping=None, _dataset=None)","text":"

Parameters:

Name Type Description Default fields List[Field]

A list of Field objects that represent the fields in the Dataset.

None questions List[Union[LabelQuestion, MultiLabelQuestion, RankingQuestion, TextQuestion, RatingQuestion]]

A list of Question objects that represent the questions in the Dataset.

None vectors List[VectorField]

A list of VectorField objects that represent the vectors in the Dataset.

None metadata List[MetadataField]

A list of MetadataField objects that represent the metadata in the Dataset.

None guidelines str

A string containing the guidelines for the Dataset.

None allow_extra_metadata bool

A boolean that determines whether or not extra metadata is allowed in the Dataset. Defaults to False.

False distribution TaskDistribution

The annotation task distribution configuration. Default to DEFAULT_TASK_DISTRIBUTION

None mapping Dict[str, Union[str, Sequence[str]]]

A dictionary that maps incoming data names to Argilla dataset attributes in DatasetRecords.

None Source code in src/argilla/settings/_resource.py
def __init__(\n    self,\n    fields: Optional[List[Field]] = None,\n    questions: Optional[List[QuestionType]] = None,\n    vectors: Optional[List[VectorField]] = None,\n    metadata: Optional[List[MetadataType]] = None,\n    guidelines: Optional[str] = None,\n    allow_extra_metadata: bool = False,\n    distribution: Optional[TaskDistribution] = None,\n    mapping: Optional[Dict[str, Union[str, Sequence[str]]]] = None,\n    _dataset: Optional[\"Dataset\"] = None,\n) -> None:\n    \"\"\"\n    Args:\n        fields (List[Field]): A list of Field objects that represent the fields in the Dataset.\n        questions (List[Union[LabelQuestion, MultiLabelQuestion, RankingQuestion, TextQuestion, RatingQuestion]]):\n            A list of Question objects that represent the questions in the Dataset.\n        vectors (List[VectorField]): A list of VectorField objects that represent the vectors in the Dataset.\n        metadata (List[MetadataField]): A list of MetadataField objects that represent the metadata in the Dataset.\n        guidelines (str): A string containing the guidelines for the Dataset.\n        allow_extra_metadata (bool): A boolean that determines whether or not extra metadata is allowed in the\n            Dataset. Defaults to False.\n        distribution (TaskDistribution): The annotation task distribution configuration.\n            Default to DEFAULT_TASK_DISTRIBUTION\n        mapping (Dict[str, Union[str, Sequence[str]]]): A dictionary that maps incoming data names to Argilla dataset attributes in DatasetRecords.\n    \"\"\"\n    super().__init__(client=_dataset._client if _dataset else None)\n\n    self._dataset = _dataset\n    self._distribution = distribution\n    self._mapping = mapping\n    self.__guidelines = self.__process_guidelines(guidelines)\n    self.__allow_extra_metadata = allow_extra_metadata\n\n    self.__questions = QuestionsProperties(self, questions)\n    self.__fields = SettingsProperties(self, fields)\n    self.__vectors = SettingsProperties(self, vectors)\n    self.__metadata = SettingsProperties(self, metadata)\n
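
For reference, a minimal sketch of constructing these settings directly; the field and question names are illustrative, and note that guidelines also accepts a path to a file whose contents are read:

settings = rg.Settings(\n    fields=[rg.TextField(name=\"text\")],  # at least one field is required\n    questions=[rg.LabelQuestion(name=\"label\", labels=[\"yes\", \"no\"])],  # at least one question is required\n    guidelines=\"Label each record.\",\n    distribution=rg.TaskDistribution(min_submitted=2),\n)\n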
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings.to_json","title":"to_json(path)","text":"

Save the settings to a file on disk

Parameters:

Name Type Description Default path str

The path to save the settings to

required Source code in src/argilla/settings/_resource.py
def to_json(self, path: Union[Path, str]) -> None:\n    \"\"\"Save the settings to a file on disk\n\n    Parameters:\n        path (str): The path to save the settings to\n    \"\"\"\n    if not isinstance(path, Path):\n        path = Path(path)\n    if path.exists():\n        raise FileExistsError(f\"File {path} already exists\")\n    with open(path, \"w\") as file:\n        json.dump(self.serialize(), file)\n
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings.from_json","title":"from_json(path) classmethod","text":"

Load the settings from a file on disk

Source code in src/argilla/settings/_resource.py
@classmethod\ndef from_json(cls, path: Union[Path, str]) -> \"Settings\":\n    \"\"\"Load the settings from a file on disk\"\"\"\n\n    with open(path, \"r\") as file:\n        settings_dict = json.load(file)\n        return cls._from_dict(settings_dict)\n
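
As a usage sketch, saving and reloading settings is a simple round trip; the file name here is illustrative:

settings.to_json(\"settings.json\")  # raises FileExistsError if the file already exists\nloaded_settings = rg.Settings.from_json(\"settings.json\")\nassert loaded_settings == settings  # __eq__ compares the serialized settings\n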
"},{"location":"reference/argilla/settings/settings/#src.argilla.settings._resource.Settings.from_hub","title":"from_hub(repo_id, subset=None, feature_mapping=None, **kwargs) classmethod","text":"

Load the settings from the Hub

Parameters:

Name Type Description Default repo_id str

The ID of the repository to load the settings from on the Hub.

required subset Optional[str]

The subset of the repository to load the settings from.

None feature_mapping Dict[str, Literal['question', 'field', 'metadata']]

A dictionary that maps incoming column names to Argilla attributes.

None Source code in src/argilla/settings/_resource.py
@classmethod\ndef from_hub(\n    cls,\n    repo_id: str,\n    subset: Optional[str] = None,\n    feature_mapping: Optional[Dict[str, Literal[\"question\", \"field\", \"metadata\"]]] = None,\n    **kwargs,\n) -> \"Settings\":\n    \"\"\"Load the settings from the Hub\n\n    Parameters:\n        repo_id (str): The ID of the repository to load the settings from on the Hub.\n        subset (Optional[str]): The subset of the repository to load the settings from.\n        feature_mapping (Dict[str, Literal[\"question\", \"field\", \"metadata\"]]): A dictionary that maps incoming column names to Argilla attributes.\n    \"\"\"\n\n    settings = build_settings_from_repo_id(repo_id=repo_id, feature_mapping=feature_mapping, subset=subset)\n    return settings\n
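
As an illustrative sketch (the repo_id and mapping below are examples, not fixed values), loading settings from a Hub dataset repository looks like this:

settings = rg.Settings.from_hub(\n    repo_id=\"stanfordnlp/imdb\",\n    feature_mapping={\"label\": \"question\"},  # map an incoming column to an Argilla attribute\n)\n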
"},{"location":"reference/argilla/settings/task_distribution/","title":"Distribution","text":"

Distribution settings are used to define the criteria used by the tool to automatically manage records in the dataset depending on the expected number of submitted responses per record.

"},{"location":"reference/argilla/settings/task_distribution/#usage-examples","title":"Usage Examples","text":"

The default minimum submitted responses per record is 1. If you wish to increase this value, you can define it through the TaskDistribution class and pass it to the Settings class.

settings = rg.Settings(\n    guidelines=\"These are some guidelines.\",\n    fields=[\n        rg.TextField(\n            name=\"text\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"label\",\n            labels=[\"label_1\", \"label_2\", \"label_3\"]\n        ),\n    ],\n    distribution=rg.TaskDistribution(min_submitted=3)\n)\n\ndataset = rg.Dataset(\n    name=\"my_dataset\",\n    settings=settings\n)\n
"},{"location":"reference/argilla/settings/task_distribution/#src.argilla.settings._task_distribution.OverlapTaskDistribution","title":"OverlapTaskDistribution","text":"

The task distribution settings class.

This task distribution defines a number of submitted responses required to complete a record.

Parameters:

Name Type Description Default min_submitted int

The number of min. submitted responses to complete the record

required Source code in src/argilla/settings/_task_distribution.py
class OverlapTaskDistribution:\n    \"\"\"The task distribution settings class.\n\n    This task distribution defines a number of submitted responses required to complete a record.\n\n    Parameters:\n        min_submitted (int): The number of min. submitted responses to complete the record\n    \"\"\"\n\n    strategy: Literal[\"overlap\"] = \"overlap\"\n\n    def __init__(self, min_submitted: int):\n        self._model = OverlapTaskDistributionModel(min_submitted=min_submitted, strategy=self.strategy)\n\n    def __repr__(self) -> str:\n        return f\"OverlapTaskDistribution(min_submitted={self.min_submitted})\"\n\n    def __eq__(self, other) -> bool:\n        if not isinstance(other, self.__class__):\n            return False\n\n        return self._model == other._model\n\n    @classmethod\n    def default(cls) -> \"OverlapTaskDistribution\":\n        return cls(min_submitted=1)\n\n    @property\n    def min_submitted(self):\n        return self._model.min_submitted\n\n    @min_submitted.setter\n    def min_submitted(self, value: int):\n        self._model.min_submitted = value\n\n    @classmethod\n    def from_model(cls, model: OverlapTaskDistributionModel) -> \"OverlapTaskDistribution\":\n        return cls(min_submitted=model.min_submitted)\n\n    @classmethod\n    def from_dict(cls, dict: Dict[str, Any]) -> \"OverlapTaskDistribution\":\n        return cls.from_model(OverlapTaskDistributionModel.model_validate(dict))\n\n    def to_dict(self):\n        return self._model.model_dump()\n\n    def _api_model(self) -> OverlapTaskDistributionModel:\n        return self._model\n
"},{"location":"reference/argilla/settings/vectors/","title":"Vectors","text":"

Vector fields in Argilla are used to store vector representations (embeddings) of the records that will be reviewed by a user, for example to enable semantic search.

"},{"location":"reference/argilla/settings/vectors/#usage-examples","title":"Usage Examples","text":"

To define a vector field, instantiate the VectorField class with a name and dimensions, then pass it to the vectors parameter of the Settings class.

settings = rg.Settings(\n    fields=[\n        rg.TextField(name=\"text\"),\n    ],\n    vectors=[\n        rg.VectorField(\n            name=\"my_vector\",\n            dimensions=768,\n            title=\"Document Embedding\",\n        ),\n    ],\n)\n

To add records with vectors, refer to the rg.Vector class documentation.
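
As a brief, hedged sketch of logging a record with a vector (assuming a dataset whose settings include the vector field above plus at least one question, since both fields and questions are required; the values are illustrative), the vector field name is used as a record key:

dataset = rg.Dataset(name=\"my_dataset\", settings=settings)\ndataset.create()\n\ndataset.records.log(\n    [\n        {\n            \"text\": \"A sample record.\",  # the field defined above\n            \"my_vector\": [0.1] * 768,  # length must match the configured dimensions\n        }\n    ]\n)\n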

"},{"location":"reference/argilla/settings/vectors/#src.argilla.settings._vector.VectorField","title":"VectorField","text":"

Bases: Resource

Vector field for use in Argilla Dataset Settings

Source code in src/argilla/settings/_vector.py
class VectorField(Resource):\n    \"\"\"Vector field for use in Argilla `Dataset` `Settings`\"\"\"\n\n    _model: VectorFieldModel\n    _api: VectorsAPI\n    _dataset: Optional[\"Dataset\"]\n\n    def __init__(\n        self,\n        name: str,\n        dimensions: int,\n        title: Optional[str] = None,\n        _client: Optional[\"Argilla\"] = None,\n    ) -> None:\n        \"\"\"Vector field for use in Argilla `Dataset` `Settings`\n\n        Parameters:\n            name (str): The name of the vector field\n            dimensions (int): The number of dimensions in the vector\n            title (Optional[str]): The title of the vector to be shown in the UI.\n        \"\"\"\n        client = _client or Argilla._get_default()\n        super().__init__(api=client.api.vectors, client=client)\n        self._model = VectorFieldModel(name=name, title=title, dimensions=dimensions)\n        self._dataset = None\n\n    @property\n    def name(self) -> str:\n        return self._model.name\n\n    @name.setter\n    def name(self, value: str) -> None:\n        self._model.name = value\n\n    @property\n    def title(self) -> Optional[str]:\n        return self._model.title\n\n    @title.setter\n    def title(self, value: Optional[str]) -> None:\n        self._model.title = value\n\n    @property\n    def dimensions(self) -> int:\n        return self._model.dimensions\n\n    @dimensions.setter\n    def dimensions(self, value: int) -> None:\n        self._model.dimensions = value\n\n    @property\n    def dataset(self) -> \"Dataset\":\n        return self._dataset\n\n    @dataset.setter\n    def dataset(self, value: \"Dataset\") -> None:\n        self._dataset = value\n        self._model.dataset_id = self._dataset.id\n        self._with_client(self._dataset._client)\n\n    def __repr__(self) -> str:\n        return f\"{self.__class__.__name__}(name={self.name}, title={self.title}, dimensions={self.dimensions})\"\n\n    @classmethod\n    def from_model(cls, model: VectorFieldModel) -> \"VectorField\":\n        instance = cls(name=model.name, dimensions=model.dimensions)\n        instance._model = model\n\n        return instance\n\n    @classmethod\n    def from_dict(cls, data: dict) -> \"VectorField\":\n        model = VectorFieldModel(**data)\n        return cls.from_model(model=model)\n\n    def _with_client(self, client: \"Argilla\") -> \"VectorField\":\n        # TODO: Review and simplify. Maybe only one of them is required\n        self._client = client\n        self._api = self._client.api.vectors\n\n        return self\n
"},{"location":"reference/argilla/settings/vectors/#src.argilla.settings._vector.VectorField.__init__","title":"__init__(name, dimensions, title=None, _client=None)","text":"

Vector field for use in Argilla Dataset Settings

Parameters:

Name Type Description Default name str

The name of the vector field

required dimensions int

The number of dimensions in the vector

required title Optional[str]

The title of the vector to be shown in the UI.

None Source code in src/argilla/settings/_vector.py
def __init__(\n    self,\n    name: str,\n    dimensions: int,\n    title: Optional[str] = None,\n    _client: Optional[\"Argilla\"] = None,\n) -> None:\n    \"\"\"Vector field for use in Argilla `Dataset` `Settings`\n\n    Parameters:\n        name (str): The name of the vector field\n        dimensions (int): The number of dimensions in the vector\n        title (Optional[str]): The title of the vector to be shown in the UI.\n    \"\"\"\n    client = _client or Argilla._get_default()\n    super().__init__(api=client.api.vectors, client=client)\n    self._model = VectorFieldModel(name=name, title=title, dimensions=dimensions)\n    self._dataset = None\n
"},{"location":"reference/argilla-server/configuration/","title":"Server configuration","text":"

This section explains advanced operations and settings for running the Argilla Server and Argilla Python Client.

By default, the Argilla Server will look for your Elasticsearch (ES) endpoint at http://localhost:9200. You can customize this by setting the ARGILLA_ELASTICSEARCH environment variable. Have a look at the list of available environment variables to further configure the Argilla server.

From Argilla version 1.19.0 onwards, you must set up the search engine manually to work with datasets. Set the environment variable ARGILLA_SEARCH_ENGINE=opensearch or ARGILLA_SEARCH_ENGINE=elasticsearch, depending on the backend you're using. The default value for this variable is elasticsearch. The minimum supported version is 8.5.0 for Elasticsearch and 2.4.0 for OpenSearch. Please review your backend and upgrade it if necessary.

Warning

For vector search in OpenSearch, filtering is applied as a post_filter step, since a bug makes queries that combine filtering with knn fail from Argilla. See https://github.com/opensearch-project/k-NN/issues/1286

This may lead to unexpected results when combining filtering with vector search on this engine.

"},{"location":"reference/argilla-server/configuration/#launching","title":"Launching","text":""},{"location":"reference/argilla-server/configuration/#using-a-proxy","title":"Using a proxy","text":"

If you run Argilla behind a proxy by adding some extra prefix to expose the service, you should set the ARGILLA_BASE_URL environment variable to properly route requests to the server application.

For example, if your proxy exposes Argilla in the URL https://my-proxy/custom-path-for-argilla, you should launch the Argilla server with ARGILLA_BASE_URL=/custom-path-for-argilla.

NGINX and Traefik have been tested and are known to work with Argilla:

  • NGINX example
  • Traefik example
"},{"location":"reference/argilla-server/configuration/#environment-variables","title":"Environment variables","text":"

You can set the following environment variables to further configure your server and client.

"},{"location":"reference/argilla-server/configuration/#server","title":"Server","text":""},{"location":"reference/argilla-server/configuration/#fastapi","title":"FastAPI","text":"
  • ARGILLA_HOME_PATH: The directory where Argilla will store all the files needed to run. If the path doesn't exist, it will be created automatically (Default: ~/.argilla).

  • ARGILLA_BASE_URL: If you want to launch the Argilla server in a specific base path other than /, you should set up this environment variable. This can be useful when running Argilla behind a proxy that adds a prefix path to route the service (Default: \"/\").

  • ARGILLA_CORS_ORIGINS: List of host patterns for CORS origin access.

  • ARGILLA_DOCS_ENABLED: If False, disables the OpenAPI docs endpoint at /api/docs.

  • HF_HUB_DISABLE_TELEMETRY: If True, disables telemetry for usage metrics. Alternatively, you can disable telemetry by setting HF_HUB_OFFLINE=1.

"},{"location":"reference/argilla-server/configuration/#authentication","title":"Authentication","text":"
  • ARGILLA_AUTH_SECRET_KEY: The secret key used to sign the API token data. You can use openssl rand -hex 32 to generate a 32 character string to use with this environment variable. By default a random value is generated, so if you are using more than one server worker (or more than one Argilla server) you will need to set the same value for all of them.
  • USERNAME: If provided, the owner username (Default: None).
  • PASSWORD: If provided, the owner password (Default: None).

If USERNAME and PASSWORD are provided, the owner user will be created with these credentials on the server startup.

"},{"location":"reference/argilla-server/configuration/#database","title":"Database","text":"
  • ARGILLA_DATABASE_URL: A URL string that contains the necessary information to connect to a database. Argilla uses SQLite by default; PostgreSQL is also officially supported (Default: sqlite:///$ARGILLA_HOME_PATH/argilla.db?check_same_thread=False).
"},{"location":"reference/argilla-server/configuration/#sqlite","title":"SQLite","text":"

The following environment variables are useful only when SQLite is used:

  • ARGILLA_DATABASE_SQLITE_TIMEOUT: How many seconds the connection should wait before raising an OperationalError when a table is locked. If another connection opens a transaction to modify a table, that table will be locked until the transaction is committed. (Default: 15 seconds).
"},{"location":"reference/argilla-server/configuration/#postgresql","title":"PostgreSQL","text":"

The following environment variables are useful only when PostgreSQL is used:

  • ARGILLA_DATABASE_POSTGRESQL_POOL_SIZE: The number of connections to keep open inside the database connection pool (Default: 15).

  • ARGILLA_DATABASE_POSTGRESQL_MAX_OVERFLOW: The number of connections that can be opened above and beyond ARGILLA_DATABASE_POSTGRESQL_POOL_SIZE setting (Default: 10).

"},{"location":"reference/argilla-server/configuration/#search-engine","title":"Search engine","text":"
  • ARGILLA_ELASTICSEARCH: URL of the connection endpoint of the Elasticsearch instance (Default: http://localhost:9200).

  • ARGILLA_SEARCH_ENGINE: Search engine to use. Valid values are \"elasticsearch\" and \"opensearch\" (Default: \"elasticsearch\").

  • ARGILLA_ELASTICSEARCH_SSL_VERIFY: If \"False\", disables SSL certificate verification when connecting to the Elasticsearch backend.

  • ARGILLA_ELASTICSEARCH_CA_PATH: Path to CA cert for ES host. For example: /full/path/to/root-ca.pem (Optional)

"},{"location":"reference/argilla-server/configuration/#redis","title":"Redis","text":"

Redis is used by Argilla to store information about jobs to be processed in the background. The following environment variables are useful to configure how Argilla connects to Redis:

  • ARGILLA_REDIS_URL: A URL string that contains the necessary information to connect to a Redis instance (Default: redis://localhost:6379/0).
"},{"location":"reference/argilla-server/configuration/#datasets","title":"Datasets","text":"
  • ARGILLA_LABEL_SELECTION_OPTIONS_MAX_ITEMS: Sets the maximum number of items allowed for label and multi-label questions (Default: 500).

  • ARGILLA_SPAN_OPTIONS_MAX_ITEMS: Sets the maximum number of items allowed for span questions (Default: 500).

"},{"location":"reference/argilla-server/configuration/#hugging-face","title":"Hugging Face","text":"
  • ARGILLA_SHOW_HUGGINGFACE_SPACE_PERSISTENT_STORAGE_WARNING: When Argilla is running on Hugging Face Spaces, you can use this environment variable to disable the warning message shown when persistent storage is disabled for the space (Default: true).
"},{"location":"reference/argilla-server/configuration/#docker-images-only","title":"Docker images only","text":"
  • REINDEX_DATASETS: If true or 1, the datasets will be reindexed in the search engine. This is needed when the search configuration has changed or the data must be refreshed (Default: 0).

  • USERNAME: If provided, the owner username. This can be combined with HF OAuth to define the Argilla server owner (Default: \"\").

  • PASSWORD: If provided, the owner password. If USERNAME and PASSWORD are provided, the owner user will be created with these credentials on the server startup (Default: \"\").

  • WORKSPACE: If provided, the workspace name. If USERNAME, PASSWORD and WORKSPACE are provided, a default workspace will be created with this name (Default: \"\").

  • API_KEY: The default user API key to use. If API_KEY is not provided, a new random API key will be generated (Default: \"\").

  • UVICORN_APP: [Advanced] The name of the FastAPI app to run. This is useful when you want to extend the FastAPI app with additional routes or middleware. The default value is argilla_server:app.

"},{"location":"reference/argilla-server/configuration/#rest-api-docs","title":"REST API docs","text":"

FastAPI also provides beautiful REST API docs that you can check at http://localhost:6900/api/v1/docs.

"},{"location":"reference/argilla-server/telemetry/","title":"Server Telemetry","text":"

Argilla uses telemetry to report anonymous usage and error information. As open-source software, this type of information is important for improving the product and understanding how it is used. This is done through the Hugging Face Hub library telemetry implementations.

"},{"location":"reference/argilla-server/telemetry/#how-to-opt-out","title":"How to opt-out","text":"

You can opt out of telemetry reporting by setting the environment variable HF_HUB_DISABLE_TELEMETRY before launching the server. Setting this variable to 1 will completely disable telemetry reporting.

If you are a Linux/macOS user, you should run:

export HF_HUB_DISABLE_TELEMETRY=1\n

If you are a Windows user, you should run:

set HF_HUB_DISABLE_TELEMETRY=1\n

To opt in again, you can set the variable to 0.

"},{"location":"reference/argilla-server/telemetry/#why-reporting-telemetry","title":"Why reporting telemetry","text":"

Anonymous telemetry information enables us to continuously improve the product and detect recurring problems to better serve all users. We collect aggregated information about general usage and errors. We do NOT collect any information on users' data records, datasets, or metadata information.

"},{"location":"reference/argilla-server/telemetry/#sensitive-data","title":"Sensitive data","text":"

We do not collect any piece of information related to the source data you store in Argilla. We don't identify individual users. Your data does not leave your server at any time:

  • No dataset record is collected.
  • No dataset names or metadata are collected.
"},{"location":"reference/argilla-server/telemetry/#information-reported","title":"Information reported","text":"

The following usage and error information is reported:

  • The code of the raised error
  • The user-agent and accept-language http headers
  • Task name and number of records for bulk operations
  • An anonymous generated user uuid
  • An anonymous generated server uuid
  • The Argilla version running the server
  • The Python version, e.g. 3.8.13
  • The system/OS name, such as Linux, Darwin, Windows
  • The system\u2019s release version, e.g. Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:22 PDT 2022; root:xnu-8020
  • The machine type, e.g. AMD64
  • The underlying platform spec with as much useful information as possible (e.g. macOS-10.16-x86_64-i386-64bit)
  • The type of deployment: huggingface_space or server
  • The dockerized deployment flag: True or False

For transparency, you can inspect the source code where this is performed here.

If you have any doubts, don't hesitate to join our Discord channel or open a GitHub issue. We'd be very happy to discuss how we can improve this.

"},{"location":"tutorials/","title":"Tutorials","text":"

These are the tutorials for the Argilla SDK. They provide step-by-step instructions for common tasks.

  • Text classification

    Learn about a standard workflow for a text classification task with model fine-tuning.

    Tutorial

  • Token classification

    Learn about a standard workflow for a token classification task with model fine-tuning.

    Tutorial

  • Image classification

    Learn about a standard workflow for an image classification task with model fine-tuning.

    Tutorial

  • Image preference

    Learn about a standard workflow for multi-modal preference datasets like image generation preference.

    Tutorial

"},{"location":"tutorials/image_classification/","title":"Image classification","text":"
  • Goal: Show a standard workflow for an image classification task.
  • Dataset: MNIST, a dataset of 28x28 grayscale images that need to be classified as digits.
  • Libraries: datasets, transformers
  • Components: ImageField, LabelQuestion, Suggestion

If you already have deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

To complete this tutorial, you need to install the Argilla SDK and a few third-party libraries via pip.

!pip install argilla\n
!pip install \"transformers[torch]~=4.0\" \"accelerate~=0.34\"\n

Let's make the required imports:

import base64\nimport io\nimport re\n\nfrom IPython.display import display\nimport numpy as np\nimport torch\nfrom PIL import Image\n\nfrom datasets import load_dataset, Dataset, load_metric\nfrom transformers import (\n    AutoImageProcessor,\n    AutoModelForImageClassification,\n    pipeline,\n    Trainer,\n    TrainingArguments\n)\n\nimport argilla as rg\n

You also need to connect to the Argilla server using the api_url and api_key.

# Replace api_url with your url if using Docker\n# Replace api_key with your API key under \"My Settings\" in the UI\n# Uncomment the last line and set your HF_TOKEN if your space is private\nclient = rg.Argilla(\n    api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n    api_key=\"[your-api-key]\",\n    # headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n

Now, we will need to configure the dataset. In the settings, we can specify the guidelines, fields, and questions. If needed, you can also add metadata and vectors. However, for our use case, we just need a field for the image column and a label question for the label column.

Note

Check this how-to guide to know more about configuring and creating a dataset.

labels = [str(x) for x in range(10)]\n\nsettings = rg.Settings(\n    guidelines=\"The goal of this task is to classify a given image of a handwritten digit into one of 10 classes representing integer values from 0 to 9, inclusively.\",\n    fields=[\n        rg.ImageField(\n            name=\"image\",\n            title=\"An image of a handwritten digit.\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"image_label\",\n            title=\"What digit do you see on the image?\",\n            labels=labels,\n        )\n    ]\n)\n

Let's create the dataset with the name and the defined settings:

dataset = rg.Dataset(\n    name=\"image_classification_dataset\",\n    settings=settings,\n)\ndataset.create()\n

Even if we have created the dataset, it still lacks the information to be annotated (you can check it in the UI). We will use the ylecun/mnist dataset from the Hugging Face Hub. Specifically, we will use 100 examples. Because we are dealing with a potentially large image dataset, we will set streaming=True to avoid loading the entire dataset into memory and iterate over the data to lazily load it.

Tip

When working with Hugging Face datasets, you can set Image(decode=False) so that you can get public image URLs, but this depends on the dataset.

n_rows = 100\n\nhf_dataset = load_dataset(\"ylecun/mnist\", streaming=True)\ndataset_rows = [row for _,row in zip(range(n_rows), hf_dataset[\"train\"])]\nhf_dataset = Dataset.from_list(dataset_rows)\n\nhf_dataset\n
\nDataset({\n    features: ['image', 'label'],\n    num_rows: 100\n})\n

Let's have a look at the first image in the dataset.

hf_dataset[0]\n
\n{'image': <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28>,\n 'label': 5}\n

We will easily add them to the dataset using log, without needing a mapping since the names already match the Argilla resources. Additionally, since the images are already in PIL format and defined as Image in the Hugging Face dataset\u2019s features, we can log them directly. We will also include an id column in each record, allowing us to easily trace back to the external data source.

hf_dataset = hf_dataset.add_column(\"id\", range(len(hf_dataset)))\ndataset.records.log(records=hf_dataset)\n

The next step is to add suggestions to the dataset. This will make things easier and faster for the annotation team. Suggestions will appear as preselected options, so annotators will only need to correct them. In our case, we will generate them using a zero-shot CLIP model. However, you can use a framework or technique of your choice.

We will start by loading the model using a transformers pipeline.

checkpoint = \"openai/clip-vit-large-patch14\"\ndetector = pipeline(model=checkpoint, task=\"zero-shot-image-classification\")\n

Now, let's try to make a model prediction and see if it makes sense.

predictions = detector(hf_dataset[1][\"image\"], candidate_labels=labels)\npredictions, display(hf_dataset[1][\"image\"])\n
\n([{'score': 0.5236628651618958, 'label': '0'},\n  {'score': 0.11496700346469879, 'label': '7'},\n  {'score': 0.08030630648136139, 'label': '8'},\n  {'score': 0.07141078263521194, 'label': '9'},\n  {'score': 0.05868939310312271, 'label': '6'},\n  {'score': 0.05507850646972656, 'label': '5'},\n  {'score': 0.0341767854988575, 'label': '1'},\n  {'score': 0.027202051132917404, 'label': '4'},\n  {'score': 0.018533246591687202, 'label': '3'},\n  {'score': 0.015973029658198357, 'label': '2'}],\n None)\n

It's time to make predictions on the dataset! We will define a function that uses the zero-shot model to infer the label for each image. When working with large datasets, you can create a batch_predict method to speed up the process, as shown in the sketch after the function below.

def predict(input, labels):\n    prediction = detector(input, candidate_labels=labels)\n    prediction = prediction[0]\n    return {\"image_label\": prediction[\"label\"], \"score\": prediction[\"score\"]}\n
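
As mentioned above, here is a hedged sketch of a batch_predict variant; transformers pipelines also accept a list of inputs, so batching is mostly a matter of passing several images at once:

def batch_predict(inputs, labels, batch_size=16):\n    \"\"\"Run the zero-shot detector over a list of images in batches.\"\"\"\n    results = []\n    for i in range(0, len(inputs), batch_size):\n        batch = inputs[i : i + batch_size]\n        predictions = detector(batch, candidate_labels=labels)\n        results.extend(\n            {\"image_label\": p[0][\"label\"], \"score\": p[0][\"score\"]} for p in predictions\n        )\n    return results\n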

To update the records, we will need to retrieve them from the server and update them with the new suggestions. The id must always be provided, as it is the identifier used to update an existing record rather than create a new one.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"id\": sample[\"id\"],\n        **predict(sample[\"image\"], labels),\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data, mapping={\"score\": \"image_label.suggestion.score\"})\n

Voil\u00e0! We have added the suggestions to the dataset, and they will appear in the UI marked with a \u2728.

Now, we can start the annotation process. Just open the dataset in the Argilla UI and start annotating the records. If the suggestions are correct, you can just click on Submit. Otherwise, you can select the correct label.

Note

Check this how-to guide to know more about annotating in the UI.

After the annotation, we will have a robust dataset to train the main model. In our case, we will fine-tune using transformers. However, you can select the one that best fits your requirements.

So, let's start by retrieving the annotated records and exporting them as a Dataset, so images will be in PIL format.

Note

Check this how-to guide to know more about filtering and querying in Argilla. Also, you can check the Hugging Face docs on fine-tuning an image classification model.

dataset = client.datasets(\"image_classification_dataset\")\n
status_filter = rg.Query(filter=rg.Filter((\"response.status\", \"==\", \"submitted\")))\n\nsubmitted = dataset.records(status_filter).to_datasets()\n

We now need to ensure our images have the correct dimensions. Because the original MNIST images are greyscale and the ViT model expects RGB, we need to add a channel dimension to the images. We will do this by stacking each image along the channel axis.

def greyscale_to_rgb(img) -> Image:\n    return Image.merge('RGB', (img, img, img))\n\nsubmitted_image_rgb = [\n    {\n        \"id\": sample[\"id\"],\n        \"image\": greyscale_to_rgb(sample[\"image\"]),\n        \"label\": sample[\"image_label.responses\"][0],\n    }\n    for sample in submitted\n]\nsubmitted_image_rgb[0]\n
\n{'id': '0', 'image': <PIL.Image.Image image mode=RGB size=28x28>, 'label': '0'}\n

Next, we will load the ImageProcessor to fine-tune the model. This processor will handle the image resizing and normalization in order to be compatible with the model we intend to use.

checkpoint = \"google/vit-base-patch16-224-in21k\"\nprocessor = AutoImageProcessor.from_pretrained(checkpoint)\n\nsubmitted_image_rgb_processed = [\n    {\n        \"pixel_values\": processor(sample[\"image\"], return_tensors='pt')[\"pixel_values\"],\n        \"label\": sample[\"label\"],\n    }\n    for sample in submitted_image_rgb\n]\nsubmitted_image_rgb_processed[0]\n

We can now convert the images to a Hugging Face Dataset that is ready for fine-tuning.

prepared_ds = Dataset.from_list(submitted_image_rgb_processed)\nprepared_ds = prepared_ds.train_test_split(test_size=0.2)\nprepared_ds\n
\nDatasetDict({\n    train: Dataset({\n        features: ['pixel_values', 'label'],\n        num_rows: 80\n    })\n    test: Dataset({\n        features: ['pixel_values', 'label'],\n        num_rows: 20\n    })\n})\n

We then need to define our data collator, which will ensure the data is unpacked and stacked correctly for the model.

def collate_fn(batch):\n    return {\n        'pixel_values': torch.stack([torch.tensor(x['pixel_values'][0]) for x in batch]),\n        'labels': torch.tensor([int(x['label']) for x in batch])\n    }\n

Next, we can define our training metrics. We will use the accuracy metric to evaluate the model's performance.

metric = load_metric(\"accuracy\", trust_remote_code=True)\ndef compute_metrics(p):\n    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)\n

We then load our model and configure the labels that we will use for training.

model = AutoModelForImageClassification.from_pretrained(\n    checkpoint,\n    num_labels=len(labels),\n    id2label={int(i): int(c) for i, c in enumerate(labels)},\n    label2id={int(c): int(i) for i, c in enumerate(labels)}\n)\nmodel.config\n

Finally, we define the training arguments and start the training process.

training_args = TrainingArguments(\n  output_dir=\"./image-classifier\",\n  per_device_train_batch_size=16,\n  eval_strategy=\"steps\",\n  num_train_epochs=1,\n  fp16=False, # True if you have a GPU with mixed precision support\n  save_steps=100,\n  eval_steps=100,\n  logging_steps=10,\n  learning_rate=2e-4,\n  save_total_limit=2,\n  remove_unused_columns=True,\n  push_to_hub=False,\n  load_best_model_at_end=True,\n)\n\ntrainer = Trainer(\n    model=model,\n    args=training_args,\n    data_collator=collate_fn,\n    compute_metrics=compute_metrics,\n    train_dataset=prepared_ds[\"train\"],\n    eval_dataset=prepared_ds[\"test\"],\n    tokenizer=processor,\n)\n\ntrain_results = trainer.train()\ntrainer.save_model()\ntrainer.log_metrics(\"train\", train_results.metrics)\ntrainer.save_metrics(\"train\", train_results.metrics)\ntrainer.save_state()\n
\n{'train_runtime': 12.5374, 'train_samples_per_second': 6.381, 'train_steps_per_second': 0.399, 'train_loss': 2.0533515930175783, 'epoch': 1.0}\n***** train metrics *****\n  epoch                    =        1.0\n  total_flos               =  5774017GF\n  train_loss               =     2.0534\n  train_runtime            = 0:00:12.53\n  train_samples_per_second =      6.381\n  train_steps_per_second   =      0.399\n\n

Since the training data was of higher quality, we can expect a better model, so we will update the remainder of our original dataset with the new model's suggestions.

pipe = pipeline(\"image-classification\", model=model, image_processor=processor)\n\ndef run_inference(batch):\n    predictions = pipe(batch[\"image\"])\n    batch[\"image_label\"] = [prediction[0][\"label\"] for prediction in predictions]\n    batch[\"score\"] = [prediction[0][\"score\"] for prediction in predictions]\n    return batch\n\nhf_dataset = hf_dataset.map(run_inference, batched=True)\n
data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"image_label\": str(sample[\"image_label\"]),\n        \"id\": sample[\"id\"],\n        \"score\": sample[\"score\"],\n    }\n    for sample in hf_dataset\n]\ndataset.records.log(records=updated_data, mapping={\"score\": \"image_label.suggestion.score\"})\n

In this tutorial, we present an end-to-end example of an image classification task. This serves as the base, but it can be performed iteratively and seamlessly integrated into your workflow to ensure high-quality curation of your data and improved results.

We started by configuring the dataset and adding records and suggestions from a zero-shot model. After the annotation process, we trained a new model with the annotated data and updated the remaining records with the new suggestions.

"},{"location":"tutorials/image_classification/#image-classification","title":"Image classification","text":""},{"location":"tutorials/image_classification/#getting-started","title":"Getting started","text":""},{"location":"tutorials/image_classification/#deploy-the-argilla-server","title":"Deploy the Argilla server","text":""},{"location":"tutorials/image_classification/#set-up-the-environment","title":"Set up the environment","text":""},{"location":"tutorials/image_classification/#vibe-check-the-dataset","title":"Vibe check the dataset","text":"

We will look at the dataset to understand its structure and the kind of data it contains. We do this by using the embedded Hugging Face Dataset Viewer.

"},{"location":"tutorials/image_classification/#configure-and-create-the-argilla-dataset","title":"Configure and create the Argilla dataset","text":""},{"location":"tutorials/image_classification/#add-records","title":"Add records","text":""},{"location":"tutorials/image_classification/#add-initial-model-suggestions","title":"Add initial model suggestions","text":""},{"location":"tutorials/image_classification/#evaluate-with-argilla","title":"Evaluate with Argilla","text":""},{"location":"tutorials/image_classification/#train-your-model","title":"Train your model","text":""},{"location":"tutorials/image_classification/#formatting-the-data","title":"Formatting the data","text":""},{"location":"tutorials/image_classification/#the-actual-training","title":"The actual training","text":""},{"location":"tutorials/image_classification/#conclusions","title":"Conclusions","text":""},{"location":"tutorials/image_preference/","title":"Image preference","text":"
  • Goal: Show a standard workflow for working with complex multi-modal preference datasets, such as for image-generation preference.
  • Dataset: tomg-group-umd/pixelprose, a comprehensive dataset of over 16 million synthetically generated captions, leveraging cutting-edge vision-language models (Gemini 1.0 Pro Vision) for detailed and accurate descriptions.
  • Libraries: datasets, sentence-transformers
  • Components: TextField, ImageField, TextQuestion, LabelQuestion, VectorField, FloatMetadataProperty

If you already have deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

To complete this tutorial, you need to install the Argilla SDK and a few third-party libraries via pip.

!pip install argilla\n
!pip install \"sentence-transformers~=3.0\"\n

Let's make the required imports:

import io\nimport os\nimport time\n\nimport argilla as rg\nimport requests\nfrom PIL import Image\nfrom datasets import load_dataset, Dataset\nfrom sentence_transformers import SentenceTransformer\n

You also need to connect to the Argilla server using the api_url and api_key.

# Replace api_url with your url if using Docker\n# Replace api_key with your API key under \"My Settings\" in the UI\n# Uncomment the last line and set your HF_TOKEN if your space is private\nclient = rg.Argilla(\n    api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n    api_key=\"[your-api-key]\",\n    # headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n

Now, we will need to configure the dataset. In the settings, we can specify the guidelines, fields, and questions. We will include a TextField, an ImageField corresponding to the url image column, and two additional ImageField fields representing the images we will generate based on the original_caption column from our dataset. Additionally, we will use a LabelQuestion and an optional TextQuestion, which will be used to collect the user's preference and the reason behind it. We will also be adding a VectorField to store the embeddings for the original_caption so that we can use semantic search and speed up our labeling process. Lastly, we will include two FloatMetadataProperty to store information from the toxicity and the identity_attack columns.

Note

Check this how-to guide to know more about configuring and creating a dataset.

settings = rg.Settings(\n    guidelines=\"The goal is to choose the image that best represents the caption.\",\n    fields=[\n        rg.TextField(\n            name=\"caption\",\n            title=\"An image caption belonging to the original image.\",\n        ),\n        rg.ImageField(\n            name=\"image_original\",\n            title=\"The original image, belonging to the caption.\",\n        ),\n        rg.ImageField(\n            name=\"image_1\",\n            title=\"An image that has been generated based on the caption.\",\n        ),\n        rg.ImageField(\n            name=\"image_2\",\n            title=\"An image that has been generated based on the caption.\",\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"preference\",\n            title=\"The chosen preference for the generation.\",\n            labels=[\"image_1\", \"image_2\"],\n        ),\n        rg.TextQuestion(\n            name=\"comment\",\n            title=\"Any additional comments.\",\n            required=False,\n        ),\n    ],\n    metadata=[\n        rg.FloatMetadataProperty(name=\"toxicity\", title=\"Toxicity score\"),\n        rg.FloatMetadataProperty(name=\"identity_attack\", title=\"Identity attack score\"),\n\n    ],\n    vectors=[\n        rg.VectorField(name=\"original_caption_vector\", dimensions=384),\n    ]\n)\n

Let's create the dataset with the name and the defined settings:

dataset = rg.Dataset(\n    name=\"image_preference_dataset\",\n    settings=settings,\n)\ndataset.create()\n
n_rows = 25\n\nhf_dataset = load_dataset(\"tomg-group-umd/pixelprose\", streaming=True)\ndataset_rows = [row for _,row in zip(range(n_rows), hf_dataset[\"train\"])]\nhf_dataset = Dataset.from_list(dataset_rows)\n\nhf_dataset\n
\nDataset({\n    features: ['uid', 'url', 'key', 'status', 'original_caption', 'vlm_model', 'vlm_caption', 'toxicity', 'severe_toxicity', 'obscene', 'identity_attack', 'insult', 'threat', 'sexual_explicit', 'watermark_class_id', 'watermark_class_score', 'aesthetic_score', 'error_message', 'width', 'height', 'original_width', 'original_height', 'exif', 'sha256', 'image_id', 'author', 'subreddit', 'score'],\n    num_rows: 25\n})\n

Let's have a look at the first entry in the dataset.

hf_dataset[0]\n
\n{'uid': '0065a9b1cb4da4696f2cd6640e00304257cafd97c0064d4c61e44760bf0fa31c',\n 'url': 'https://media.gettyimages.com/photos/plate-of-food-from-murray-bros-caddy-shack-at-the-world-golf-hall-of-picture-id916117812?s=612x612',\n 'key': '007740026',\n 'status': 'success',\n 'original_caption': 'A plate of food from Murray Bros Caddy Shack at the World Golf Hall of Fame',\n 'vlm_model': 'gemini-pro-vision',\n 'vlm_caption': ' This image displays: A plate of fried calamari with a lemon wedge and a side of green beans, served in a basket with a pink bowl of marinara sauce. The basket is sitting on a table with a checkered tablecloth. In the background is a glass of water and a plate with a burger and fries. The style of the image is a photograph.',\n 'toxicity': 0.0005555678508244455,\n 'severe_toxicity': 1.7323875454167137e-06,\n 'obscene': 3.8304504414554685e-05,\n 'identity_attack': 0.00010549413127591833,\n 'insult': 0.00014773994917050004,\n 'threat': 2.5982120860135183e-05,\n 'sexual_explicit': 2.0972733182134107e-05,\n 'watermark_class_id': 1.0,\n 'watermark_class_score': 0.733799934387207,\n 'aesthetic_score': 5.390625,\n 'error_message': None,\n 'width': 612,\n 'height': 408,\n 'original_width': 612,\n 'original_height': 408,\n 'exif': '{\"Image ImageDescription\": \"A plate of food from Murray Bros. Caddy Shack at the World Golf Hall of Fame. (Photo by: Jeffrey Greenberg/Universal Images Group via Getty Images)\", \"Image XResolution\": \"300\", \"Image YResolution\": \"300\"}',\n 'sha256': '0065a9b1cb4da4696f2cd6640e00304257cafd97c0064d4c61e44760bf0fa31c',\n 'image_id': 'null',\n 'author': 'null',\n 'subreddit': -1,\n 'score': -1}\n

As we can see, the url column does not always end with an image extension, so we will apply some additional filtering to ensure we keep only public image URLs.

hf_dataset = hf_dataset.filter(\n    lambda x: any([x[\"url\"].endswith(extension) for extension in [\".jpg\", \".png\", \".jpeg\"]]))\n\nhf_dataset\n
\nDataset({\n    features: ['uid', 'url', 'key', 'status', 'original_caption', 'vlm_model', 'vlm_caption', 'toxicity', 'severe_toxicity', 'obscene', 'identity_attack', 'insult', 'threat', 'sexual_explicit', 'watermark_class_id', 'watermark_class_score', 'aesthetic_score', 'error_message', 'width', 'height', 'original_width', 'original_height', 'exif', 'sha256', 'image_id', 'author', 'subreddit', 'score'],\n    num_rows: 18\n})\n
API_URL = \"https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-schnell\"\nheaders = {\"Authorization\": f\"Bearer {os.getenv('HF_TOKEN')}\"}\n\ndef query(payload):\n    response = requests.post(API_URL, headers=headers, json=payload)\n    if response.status_code == 200:\n        image_bytes = response.content\n        image = Image.open(io.BytesIO(image_bytes))\n    else:\n        print(f\"Request failed with status code {response.status_code}. retrying in 10 seconds.\")\n        time.sleep(10)\n        image = query(payload)\n    return image\n\nquery({\n    \"inputs\": \"Astronaut riding a horse\"\n})\n

Cool! Now that we've tested the generation function, let's generate the PIL images for the dataset.

def generate_image(row):\n    caption = row[\"original_caption\"]\n    row[\"image_1\"] = query({\"inputs\": caption})\n    row[\"image_2\"] = query({\"inputs\": caption + \" \"}) # space to avoid caching and getting the same image\n    return row\n\nhf_dataset_with_images = hf_dataset.map(generate_image, batched=False)\n\nhf_dataset_with_images\n
\nDataset({\n    features: ['uid', 'url', 'key', 'status', 'original_caption', 'vlm_model', 'vlm_caption', 'toxicity', 'severe_toxicity', 'obscene', 'identity_attack', 'insult', 'threat', 'sexual_explicit', 'watermark_class_id', 'watermark_class_score', 'aesthetic_score', 'error_message', 'width', 'height', 'original_width', 'original_height', 'exif', 'sha256', 'image_id', 'author', 'subreddit', 'score', 'image_1', 'image_2'],\n    num_rows: 18\n})\n
model = SentenceTransformer(\"TaylorAI/bge-micro-v2\")\n\ndef encode_questions(batch):\n    vectors_as_numpy = model.encode(batch[\"original_caption\"])\n    batch[\"original_caption_vector\"] = [x.tolist() for x in vectors_as_numpy]\n    return batch\n\nhf_dataset_with_images_vectors = hf_dataset_with_images.map(encode_questions, batched=True)\n
dataset.records.log(records=hf_dataset_with_images_vectors, mapping={\n    \"key\": \"id\",\n    \"original_caption\": \"caption\",\n    \"url\": \"image_original\",\n})\n

Voil\u00e0! We have our Argilla dataset ready for annotation.

Now, we can start the annotation process. Just open the dataset in the Argilla UI and start annotating the records.

Note

Check this how-to guide to know more about annotating in the UI.

In this tutorial, we present an end-to-end example of an image preference task. This serves as the base, but it can be performed iteratively and seamlessly integrated into your workflow to ensure high-quality curation of your data and improved results.

We started by configuring the dataset and adding records with the original and generated images. After the annotation process, you can evaluate the results and potentially retrain the model to improve the quality of the generated images.

"},{"location":"tutorials/image_preference/#image-preference","title":"Image preference","text":""},{"location":"tutorials/image_preference/#getting-started","title":"Getting started","text":""},{"location":"tutorials/image_preference/#deploy-the-argilla-server","title":"Deploy the Argilla server","text":""},{"location":"tutorials/image_preference/#set-up-the-environment","title":"Set up the environment","text":""},{"location":"tutorials/image_preference/#vibe-check-the-dataset","title":"Vibe check the dataset","text":"

We will take a look at the dataset to understand its structure and the types of data it contains. We can do this using the embedded Hugging Face Dataset Viewer.

"},{"location":"tutorials/image_preference/#configure-and-create-the-argilla-dataset","title":"Configure and create the Argilla dataset","text":""},{"location":"tutorials/image_preference/#add-records","title":"Add records","text":"

Even if we have created the dataset, it still lacks the information to be annotated (you can check it in the UI). We will use the tomg-group-umd/pixelprose dataset from the Hugging Face Hub. Specifically, we will use 25 examples. Because we are dealing with a potentially large image dataset, we will set streaming=True to avoid loading the entire dataset into memory and iterate over the data to lazily load it.

Tip

When working with Hugging Face datasets, you can set Image(decode=False) so that you can get public image URLs, but this depends on the dataset.

"},{"location":"tutorials/image_preference/#generate-images","title":"Generate images","text":"

We'll start by generating images based on the original_caption column using the recently released black-forest-labs/FLUX.1-schnell model. For this, we will use the free but rate-limited Inference API provided by Hugging Face, but you can use any other model from the Hub or another method. We will generate 2 images per example. Additionally, we will add a small retry mechanism to handle the rate limit.

Let's begin by defining and testing a generation function.
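
A possible sketch of such a function, using the InferenceClient from huggingface_hub with a naive retry loop (the retry count, wait time, and test prompt are arbitrary choices):

import time\n\nfrom huggingface_hub import InferenceClient\n\n# A dedicated name avoids shadowing the Argilla client\ninference_client = InferenceClient()\n\n\ndef generate_image(prompt, retries=3, wait=5):\n    # Retry a few times to work around the rate limits of the free Inference API\n    for attempt in range(retries):\n        try:\n            return inference_client.text_to_image(\n                prompt, model=\"black-forest-labs/FLUX.1-schnell\"\n            )\n        except Exception:\n            if attempt == retries - 1:\n                raise\n            time.sleep(wait)\n\n\n# Quick test with a made-up caption\nimage = generate_image(\"a watercolor painting of a lighthouse at dusk\")\n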

"},{"location":"tutorials/image_preference/#add-vectors","title":"Add vectors","text":"

We will use the sentence-transformers library to create vectors for the original_caption. We will use the TaylorAI/bge-micro-v2 model, which strikes a good balance between speed and performance. Note that we also need to convert the vectors to a list to store them in the Argilla dataset.

"},{"location":"tutorials/image_preference/#log-to-argilla","title":"Log to Argilla","text":"

We can add them to the dataset using log and a mapping, which indicates which column from our dataset should be mapped to which Argilla resource when the names do not match. We also use the key column as the id for our records so we can easily trace each record back to the external data source.

"},{"location":"tutorials/image_preference/#evaluate-with-argilla","title":"Evaluate with Argilla","text":""},{"location":"tutorials/image_preference/#conclusions","title":"Conclusions","text":""},{"location":"tutorials/text_classification/","title":"Text classification","text":"
  • Goal: Show a standard workflow for a text classification task, including zero-shot suggestions and model fine-tuning.
  • Dataset: IMDB, a dataset of movie reviews that need to be classified as positive or negative.
  • Libraries: datasets, transformers, setfit
  • Components: TextField, LabelQuestion, Suggestion, Query, Filter

If you have already deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

To complete this tutorial, you need to install the Argilla SDK and a few third-party libraries via pip.

!pip install argilla\n
!pip install setfit==1.0.3 transformers==4.40.2\n

Let's make the required imports:

import argilla as rg\n\nfrom datasets import load_dataset, Dataset\nfrom setfit import SetFitModel, Trainer, get_templated_dataset, sample_dataset\n

You also need to connect to the Argilla server using the api_url and api_key.

# Replace api_url with your url if using Docker\n# Replace api_key with your API key under \"My Settings\" in the UI\n# Uncomment the last line and set your HF_TOKEN if your space is private\nclient = rg.Argilla(\n    api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n    api_key=\"[your-api-key]\",\n    # headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n

Now, we will need to configure the dataset. In the settings, we can specify the guidelines, fields, and questions. If needed, you can also add metadata and vectors. However, for our use case, we just need a text field and a label question, corresponding to the text and label columns.

Note

Check this how-to guide to learn more about configuring and creating a dataset.

labels = [\"positive\", \"negative\"]\n\nsettings = rg.Settings(\n    guidelines=\"Classify the reviews as positive or negative.\",\n    fields=[\n        rg.TextField(\n            name=\"review\",\n            title=\"Text from the review\",\n            use_markdown=False,\n        ),\n    ],\n    questions=[\n        rg.LabelQuestion(\n            name=\"sentiment_label\",\n            title=\"In which category does this article fit?\",\n            labels=labels,\n        )\n    ],\n)\n

Let's create the dataset with the name and the defined settings:

dataset = rg.Dataset(\n    name=\"text_classification_dataset\",\n    settings=settings,\n)\ndataset.create()\n

Even though we have created the dataset, it still lacks the information to be annotated (you can check it in the UI). We will use the imdb dataset from the Hugging Face Hub. Specifically, we will use 100 samples from the train split.

hf_dataset = load_dataset(\"imdb\", split=\"train[:100]\")\n

We can add them to the dataset using log and a mapping that indicates that the column text should be added to the field review.

dataset.records.log(records=hf_dataset, mapping={\"text\": \"review\"})\n

The next step is to add suggestions to the dataset. This will make things easier and faster for the annotation team. Suggestions will appear as preselected options, so annotators will only need to correct them. In our case, we will generate them using a zero-shot SetFit model. However, you can use a framework or technique of your choice.

We will start by defining an example training set with the required labels: positive and negative. Using get_templated_dataset will create sentences from the default template: \"This sentence is {label}.\"

zero_ds = get_templated_dataset(\n    candidate_labels=labels,\n    sample_size=8,\n)\n
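
With two labels and sample_size=8, the templated dataset should contain 16 synthetic rows, roughly like this (illustrative output):

print(zero_ds)\n# Dataset({\n#     features: ['text', 'label'],\n#     num_rows: 16\n# })\nprint(zero_ds[0][\"text\"])\n# e.g. \"This sentence is positive\"\n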

Now, we will prepare a function to train the SetFit model.

Note

For further customization, you can check the SetFit documentation.

def train_model(model_name, dataset):\n    model = SetFitModel.from_pretrained(model_name)\n\n    trainer = Trainer(\n        model=model,\n        train_dataset=dataset,\n    )\n\n    trainer.train()\n\n    return model\n

Let's train the model. We will use TaylorAI/bge-micro-v2, available on the Hugging Face Hub.

model = train_model(model_name=\"TaylorAI/bge-micro-v2\", dataset=zero_ds)\n

You can save it locally or push it to the Hub, and then load it from there.

# Save and load locally\n# model.save_pretrained(\"text_classification_model\")\n# model = SetFitModel.from_pretrained(\"text_classification_model\")\n\n# Push and load in HF\n# model.push_to_hub(\"[username]/text_classification_model\")\n# model = SetFitModel.from_pretrained(\"[username]/text_classification_model\")\n

It's time to make the predictions! We will define a function that uses the predict method to get the suggested label. The model will infer the label based on the text.

def predict(model, input, labels):\n    model.labels = labels\n\n    prediction = model.predict([input])\n\n    return prediction[0]\n
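
For example, a quick sanity check on a made-up review could look like this (the output depends on the trained model):

example = \"I loved this movie, the acting was fantastic!\"\nprint(predict(model, example, labels))\n# Expected output (model-dependent): 'positive'\n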

To update the records, we will need to retrieve them from the server and update them with the new suggestions. The id must always be provided, as it identifies the record to be updated and prevents creating a new one.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"sentiment_label\": predict(model, sample[\"review\"], labels),\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n

Voil\u00e0! We have added the suggestions to the dataset, and they will appear in the UI marked with a \u2728.

Now, we can start the annotation process. Just open the dataset in the Argilla UI and start annotating the records. If the suggestions are correct, you can just click on Submit. Otherwise, you can select the correct label.

Note

Check this how-to guide to learn more about annotating in the UI.

After the annotation, we will have a robust dataset to train the main model. In our case, we will fine-tune using SetFit. However, you can select the framework that best fits your requirements. So, let's start by retrieving the annotated records.

Note

Check this how-to guide to learn more about filtering and querying in Argilla. Also, you can check the Hugging Face docs on fine-tuning a text classification model.

dataset = client.datasets(\"text_classification_dataset\")\n
status_filter = rg.Query(filter=rg.Filter((\"response.status\", \"==\", \"submitted\")))\n\nsubmitted = dataset.records(status_filter).to_list(flatten=True)\n

As we have a single response per record, we can retrieve the selected label directly and create the training set with 8 samples per label, which gives us a balanced dataset for few-shot learning.

train_records = [\n    {\n        \"text\": r[\"review\"],\n        \"label\": r[\"sentiment_label.responses\"][0],\n    }\n    for r in submitted\n]\ntrain_dataset = Dataset.from_list(train_records)\ntrain_dataset = sample_dataset(train_dataset, label_column=\"label\", num_samples=8)\n

We can train the model using our previous function, but this time with a high-quality human-annotated training set.

model = train_model(model_name=\"TaylorAI/bge-micro-v2\", dataset=train_dataset)\n

As the training data was of better quality, we can expect a better model, so we can update the remaining non-annotated records with the new model's suggestions.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"sentiment_label\": predict(model, sample[\"review\"], labels),\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n

In this tutorial, we present an end-to-end example of a text classification task. This serves as a base that can be run iteratively and integrated seamlessly into your workflow to ensure high-quality curation of your data and improved results.

We started by configuring the dataset, adding records, and training a zero-shot SetFit model, as an example, to add suggestions. After the annotation process, we trained a new model with the annotated data and updated the remaining records with the new suggestions.

"},{"location":"tutorials/text_classification/#text-classification","title":"Text classification","text":""},{"location":"tutorials/text_classification/#getting-started","title":"Getting started","text":""},{"location":"tutorials/text_classification/#deploy-the-argilla-server","title":"Deploy the Argilla server","text":""},{"location":"tutorials/text_classification/#set-up-the-environment","title":"Set up the environment","text":""},{"location":"tutorials/text_classification/#vibe-check-the-dataset","title":"Vibe check the dataset","text":"

We will take a look at the dataset to understand its structure and the kind of data it contains, using the embedded Hugging Face Dataset Viewer.

"},{"location":"tutorials/text_classification/#configure-and-create-the-argilla-dataset","title":"Configure and create the Argilla dataset","text":""},{"location":"tutorials/text_classification/#add-records","title":"Add records","text":""},{"location":"tutorials/text_classification/#add-initial-model-suggestions","title":"Add initial model suggestions","text":""},{"location":"tutorials/text_classification/#evaluate-with-argilla","title":"Evaluate with Argilla","text":""},{"location":"tutorials/text_classification/#train-your-model","title":"Train your model","text":""},{"location":"tutorials/text_classification/#conclusions","title":"Conclusions","text":""},{"location":"tutorials/token_classification/","title":"Token classification","text":"
  • Goal: Show a standard workflow for a token classification task, including zero-shot suggestions and model fine-tuning.
  • Dataset: ontonotes5, a large corpus comprising various genres of text that need to be classified for Named Entity Recognition.
  • Libraries: datasets, gliner, transformers, spanmarker
  • Components: TextField, SpanQuestion, Suggestion, Query, Filter

If you have already deployed Argilla, you can skip this step. Otherwise, you can quickly deploy Argilla following this guide.

To complete this tutorial, you need to install the Argilla SDK and a few third-party libraries via pip.

!pip install argilla\n
!pip install gliner==0.2.6 transformers==4.40.2 span_marker==1.5.0\n

Let's make the required imports:

import re\n\nimport argilla as rg\n\nimport torch\nfrom datasets import load_dataset, Dataset, DatasetDict\nfrom gliner import GLiNER\nfrom span_marker import SpanMarkerModel, Trainer\nfrom transformers import TrainingArguments\n

You also need to connect to the Argilla server with the api_url and api_key.

# Replace api_url with your url if using Docker\n# Replace api_key with your API key under \"My Settings\" in the UI\n# Uncomment the last line and set your HF_TOKEN if your space is private\nclient = rg.Argilla(\n    api_url=\"https://[your-owner-name]-[your_space_name].hf.space\",\n    api_key=\"[your-api-key]\",\n    # headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}\n)\n

Now, we will need to configure the dataset. In the settings, we can specify the guidelines, fields, and questions. If needed, you can also add metadata and vectors. However, for our use case, we just need a text field and a span question, corresponding to the token and tags columns. We will focus on Named Entity Recognition, but this workflow can also be applied to Span Classification, which differs in that the spans are less clearly defined and often overlap.

labels = [\n    \"CARDINAL\",\n    \"DATE\",\n    \"PERSON\",\n    \"NORP\",\n    \"GPE\",\n    \"LAW\",\n    \"PERCENT\",\n    \"ORDINAL\",\n    \"MONEY\",\n    \"WORK_OF_ART\",\n    \"FAC\",\n    \"TIME\",\n    \"QUANTITY\",\n    \"PRODUCT\",\n    \"LANGUAGE\",\n    \"ORG\",\n    \"LOC\",\n    \"EVENT\",\n]\n\nsettings = rg.Settings(\n    guidelines=\"Classify individual tokens according to the specified categories, ensuring that any overlapping or nested entities are accurately captured.\",\n    fields=[\n        rg.TextField(\n            name=\"text\",\n            title=\"Text\",\n            use_markdown=False,\n        ),\n    ],\n    questions=[\n        rg.SpanQuestion(\n            name=\"span_label\",\n            field=\"text\",\n            labels=labels,\n            title=\"Classify the tokens according to the specified categories.\",\n            allow_overlapping=False,\n        )\n    ],\n)\n

Let's create the dataset with the name and the defined settings:

dataset = rg.Dataset(\n    name=\"token_classification_dataset\",\n    settings=settings,\n)\ndataset.create()\n

We have created the dataset (you can check it in the UI), but we still need to add the data for annotation. In this case, we will use the ontonotes5 dataset from the Hugging Face Hub. Specifically, we will use 2100 samples from the test split.

hf_dataset = load_dataset(\"tner/ontonotes5\", split=\"test[:2100]\")\n

We will iterate over the Hugging Face dataset, adding data to the corresponding field in the Record object for the Argilla dataset. Then, we will easily add them to the dataset using log.

records = [rg.Record(fields={\"text\": \" \".join(row[\"tokens\"])}) for row in hf_dataset]\n\ndataset.records.log(records)\n

The next step is to add suggestions to the dataset. This will make things easier and faster for the annotation team. Suggestions will appear as preselected options, so annotators will only need to correct them. In our case, we will generate them using a GLiNER model. However, you can use a framework or technique of your choice.

Note

For further information, you can check the GLiNER repository and the original paper.

We will start by loading the pre-trained GLiNER model. Specifically, we will use gliner_mediumv2, available on the Hugging Face Hub.

gliner_model = GLiNER.from_pretrained(\"urchade/gliner_mediumv2.1\")\n

Next, we will create a function to generate predictions using this general model, which can identify the specified labels without being pre-trained on them. The function will return a dictionary formatted with the necessary schema to add entities to our Argilla dataset. This schema includes the keys start and end to indicate the indices where the span begins and ends, as well as label for the entity label.

def predict_gliner(model, text, labels, threshold):\n    entities = model.predict_entities(text, labels, threshold)\n    return [\n        {k: v for k, v in ent.items() if k not in {\"score\", \"text\"}} for ent in entities\n    ]\n

To update the records, we will need to retrieve them from the server and update them with the new suggestions. The id must always be provided, as it identifies the record to be updated and prevents creating a new one.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"span_label\": predict_gliner(\n            model=gliner_model, text=sample[\"text\"], labels=labels, threshold=0.70\n        ),\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n

Voil\u00e0! We have added the suggestions to the dataset, and they will appear in the UI marked with a \u2728.

Now, we can start the annotation process. Just open the dataset in the Argilla UI and start annotating the records. If the suggestions are correct, you can just click on Submit. Otherwise, you can select the correct label.

Note

Check this how-to guide to learn more about annotating in the UI.

After the annotation, we will have a robust dataset to train our model for entity recognition. In our case, we will train a SpanMarker model, but you can select any model of your choice. So, let's start by retrieving the annotated records.

Note

Check this how-to guide to learn more about filtering and querying in Argilla. Also, you can check the Hugging Face docs on fine-tuning a token classification model.

dataset = client.datasets(\"token_classification_dataset\")\n

In our case, we submitted 2000 annotations using the bulk view.

status_filter = rg.Query(filter=rg.Filter((\"response.status\", \"==\", \"submitted\")))\n\nsubmitted = dataset.records(status_filter).to_list(flatten=True)\n

SpanMarker accepts any dataset as long as it has the tokens and ner_tags columns. The ner_tags can be annotated using the IOB, IOB2, BIOES or BILOU labeling scheme, as well as regular unschemed labels. In our case, we have chosen to use the IOB format. Thus, we will define a function to extract the annotated NER tags according to this scheme.

Note

For further information, you can check the SpanMarker documentation.
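
To make the scheme concrete, here is a small, made-up example of the tags the function below should produce:

# Hypothetical sentence split into tokens\nexample_tokens = [\"John\", \"lives\", \"in\", \"New\", \"York\"]\n# IOB tags: \"New York\" is a GPE span covering two tokens\nexample_tags = [\"B-PERSON\", \"O\", \"O\", \"B-GPE\", \"I-GPE\"]\n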

def get_iob_tag_for_token(token_start, token_end, ner_spans):\n    for span in ner_spans:\n        if token_start >= span[\"start\"] and token_end <= span[\"end\"]:\n            if token_start == span[\"start\"]:\n                return f\"B-{span['label']}\"\n            else:\n                return f\"I-{span['label']}\"\n    return \"O\"\n\n\ndef extract_ner_tags(text, responses):\n    tokens = re.split(r\"(\\s+)\", text)\n    ner_tags = []\n\n    current_position = 0\n    for token in tokens:\n        if token.strip():\n            token_start = current_position\n            token_end = current_position + len(token)\n            tag = get_iob_tag_for_token(token_start, token_end, responses)\n            ner_tags.append(tag)\n        current_position += len(token)\n\n    return ner_tags\n

Let's now extract them and save two lists with the tokens and NER tags, which will help us build our dataset to train the SpanMarker model.

tokens = []\nner_tags = []\nfor r in submitted:\n    tags = extract_ner_tags(r[\"text\"], r[\"span_label.responses\"][0])\n    tks = r[\"text\"].split()\n    tokens.append(tks)\n    ner_tags.append(tags)\n

In addition, we have to indicate the labels, which should be formatted as integers, so we will retrieve them and map them to their ids.

labels = list(set([item for sublist in ner_tags for item in sublist]))\n\nid2label = {i: label for i, label in enumerate(labels)}\nlabel2id = {label: id_ for id_, label in id2label.items()}\n\nmapped_ner_tags = [[label2id[label] for label in ner_tag] for ner_tag in ner_tags]\n

Finally, we will create a dataset with the train and validation sets.

records = [\n    {\n        \"tokens\": token,\n        \"ner_tags\": ner_tag,\n    }\n    for token, ner_tag in zip(tokens, mapped_ner_tags)\n]\nspan_dataset = DatasetDict(\n    {\n        \"train\": Dataset.from_list(records[:1500]),\n        \"validation\": Dataset.from_list(records[1500:2000]),\n    }\n)\n

Now, let's prepare to train our model. For this, it is recommended to use a GPU. You can check if one is available as shown below.

if torch.cuda.is_available():\n    device = torch.device(\"cuda\")\n    print(f\"Using {torch.cuda.get_device_name(0)}\")\nelif torch.backends.mps.is_available():\n    device = torch.device(\"mps\")\n    print(\"Using MPS device\")\nelse:\n    device = torch.device(\"cpu\")\n    print(\"No GPU available, using CPU instead.\")\n

We will define our model and arguments. In this case, we will use bert-base-cased, available on the Hugging Face Hub, but other encoders can be used.

Note

The training arguments are inherited from the Transformers library. You can find more information here.

encoder_id = \"bert-base-cased\"\nmodel = SpanMarkerModel.from_pretrained(\n    encoder_id,\n    labels=labels,\n    model_max_length=256,\n    entity_max_length=8,\n)\n\nargs = TrainingArguments(\n    output_dir=\"models/span-marker\",\n    learning_rate=5e-5,\n    per_device_train_batch_size=8,\n    per_device_eval_batch_size=8,\n    num_train_epochs=1,\n    weight_decay=0.01,\n    warmup_ratio=0.1,\n    fp16=False,  # Set to True if available\n    logging_first_step=True,\n    logging_steps=50,\n    evaluation_strategy=\"steps\",\n    save_strategy=\"steps\",\n    eval_steps=500,\n    save_total_limit=2,\n    dataloader_num_workers=2,\n)\n\ntrainer = Trainer(\n    model=model,\n    args=args,\n    train_dataset=span_dataset[\"train\"],\n    eval_dataset=span_dataset[\"validation\"],\n)\n

Let's train it! This time, we use a high-quality human-annotated training set, so the results are expected to improve.

trainer.train()\n
trainer.evaluate()\n

You can save it locally or push it to the Hub, and then load it from there.

# Save and load locally\n# model.save_pretrained(\"token_classification_model\")\n# model = SpanMarkerModel.from_pretrained(\"token_classification_model\")\n\n# Push and load in HF\n# model.push_to_hub(\"[username]/token_classification_model\")\n# model = SpanMarkerModel.from_pretrained(\"[username]/token_classification_model\")\n

It's time to make the predictions! We will define a function that uses the predict method to get the suggested label. The model will infer the label based on the text. The function will return the spans in the corresponding structure for the Argilla dataset.

def predict_spanmarker(model, text):\n    entities = model.predict(text)\n    return [\n        {\n            \"start\": ent[\"char_start_index\"],\n            \"end\": ent[\"char_end_index\"],\n            \"label\": ent[\"label\"],\n        }\n        for ent in entities\n    ]\n
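
As a quick, illustrative check (the exact spans and offsets depend on the trained model):

print(predict_spanmarker(model, \"John Smith visited Paris last Tuesday.\"))\n# Possible output (model-dependent):\n# [{'start': 0, 'end': 10, 'label': 'PERSON'}, {'start': 19, 'end': 24, 'label': 'GPE'}]\n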

As the training data was of better quality, we can expect a better model, so we can update the remaining non-annotated records with the new model's suggestions.

data = dataset.records.to_list(flatten=True)\nupdated_data = [\n    {\n        \"span_label\": predict_spanmarker(model=model, text=sample[\"text\"]),\n        \"id\": sample[\"id\"],\n    }\n    for sample in data\n]\ndataset.records.log(records=updated_data)\n

In this tutorial, we present an end-to-end example of a token classification task. This serves as a base that can be run iteratively and integrated seamlessly into your workflow to ensure high-quality curation of your data and improved results.

We started by configuring the dataset, adding records, and adding suggestions based on the GLiNER predictions. After the annotation process, we trained a SpanMarker model with the annotated data and updated the remaining records with the new suggestions.

"},{"location":"tutorials/token_classification/#token-classification","title":"Token classification","text":""},{"location":"tutorials/token_classification/#getting-started","title":"Getting started","text":""},{"location":"tutorials/token_classification/#deploy-the-argilla-server","title":"Deploy the Argilla server","text":""},{"location":"tutorials/token_classification/#set-up-the-environment","title":"Set up the environment","text":""},{"location":"tutorials/token_classification/#vibe-check-the-dataset","title":"Vibe check the dataset","text":"

We will take a look at the dataset to understand its structure and the kind of data it contains, using the embedded Hugging Face Dataset Viewer.

"},{"location":"tutorials/token_classification/#configure-and-create-the-argilla-dataset","title":"Configure and create the Argilla dataset","text":""},{"location":"tutorials/token_classification/#add-records","title":"Add records","text":""},{"location":"tutorials/token_classification/#add-initial-model-suggestions","title":"Add initial model suggestions","text":""},{"location":"tutorials/token_classification/#evaluate-with-argilla","title":"Evaluate with Argilla","text":""},{"location":"tutorials/token_classification/#train-your-model","title":"Train your model","text":""},{"location":"tutorials/token_classification/#conclusions","title":"Conclusions","text":""}]} \ No newline at end of file