From aca281dabaf7a31892623d4e096d97d8c0cabc42 Mon Sep 17 00:00:00 2001 From: <> Date: Wed, 28 Feb 2024 13:03:20 +0000 Subject: [PATCH] Deployed a6187b8 with MkDocs version: 1.5.3 --- reference/cli/common_arguments/index.html | 170 +++---- reference/cli/config/index.html | 438 ++++++++-------- reference/cli/model_arguments/index.html | 334 ++++++------ reference/cli/module_arguments/index.html | 476 +++++++++--------- reference/cli/module_setup/index.html | 228 ++++----- reference/common/common/index.html | 26 +- reference/common/dicts/index.html | 160 +++--- reference/common/io/index.html | 218 ++++---- reference/common/strings/index.html | 90 ++-- .../modules/dataloader/metadata/index.html | 218 ++++---- search/search_index.json | 2 +- sitemap.xml.gz | Bin 127 -> 127 bytes 12 files changed, 1178 insertions(+), 1182 deletions(-) diff --git a/reference/cli/common_arguments/index.html b/reference/cli/common_arguments/index.html index 0fc5aef9..b4265abf 100644 --- a/reference/cli/common_arguments/index.html +++ b/reference/cli/common_arguments/index.html @@ -2561,8 +2561,7 @@

Source code in src/nhssynth/cli/common_arguments.py -
12
-13
+            
13
 14
 15
 16
@@ -2594,40 +2593,41 @@ 

42 43 44 -45

def get_core_parser(overrides=False) -> argparse.ArgumentParser:
-    """
-    Create the core common parser group applied to all modules (and the `pipeline` and `config` options).
-    Note that we leverage common titling of the argument group to ensure arguments appear together even if declared separately.
-
-    Args:
-        overrides: whether the arguments declared within are required or not.
-
-    Returns:
-        The parser with the group containing the core arguments attached.
-    """
-    core = argparse.ArgumentParser(add_help=False)
-    core_grp = core.add_argument_group(title="options")
-    core_grp.add_argument(
-        "-d",
-        "--dataset",
-        required=(not overrides),
-        type=str,
-        help="the name of the dataset to experiment with, should be present in `<DATA_DIR>`",
-    )
-    core_grp.add_argument(
-        "-e",
-        "--experiment-name",
-        type=str,
-        default=TIME,
-        help="name the experiment run to affect logging, config, and default-behaviour i/o",
-    )
-    core_grp.add_argument(
-        "--save-config",
-        action="store_true",
-        help="save the config provided via the cli, this is a recommended option for reproducibility",
-    )
-    return core
+45
+46
def get_core_parser(overrides=False) -> argparse.ArgumentParser:
+    """
+    Create the core common parser group applied to all modules (and the `pipeline` and `config` options).
+    Note that we leverage common titling of the argument group to ensure arguments appear together even if declared separately.
+
+    Args:
+        overrides: whether the arguments declared within are required or not.
+
+    Returns:
+        The parser with the group containing the core arguments attached.
+    """
+    core = argparse.ArgumentParser(add_help=False)
+    core_grp = core.add_argument_group(title="options")
+    core_grp.add_argument(
+        "-d",
+        "--dataset",
+        required=(not overrides),
+        type=str,
+        help="the name of the dataset to experiment with, should be present in `<DATA_DIR>`",
+    )
+    core_grp.add_argument(
+        "-e",
+        "--experiment-name",
+        type=str,
+        default=TIME,
+        help="name the experiment run to affect logging, config, and default-behaviour i/o",
+    )
+    core_grp.add_argument(
+        "--save-config",
+        action="store_true",
+        help="save the config provided via the cli, this is a recommended option for reproducibility",
+    )
+    return core
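A minimal usage sketch (not part of the documented source): the returned parser is intended to be composed via `parents=` so the shared group appears on every module sub-parser. The import path `nhssynth.cli.common_arguments` is assumed from the `src/` layout shown above, and the dataset name is illustrative.

import argparse

from nhssynth.cli.common_arguments import get_core_parser  # assumed import path

# Compose the core group into a throwaway parser and parse a sample command line.
parser = argparse.ArgumentParser(prog="nhssynth", parents=[get_core_parser()])
args = parser.parse_args(["--dataset", "support", "--save-config"])
print(args.dataset, args.experiment_name, args.save_config)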
 
@@ -2705,8 +2705,7 @@

Source code in src/nhssynth/cli/common_arguments.py -
48
-49
+            
49
 50
 51
 52
@@ -2724,26 +2723,27 @@ 

64 65 66 -67

def get_seed_parser(overrides=False) -> argparse.ArgumentParser:
-    """
-    Create the common parser group for the seed.
-    NB This is separate to the rest of the core arguments as it does not apply to the dashboard module.
-
-    Args:
-        overrides: whether the arguments declared within are required or not.
-
-    Returns:
-        The parser with the group containing the seed argument attached.
-    """
-    parser = argparse.ArgumentParser(add_help=False)
-    parser_grp = parser.add_argument_group(title="options")
-    parser_grp.add_argument(
-        "-s",
-        "--seed",
-        type=int,
-        help="specify a seed for reproducibility, this is a recommended option for reproducibility",
-    )
-    return parser
+67
+68
def get_seed_parser(overrides=False) -> argparse.ArgumentParser:
+    """
+    Create the common parser group for the seed.
+    NB This is separate to the rest of the core arguments as it does not apply to the dashboard module.
+
+    Args:
+        overrides: whether the arguments declared within are required or not.
+
+    Returns:
+        The parser with the group containing the seed argument attached.
+    """
+    parser = argparse.ArgumentParser(add_help=False)
+    parser_grp = parser.add_argument_group(title="options")
+    parser_grp.add_argument(
+        "-s",
+        "--seed",
+        type=int,
+        help="specify a seed for reproducibility, this is a recommended option for reproducibility",
+    )
+    return parser
 
@@ -2826,10 +2826,7 @@

Source code in src/nhssynth/cli/common_arguments.py -
73
-74
-75
-76
+            
76
 77
 78
 79
@@ -2848,29 +2845,32 @@ 

92 93 94 -95

def suffix_parser_generator(name: str, help: str, required: bool = False) -> argparse.ArgumentParser:
-    """Generator function for creating parsers following a common template.
-    These parsers are all suffixes to the --dataset / -d / DATASET argument, see `COMMON_TITLE`.
-
-    Args:
-        name: the name / label of the argument to add to the CLI options.
-        help: the help message when the CLI is run with --help / -h.
-        required: whether the argument must be provided or not.
-    """
-
-    def get_parser(overrides: bool = False) -> argparse.ArgumentParser:
-        parser = argparse.ArgumentParser(add_help=False)
-        parser_grp = parser.add_argument_group(title=COMMON_TITLE)
-        parser_grp.add_argument(
-            f"--{name.replace('_', '-')}",
-            required=required and not overrides,
-            type=str,
-            default=f"_{name}",
-            help=help,
-        )
-        return parser
-
-    return get_parser
+95
+96
+97
+98
def suffix_parser_generator(name: str, help: str, required: bool = False) -> argparse.ArgumentParser:
+    """Generator function for creating parsers following a common template.
+    These parsers are all suffixes to the --dataset / -d / DATASET argument, see `COMMON_TITLE`.
+
+    Args:
+        name: the name / label of the argument to add to the CLI options.
+        help: the help message when the CLI is run with --help / -h.
+        required: whether the argument must be provided or not.
+    """
+
+    def get_parser(overrides: bool = False) -> argparse.ArgumentParser:
+        parser = argparse.ArgumentParser(add_help=False)
+        parser_grp = parser.add_argument_group(title=COMMON_TITLE)
+        parser_grp.add_argument(
+            f"--{name.replace('_', '-')}",
+            required=required and not overrides,
+            type=str,
+            default=f"_{name}",
+            help=help,
+        )
+        return parser
+
+    return get_parser
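For illustration, a sketch of the factory pattern above; the argument name and help text here are invented, and the import path is assumed.

import argparse

from nhssynth.cli.common_arguments import suffix_parser_generator  # assumed import path

# Build a parser factory for a hypothetical `--metadata` suffix argument.
get_metadata_parser = suffix_parser_generator(
    name="metadata",
    help="filename suffix for the metadata file",
)
parser = argparse.ArgumentParser(parents=[get_metadata_parser()])
print(parser.parse_args([]).metadata)  # "_metadata" (the generated default)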
 
diff --git a/reference/cli/config/index.html b/reference/cli/config/index.html index 7866705c..cf8a993c 100644 --- a/reference/cli/config/index.html +++ b/reference/cli/config/index.html @@ -2615,8 +2615,7 @@

Source code in src/nhssynth/cli/config.py -
151
-152
+            
152
 153
 154
 155
@@ -2676,68 +2675,69 @@ 

209 210 211 -212

def assemble_config(
-    args: argparse.Namespace,
-    all_subparsers: dict[str, argparse.ArgumentParser],
-) -> dict[str, Any]:
-    """
-    Assemble and arrange a nested-via-module configuration dictionary from parsed command-line arguments to be output as a YAML record.
-
-    Args:
-        args: A namespace object containing all parsed command-line arguments.
-        all_subparsers: A dictionary mapping module names to subparser objects.
-
-    Returns:
-        A dictionary containing configuration information extracted from `args` in a module-wise nested format that is YAML-friendly.
-
-    Raises:
-        ValueError: If `modules_to_run` is neither a single module nor the full pipeline.
-    """
-    args_dict = vars(args)
-
-    # Filter out the keys that are not relevant to the config file
-    args_dict = filter_dict(
-        args_dict, {"func", "experiment_name", "save_config", "save_config_path", "module_handover"}
-    )
-    for k in args_dict.copy().keys():
-        # Remove empty metric lists from the config
-        if "_metrics" in k and not args_dict[k]:
-            args_dict.pop(k)
-
-    modules_to_run = args_dict.pop("modules_to_run")
-    if len(modules_to_run) == 1:
-        run_type = modules_to_run[0]
-    elif modules_to_run == PIPELINE:
-        run_type = "pipeline"
-    else:
-        raise ValueError(f"Invalid value for `modules_to_run`: {modules_to_run}")
-
-    # Generate a dictionary containing each module's name from the run, with all of its possible corresponding config args
-    module_args = {
-        module_name: [action.dest for action in all_subparsers[module_name]._actions if action.dest != "help"]
-        for module_name in modules_to_run
-    }
-
-    # Use the flat namespace to populate a nested (by module) dictionary of config args and values
-    out_dict = {}
-    for module_name in modules_to_run:
-        for k in args_dict.copy().keys():
-            # We want to keep dataset, experiment_name, seed and save_config at the top-level as they are core args
-            if k in module_args[module_name] and k not in {
-                "version",
-                "dataset",
-                "experiment_name",
-                "seed",
-                "save_config",
-            }:
-                if module_name not in out_dict:
-                    out_dict[module_name] = {}
-                v = args_dict.pop(k)
-                if v is not None:
-                    out_dict[module_name][k] = v
-
-    # Assemble the final dictionary in YAML-compliant form
-    return {**({"run_type": run_type} if run_type else {}), **args_dict, **out_dict}
+212
+213
def assemble_config(
+    args: argparse.Namespace,
+    all_subparsers: dict[str, argparse.ArgumentParser],
+) -> dict[str, Any]:
+    """
+    Assemble and arrange a nested-via-module configuration dictionary from parsed command-line arguments to be output as a YAML record.
+
+    Args:
+        args: A namespace object containing all parsed command-line arguments.
+        all_subparsers: A dictionary mapping module names to subparser objects.
+
+    Returns:
+        A dictionary containing configuration information extracted from `args` in a module-wise nested format that is YAML-friendly.
+
+    Raises:
+        ValueError: If `modules_to_run` is neither a single module nor the full pipeline.
+    """
+    args_dict = vars(args)
+
+    # Filter out the keys that are not relevant to the config file
+    args_dict = filter_dict(
+        args_dict, {"func", "experiment_name", "save_config", "save_config_path", "module_handover"}
+    )
+    for k in args_dict.copy().keys():
+        # Remove empty metric lists from the config
+        if "_metrics" in k and not args_dict[k]:
+            args_dict.pop(k)
+
+    modules_to_run = args_dict.pop("modules_to_run")
+    if len(modules_to_run) == 1:
+        run_type = modules_to_run[0]
+    elif modules_to_run == PIPELINE:
+        run_type = "pipeline"
+    else:
+        raise ValueError(f"Invalid value for `modules_to_run`: {modules_to_run}")
+
+    # Generate a dictionary containing each module's name from the run, with all of its possible corresponding config args
+    module_args = {
+        module_name: [action.dest for action in all_subparsers[module_name]._actions if action.dest != "help"]
+        for module_name in modules_to_run
+    }
+
+    # Use the flat namespace to populate a nested (by module) dictionary of config args and values
+    out_dict = {}
+    for module_name in modules_to_run:
+        for k in args_dict.copy().keys():
+            # We want to keep dataset, experiment_name, seed and save_config at the top-level as they are core args
+            if k in module_args[module_name] and k not in {
+                "version",
+                "dataset",
+                "experiment_name",
+                "seed",
+                "save_config",
+            }:
+                if module_name not in out_dict:
+                    out_dict[module_name] = {}
+                v = args_dict.pop(k)
+                if v is not None:
+                    out_dict[module_name][k] = v
+
+    # Assemble the final dictionary in YAML-compliant form
+    return {**({"run_type": run_type} if run_type else {}), **args_dict, **out_dict}
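To illustrate the module-wise nested, YAML-friendly shape described above, the sketch below dumps a hand-written example of such a dictionary; the module names and values are invented, not the output of a real run.

import yaml

# Hypothetical nested configuration in the shape assemble_config produces.
example_config = {
    "run_type": "pipeline",
    "dataset": "support",
    "seed": 123,
    "dataloader": {"missingness": "augment", "collapse_yaml": True},
    "model": {"architecture": ["VAE"], "num_epochs": 50},
}
print(yaml.dump(example_config, default_flow_style=False, sort_keys=False))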
 
@@ -2831,8 +2831,7 @@

Source code in src/nhssynth/cli/config.py -
13
-14
+            
14
 15
 16
 17
@@ -2856,32 +2855,33 @@ 

35 36 37 -38

def get_default_and_required_args(
-    top_parser: argparse.ArgumentParser,
-    module_parsers: dict[str, argparse.ArgumentParser],
-) -> tuple[dict[str, Any], list[dict[str, str]]]:
-    """
-    Get the default and required arguments for the top-level parser and the current run's corresponding list of module parsers.
-
-    Args:
-        top_parser: The top-level parser (contains common arguments).
-        module_parsers: The dict of module-level parsers mapped to their names.
-
-    Returns:
-        A tuple containing two elements:
-            - A dictionary containing all arguments and their default values.
-            - A list of key-value-pairs of the required arguments and their associated module.
-    """
-    all_actions = {"top-level": top_parser._actions} | {m: p._actions for m, p in module_parsers.items()}
-    defaults = {}
-    required_args = []
-    for module, actions in all_actions.items():
-        for action in actions:
-            if action.dest not in ["help", "==SUPPRESS=="]:
-                defaults[action.dest] = action.default
-                if action.required:
-                    required_args.append({"arg": action.dest, "module": module})
-    return defaults, required_args
+38
+39
def get_default_and_required_args(
+    top_parser: argparse.ArgumentParser,
+    module_parsers: dict[str, argparse.ArgumentParser],
+) -> tuple[dict[str, Any], list[dict[str, str]]]:
+    """
+    Get the default and required arguments for the top-level parser and the current run's corresponding list of module parsers.
+
+    Args:
+        top_parser: The top-level parser (contains common arguments).
+        module_parsers: The dict of module-level parsers mapped to their names.
+
+    Returns:
+        A tuple containing two elements:
+            - A dictionary containing all arguments and their default values.
+            - A list of key-value-pairs of the required arguments and their associated module.
+    """
+    all_actions = {"top-level": top_parser._actions} | {m: p._actions for m, p in module_parsers.items()}
+    defaults = {}
+    required_args = []
+    for module, actions in all_actions.items():
+        for action in actions:
+            if action.dest not in ["help", "==SUPPRESS=="]:
+                defaults[action.dest] = action.default
+                if action.required:
+                    required_args.append({"arg": action.dest, "module": module})
+    return defaults, required_args
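A self-contained sketch of the scraping logic above, using stand-in parsers rather than the real top-level and module parsers.

import argparse

top = argparse.ArgumentParser(add_help=False)
top.add_argument("--dataset", required=True)
model = argparse.ArgumentParser(add_help=False)
model.add_argument("--batch-size", type=int, default=32)

# Mirror the loop above: collect defaults and flag required arguments per module.
defaults, required_args = {}, []
for module, parser in {"top-level": top, "model": model}.items():
    for action in parser._actions:
        if action.dest not in ["help", "==SUPPRESS=="]:
            defaults[action.dest] = action.default
            if action.required:
                required_args.append({"arg": action.dest, "module": module})

print(defaults)       # {'dataset': None, 'batch_size': 32}
print(required_args)  # [{'arg': 'dataset', 'module': 'top-level'}]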
 
@@ -2959,8 +2959,7 @@

Source code in src/nhssynth/cli/config.py -
135
-136
+            
136
 137
 138
 139
@@ -2972,20 +2971,21 @@ 

145 146 147 -148

def get_modules_to_run(executor: Callable) -> list[str]:
-    """
-    Get the list of modules to run from the passed executor function.
-
-    Args:
-        executor: The executor function to run.
-
-    Returns:
-        A list of module names to run.
-    """
-    if executor == run_pipeline:
-        return PIPELINE
-    else:
-        return [get_key_by_value({mn: mc.func for mn, mc in MODULE_MAP.items()}, executor)]
+148
+149
def get_modules_to_run(executor: Callable) -> list[str]:
+    """
+    Get the list of modules to run from the passed executor function.
+
+    Args:
+        executor: The executor function to run.
+
+    Returns:
+        A list of module names to run.
+    """
+    if executor == run_pipeline:
+        return PIPELINE
+    else:
+        return [get_key_by_value({mn: mc.func for mn, mc in MODULE_MAP.items()}, executor)]
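A minimal illustration of the reverse lookup above, with stand-in executor functions and a stand-in `get_key_by_value` (the real helper lives in `nhssynth.common.dicts`, per the reference pages listed in this patch).

def run_dataloader(args): ...
def run_model(args): ...

def get_key_by_value(d, value):
    # assumed behaviour: return the first key whose value matches
    return next(k for k, v in d.items() if v == value)

MODULE_MAP = {"dataloader": run_dataloader, "model": run_model}  # stand-in mapping
print(get_key_by_value(MODULE_MAP, run_model))  # "model"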
 
@@ -3123,8 +3123,7 @@

Source code in src/nhssynth/cli/config.py -
 41
- 42
+            
 42
  43
  44
  45
@@ -3214,98 +3213,99 @@ 

129 130 131 -132

def read_config(
-    args: argparse.Namespace,
-    parser: argparse.ArgumentParser,
-    all_subparsers: dict[str, argparse.ArgumentParser],
-) -> argparse.Namespace:
-    """
-    Hierarchically assembles a config `argparse.Namespace` object for the inferred modules to run and execute, given a file.
-
-    1. Load the YAML file containing the config to read from
-    2. Check a valid `run_type` is specified or infer it and determine the list of `modules_to_run`
-    3. Establish the appropriate default configuration set of arguments from the `parser` and `all_subparsers` for the determined `modules_to_run`
-    4. Overwrite these with the specified (sub)set of config in the YAML file
-    5. Overwrite again with passed command-line `args` (these are considered 'overrides')
-    6. Run the appropriate module(s) or pipeline with the resulting configuration `Namespace` object
-
-    Args:
-        args: Namespace object containing arguments from the command line
-        parser: top-level `ArgumentParser` object containing common arguments
-        all_subparsers: dictionary of `ArgumentParser` objects, one for each module
-
-    Returns:
-        A Namespace object containing the assembled configuration settings
-
-    Raises:
-        AssertionError: if any required arguments are missing from the configuration file / overrides
-    """
-    # Open the passed yaml file and load into a dictionary
-    with open(f"config/{args.input_config}.yaml") as stream:
-        config_dict = yaml.safe_load(stream)
-
-    valid_run_types = [x for x in all_subparsers.keys() if x != "config"]
-
-    version = config_dict.pop("version", None)
-    if version and version != ver("nhssynth"):
-        warnings.warn(
-            f"This config file's specified version ({version}) does not match the currently installed version of nhssynth ({ver('nhssynth')}), results may differ."
-        )
-    elif not version:
-        version = ver("nhssynth")
-
-    run_type = config_dict.pop("run_type", None)
-
-    if run_type == "pipeline":
-        modules_to_run = PIPELINE
-    else:
-        modules_to_run = [x for x in config_dict.keys() | {run_type} if x in valid_run_types]
-        if not args.custom_pipeline:
-            modules_to_run = sorted(modules_to_run, key=lambda x: PIPELINE.index(x))
-
-    if not modules_to_run:
-        warnings.warn(
-            f"Missing or invalid `run_type` and/or module specification hierarchy in `config/{args.input_config}.yaml`, defaulting to a full run of the pipeline"
-        )
-        modules_to_run = PIPELINE
-
-    # Get all possible default arguments by scraping the top level `parser` and the appropriate sub-parser for the `run_type`
-    args_dict, required_args = get_default_and_required_args(
-        parser, filter_dict(all_subparsers, modules_to_run, include=True)
-    )
-
-    # Find the non-default arguments amongst passed `args` by seeing which of them are different to the entries of `args_dict`
-    non_default_passed_args_dict = {
-        k: v
-        for k, v in vars(args).items()
-        if k in ["input_config", "custom_pipeline"] or (k in args_dict and k != "func" and v != args_dict[k])
-    }
-
-    # Overwrite the default arguments with the ones from the yaml file
-    args_dict.update(flatten_dict(config_dict))
-
-    # Overwrite the result of the above with any non-default CLI args
-    args_dict.update(non_default_passed_args_dict)
-
-    # Create a new Namespace using the assembled dictionary
-    new_args = argparse.Namespace(**args_dict)
-    assert getattr(
-        new_args, "dataset"
-    ), "No dataset specified in the passed config file, provide one with the `--dataset` argument or add it to the config file"
-    assert all(
-        getattr(new_args, req_arg["arg"]) for req_arg in required_args
-    ), f"Required arguments are missing from the passed config file: {[ra['module'] + ':' + ra['arg'] for ra in required_args if not getattr(new_args, ra['arg'])]}"
-
-    # Run the appropriate execution function(s)
-    if not new_args.seed:
-        warnings.warn("No seed has been specified, meaning the results of this run may not be reproducible.")
-    new_args.version = version
-    new_args.modules_to_run = modules_to_run
-    new_args.module_handover = {}
-    for module in new_args.modules_to_run:
-        MODULE_MAP[module](new_args)
-
-    return new_args
+132
+133
def read_config(
+    args: argparse.Namespace,
+    parser: argparse.ArgumentParser,
+    all_subparsers: dict[str, argparse.ArgumentParser],
+) -> argparse.Namespace:
+    """
+    Hierarchically assembles a config `argparse.Namespace` object for the inferred modules to run and execute, given a file.
+
+    1. Load the YAML file containing the config to read from
+    2. Check a valid `run_type` is specified or infer it and determine the list of `modules_to_run`
+    3. Establish the appropriate default configuration set of arguments from the `parser` and `all_subparsers` for the determined `modules_to_run`
+    4. Overwrite these with the specified (sub)set of config in the YAML file
+    5. Overwrite again with passed command-line `args` (these are considered 'overrides')
+    6. Run the appropriate module(s) or pipeline with the resulting configuration `Namespace` object
+
+    Args:
+        args: Namespace object containing arguments from the command line
+        parser: top-level `ArgumentParser` object containing common arguments
+        all_subparsers: dictionary of `ArgumentParser` objects, one for each module
+
+    Returns:
+        A Namespace object containing the assembled configuration settings
+
+    Raises:
+        AssertionError: if any required arguments are missing from the configuration file / overrides
+    """
+    # Open the passed yaml file and load into a dictionary
+    with open(f"config/{args.input_config}.yaml") as stream:
+        config_dict = yaml.safe_load(stream)
+
+    valid_run_types = [x for x in all_subparsers.keys() if x != "config"]
+
+    version = config_dict.pop("version", None)
+    if version and version != ver("nhssynth"):
+        warnings.warn(
+            f"This config file's specified version ({version}) does not match the currently installed version of nhssynth ({ver('nhssynth')}), results may differ."
+        )
+    elif not version:
+        version = ver("nhssynth")
+
+    run_type = config_dict.pop("run_type", None)
+
+    if run_type == "pipeline":
+        modules_to_run = PIPELINE
+    else:
+        modules_to_run = [x for x in config_dict.keys() | {run_type} if x in valid_run_types]
+        if not args.custom_pipeline:
+            modules_to_run = sorted(modules_to_run, key=lambda x: PIPELINE.index(x))
+
+    if not modules_to_run:
+        warnings.warn(
+            f"Missing or invalid `run_type` and/or module specification hierarchy in `config/{args.input_config}.yaml`, defaulting to a full run of the pipeline"
+        )
+        modules_to_run = PIPELINE
+
+    # Get all possible default arguments by scraping the top level `parser` and the appropriate sub-parser for the `run_type`
+    args_dict, required_args = get_default_and_required_args(
+        parser, filter_dict(all_subparsers, modules_to_run, include=True)
+    )
+
+    # Find the non-default arguments amongst passed `args` by seeing which of them are different to the entries of `args_dict`
+    non_default_passed_args_dict = {
+        k: v
+        for k, v in vars(args).items()
+        if k in ["input_config", "custom_pipeline"] or (k in args_dict and k != "func" and v != args_dict[k])
+    }
+
+    # Overwrite the default arguments with the ones from the yaml file
+    args_dict.update(flatten_dict(config_dict))
+
+    # Overwrite the result of the above with any non-default CLI args
+    args_dict.update(non_default_passed_args_dict)
+
+    # Create a new Namespace using the assembled dictionary
+    new_args = argparse.Namespace(**args_dict)
+    assert getattr(
+        new_args, "dataset"
+    ), "No dataset specified in the passed config file, provide one with the `--dataset` argument or add it to the config file"
+    assert all(
+        getattr(new_args, req_arg["arg"]) for req_arg in required_args
+    ), f"Required arguments are missing from the passed config file: {[ra['module'] + ':' + ra['arg'] for ra in required_args if not getattr(new_args, ra['arg'])]}"
+
+    # Run the appropriate execution function(s)
+    if not new_args.seed:
+        warnings.warn("No seed has been specified, meaning the results of this run may not be reproducible.")
+    new_args.version = version
+    new_args.modules_to_run = modules_to_run
+    new_args.module_handover = {}
+    for module in new_args.modules_to_run:
+        MODULE_MAP[module](new_args)
+
+    return new_args
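For reference, a config file consistent with the hierarchy read above might look like the YAML below, loaded here from a string; all field values are illustrative.

import yaml

example_yaml = """
run_type: pipeline
dataset: support
seed: 123
dataloader:
  missingness: augment
model:
  architecture: [VAE]
  num_epochs: 50
"""
config_dict = yaml.safe_load(example_yaml)
print(config_dict.pop("run_type"), sorted(config_dict))  # pipeline ['dataloader', 'dataset', 'model', 'seed']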
 
@@ -3373,8 +3373,7 @@

Source code in src/nhssynth/cli/config.py -
215
-216
+            
216
 217
 218
 219
@@ -3387,21 +3386,22 @@ 

226 227 228 -229

def write_config(
-    args: argparse.Namespace,
-    all_subparsers: dict[str, argparse.ArgumentParser],
-) -> None:
-    """
-    Assembles a configuration dictionary from the run config and writes it to a YAML file at `experiments/<EXPERIMENT_NAME>/config_<EXPERIMENT_NAME>.yaml`.
-
-    Args:
-        args: A namespace containing the run's configuration.
-        all_subparsers: A dictionary containing all subparsers for the config args.
-    """
-    experiment_name = args.experiment_name
-    args_dict = assemble_config(args, all_subparsers)
-    with open(f"experiments/{experiment_name}/config_{experiment_name}.yaml", "w") as yaml_file:
-        yaml.dump(args_dict, yaml_file, default_flow_style=False, sort_keys=False)
+229
+230
def write_config(
+    args: argparse.Namespace,
+    all_subparsers: dict[str, argparse.ArgumentParser],
+) -> None:
+    """
+    Assembles a configuration dictionary from the run config and writes it to a YAML file at `experiments/<EXPERIMENT_NAME>/config_<EXPERIMENT_NAME>.yaml`.
+
+    Args:
+        args: A namespace containing the run's configuration.
+        all_subparsers: A dictionary containing all subparsers for the config args.
+    """
+    experiment_name = args.experiment_name
+    args_dict = assemble_config(args, all_subparsers)
+    with open(f"experiments/{experiment_name}/config_{experiment_name}.yaml", "w") as yaml_file:
+        yaml.dump(args_dict, yaml_file, default_flow_style=False, sort_keys=False)
 
diff --git a/reference/cli/model_arguments/index.html b/reference/cli/model_arguments/index.html index 353b9c85..5a36a58e 100644 --- a/reference/cli/model_arguments/index.html +++ b/reference/cli/model_arguments/index.html @@ -2505,8 +2505,7 @@

Source code in src/nhssynth/cli/model_arguments.py -
 76
- 77
+            
 77
  78
  79
  80
@@ -2600,102 +2599,103 @@ 

168 169 170 -171

def add_gan_args(group: argparse._ArgumentGroup, overrides: bool = False) -> None:
-    """Adds arguments to an existing group for the GAN model."""
-    group.add_argument(
-        "--n-units-conditional",
-        type=int,
-        help="the number of units in the conditional layer",
-    )
-    group.add_argument(
-        "--generator-n-layers-hidden",
-        type=int,
-        help="the number of hidden layers in the generator",
-    )
-    group.add_argument(
-        "--generator-n-units-hidden",
-        type=int,
-        help="the number of units in each hidden layer of the generator",
-    )
-    group.add_argument(
-        "--generator-activation",
-        type=str,
-        choices=list(ACTIVATION_FUNCTIONS.keys()),
-        help="the activation function of the generator",
-    )
-    group.add_argument(
-        "--generator-batch-norm",
-        action="store_true",
-        help="whether to use batch norm in the generator",
-    )
-    group.add_argument(
-        "--generator-dropout",
-        type=float,
-        help="the dropout rate in the generator",
-    )
-    group.add_argument(
-        "--generator-lr",
-        type=float,
-        help="the learning rate for the generator",
-    )
-    group.add_argument(
-        "--generator-residual",
-        action="store_true",
-        help="whether to use residual connections in the generator",
-    )
-    group.add_argument(
-        "--generator-opt-betas",
-        type=float,
-        nargs=2,
-        help="the beta values for the generator optimizer",
-    )
-    group.add_argument(
-        "--discriminator-n-layers-hidden",
-        type=int,
-        help="the number of hidden layers in the discriminator",
-    )
-    group.add_argument(
-        "--discriminator-n-units-hidden",
-        type=int,
-        help="the number of units in each hidden layer of the discriminator",
-    )
-    group.add_argument(
-        "--discriminator-activation",
-        type=str,
-        choices=list(ACTIVATION_FUNCTIONS.keys()),
-        help="the activation function of the discriminator",
-    )
-    group.add_argument(
-        "--discriminator-batch-norm",
-        action="store_true",
-        help="whether to use batch norm in the discriminator",
-    )
-    group.add_argument(
-        "--discriminator-dropout",
-        type=float,
-        help="the dropout rate in the discriminator",
-    )
-    group.add_argument(
-        "--discriminator-lr",
-        type=float,
-        help="the learning rate for the discriminator",
-    )
-    group.add_argument(
-        "--discriminator-opt-betas",
-        type=float,
-        nargs=2,
-        help="the beta values for the discriminator optimizer",
-    )
-    group.add_argument(
-        "--clipping-value",
-        type=float,
-        help="the clipping value for the discriminator",
-    )
-    group.add_argument(
-        "--lambda-gradient-penalty",
-        type=float,
-        help="the gradient penalty coefficient",
-    )
+171
+172
def add_gan_args(group: argparse._ArgumentGroup, overrides: bool = False) -> None:
+    """Adds arguments to an existing group for the GAN model."""
+    group.add_argument(
+        "--n-units-conditional",
+        type=int,
+        help="the number of units in the conditional layer",
+    )
+    group.add_argument(
+        "--generator-n-layers-hidden",
+        type=int,
+        help="the number of hidden layers in the generator",
+    )
+    group.add_argument(
+        "--generator-n-units-hidden",
+        type=int,
+        help="the number of units in each hidden layer of the generator",
+    )
+    group.add_argument(
+        "--generator-activation",
+        type=str,
+        choices=list(ACTIVATION_FUNCTIONS.keys()),
+        help="the activation function of the generator",
+    )
+    group.add_argument(
+        "--generator-batch-norm",
+        action="store_true",
+        help="whether to use batch norm in the generator",
+    )
+    group.add_argument(
+        "--generator-dropout",
+        type=float,
+        help="the dropout rate in the generator",
+    )
+    group.add_argument(
+        "--generator-lr",
+        type=float,
+        help="the learning rate for the generator",
+    )
+    group.add_argument(
+        "--generator-residual",
+        action="store_true",
+        help="whether to use residual connections in the generator",
+    )
+    group.add_argument(
+        "--generator-opt-betas",
+        type=float,
+        nargs=2,
+        help="the beta values for the generator optimizer",
+    )
+    group.add_argument(
+        "--discriminator-n-layers-hidden",
+        type=int,
+        help="the number of hidden layers in the discriminator",
+    )
+    group.add_argument(
+        "--discriminator-n-units-hidden",
+        type=int,
+        help="the number of units in each hidden layer of the discriminator",
+    )
+    group.add_argument(
+        "--discriminator-activation",
+        type=str,
+        choices=list(ACTIVATION_FUNCTIONS.keys()),
+        help="the activation function of the discriminator",
+    )
+    group.add_argument(
+        "--discriminator-batch-norm",
+        action="store_true",
+        help="whether to use batch norm in the discriminator",
+    )
+    group.add_argument(
+        "--discriminator-dropout",
+        type=float,
+        help="the dropout rate in the discriminator",
+    )
+    group.add_argument(
+        "--discriminator-lr",
+        type=float,
+        help="the learning rate for the discriminator",
+    )
+    group.add_argument(
+        "--discriminator-opt-betas",
+        type=float,
+        nargs=2,
+        help="the beta values for the discriminator optimizer",
+    )
+    group.add_argument(
+        "--clipping-value",
+        type=float,
+        help="the clipping value for the discriminator",
+    )
+    group.add_argument(
+        "--lambda-gradient-penalty",
+        type=float,
+        help="the gradient penalty coefficient",
+    )
 
@@ -2719,21 +2719,21 @@

Source code in src/nhssynth/cli/model_arguments.py -
 7
- 8
+            
def add_model_specific_args(group: argparse._ArgumentGroup, name: str, overrides: bool = False) -> None:
-    """Adds arguments to an existing group according to `name`."""
-    if name == "VAE":
-        add_vae_args(group, overrides)
-    elif name == "GAN":
-        add_gan_args(group, overrides)
-    elif name == "TabularGAN":
-        add_tabular_gan_args(group, overrides)
+14
+15
def add_model_specific_args(group: argparse._ArgumentGroup, name: str, overrides: bool = False) -> None:
+    """Adds arguments to an existing group according to `name`."""
+    if name == "VAE":
+        add_vae_args(group, overrides)
+    elif name == "GAN":
+        add_gan_args(group, overrides)
+    elif name == "TabularGAN":
+        add_tabular_gan_args(group, overrides)
 
@@ -2757,8 +2757,7 @@

Source code in src/nhssynth/cli/model_arguments.py -
17
-18
+            
18
 19
 20
 21
@@ -2813,63 +2812,64 @@ 

70 71 72 -73

def add_vae_args(group: argparse._ArgumentGroup, overrides: bool = False) -> None:
-    """Adds arguments to an existing group for the VAE model."""
-    group.add_argument(
-        "--encoder-latent-dim",
-        type=int,
-        nargs="+",
-        help="the latent dimension of the encoder",
-    )
-    group.add_argument(
-        "--encoder-hidden-dim",
-        type=int,
-        nargs="+",
-        help="the hidden dimension of the encoder",
-    )
-    group.add_argument(
-        "--encoder-activation",
-        type=str,
-        nargs="+",
-        choices=list(ACTIVATION_FUNCTIONS.keys()),
-        help="the activation function of the encoder",
-    )
-    group.add_argument(
-        "--encoder-learning-rate",
-        type=float,
-        nargs="+",
-        help="the learning rate for the encoder",
-    )
-    group.add_argument(
-        "--decoder-latent-dim",
-        type=int,
-        nargs="+",
-        help="the latent dimension of the decoder",
-    )
-    group.add_argument(
-        "--decoder-hidden-dim",
-        type=int,
-        nargs="+",
-        help="the hidden dimension of the decoder",
-    )
-    group.add_argument(
-        "--decoder-activation",
-        type=str,
-        nargs="+",
-        choices=list(ACTIVATION_FUNCTIONS.keys()),
-        help="the activation function of the decoder",
-    )
-    group.add_argument(
-        "--decoder-learning-rate",
-        type=float,
-        nargs="+",
-        help="the learning rate for the decoder",
-    )
-    group.add_argument(
-        "--shared-optimizer",
-        action="store_true",
-        help="whether to use a shared optimizer for the encoder and decoder",
-    )
+73
+74
def add_vae_args(group: argparse._ArgumentGroup, overrides: bool = False) -> None:
+    """Adds arguments to an existing group for the VAE model."""
+    group.add_argument(
+        "--encoder-latent-dim",
+        type=int,
+        nargs="+",
+        help="the latent dimension of the encoder",
+    )
+    group.add_argument(
+        "--encoder-hidden-dim",
+        type=int,
+        nargs="+",
+        help="the hidden dimension of the encoder",
+    )
+    group.add_argument(
+        "--encoder-activation",
+        type=str,
+        nargs="+",
+        choices=list(ACTIVATION_FUNCTIONS.keys()),
+        help="the activation function of the encoder",
+    )
+    group.add_argument(
+        "--encoder-learning-rate",
+        type=float,
+        nargs="+",
+        help="the learning rate for the encoder",
+    )
+    group.add_argument(
+        "--decoder-latent-dim",
+        type=int,
+        nargs="+",
+        help="the latent dimension of the decoder",
+    )
+    group.add_argument(
+        "--decoder-hidden-dim",
+        type=int,
+        nargs="+",
+        help="the hidden dimension of the decoder",
+    )
+    group.add_argument(
+        "--decoder-activation",
+        type=str,
+        nargs="+",
+        choices=list(ACTIVATION_FUNCTIONS.keys()),
+        help="the activation function of the decoder",
+    )
+    group.add_argument(
+        "--decoder-learning-rate",
+        type=float,
+        nargs="+",
+        help="the learning rate for the decoder",
+    )
+    group.add_argument(
+        "--shared-optimizer",
+        action="store_true",
+        help="whether to use a shared optimizer for the encoder and decoder",
+    )
 
diff --git a/reference/cli/module_arguments/index.html b/reference/cli/module_arguments/index.html index 43779f0b..b5814a89 100644 --- a/reference/cli/module_arguments/index.html +++ b/reference/cli/module_arguments/index.html @@ -2533,8 +2533,7 @@

Source code in src/nhssynth/cli/module_arguments.py -
10
-11
+              
11
 12
 13
 14
@@ -2551,25 +2550,26 @@ 

25 26 27 -28

class AllChoicesDefault(argparse.Action):
-    """
-    Customised argparse action for defaulting to the full list of choices if only the argument's flag is supplied:
-    (i.e. user passes `--metrics` with no follow up list of metric groups => all metric groups will be executed).
-
-    Notes:
-        1) If no `option_string` is supplied: set to default value (`self.default`)
-        2) If `option_string` is supplied:
-            a) If `values` are supplied, set to list of values
-            b) If no `values` are supplied, set to `self.const`, if `self.const` is not set, set to `self.default`
-    """
-
-    def __call__(self, parser, namespace, values=None, option_string=None):
-        if values:
-            setattr(namespace, self.dest, values)
-        elif option_string:
-            setattr(namespace, self.dest, self.const if self.const else self.default)
-        else:
-            setattr(namespace, self.dest, self.default)
+28
+29
class AllChoicesDefault(argparse.Action):
+    """
+    Customised argparse action for defaulting to the full list of choices if only the argument's flag is supplied:
+    (i.e. user passes `--metrics` with no follow up list of metric groups => all metric groups will be executed).
+
+    Notes:
+        1) If no `option_string` is supplied: set to default value (`self.default`)
+        2) If `option_string` is supplied:
+            a) If `values` are supplied, set to list of values
+            b) If no `values` are supplied, set to `self.const`, if `self.const` is not set, set to `self.default`
+    """
+
+    def __call__(self, parser, namespace, values=None, option_string=None):
+        if values:
+            setattr(namespace, self.dest, values)
+        elif option_string:
+            setattr(namespace, self.dest, self.const if self.const else self.default)
+        else:
+            setattr(namespace, self.dest, self.default)
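A usage sketch of the action above; the import path and the metric-group names are assumptions, not taken from the real CLI.

import argparse

from nhssynth.cli.module_arguments import AllChoicesDefault  # assumed import path

parser = argparse.ArgumentParser()
parser.add_argument(
    "--metrics",
    action=AllChoicesDefault,
    nargs="*",
    default=None,
    const=["univariate", "bivariate", "privacy"],  # illustrative metric groups
)
print(parser.parse_args([]).metrics)                        # None (flag absent)
print(parser.parse_args(["--metrics"]).metrics)             # the full `const` list
print(parser.parse_args(["--metrics", "privacy"]).metrics)  # ['privacy']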
 
@@ -2612,8 +2612,7 @@

Source code in src/nhssynth/cli/module_arguments.py -
31
-32
+            
32
 33
 34
 35
@@ -2655,50 +2654,51 @@ 

71 72 73 -74

def add_dataloader_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:
-    """Adds arguments to an existing dataloader module sub-parser instance."""
-    group = parser.add_argument_group(title=group_title)
-    group.add_argument(
-        "--data-dir",
-        type=str,
-        default="./data",
-        help="the directory containing the chosen dataset",
-    )
-    group.add_argument(
-        "--index-col",
-        default=None,
-        nargs="*",
-        help="indicate the name of the index column(s) in the csv file, such that pandas can index by it",
-    )
-    group.add_argument(
-        "--constraint-graph",
-        type=str,
-        default="_constraint_graph",
-        help="the name of the html file to write the constraint graph to, defaults to `<DATASET>_constraint_graph`",
-    )
-    group.add_argument(
-        "--collapse-yaml",
-        action="store_true",
-        help="use aliases and anchors in the output metadata yaml, this will make it much more compact",
-    )
-    group.add_argument(
-        "--missingness",
-        type=str,
-        default="augment",
-        choices=MISSINGNESS_STRATEGIES,
-        help="how to handle missing values in the dataset",
-    )
-    group.add_argument(
-        "--impute",
-        type=str,
-        default=None,
-        help="the imputation strategy to use, ONLY USED if <MISSINGNESS> is set to 'impute', choose from: 'mean', 'median', 'mode', or any specific value (e.g. '0')",
-    )
-    group.add_argument(
-        "--write-csv",
-        action="store_true",
-        help="write the transformed real data to a csv file",
-    )
+74
+75
def add_dataloader_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:
+    """Adds arguments to an existing dataloader module sub-parser instance."""
+    group = parser.add_argument_group(title=group_title)
+    group.add_argument(
+        "--data-dir",
+        type=str,
+        default="./data",
+        help="the directory containing the chosen dataset",
+    )
+    group.add_argument(
+        "--index-col",
+        default=None,
+        nargs="*",
+        help="indicate the name of the index column(s) in the csv file, such that pandas can index by it",
+    )
+    group.add_argument(
+        "--constraint-graph",
+        type=str,
+        default="_constraint_graph",
+        help="the name of the html file to write the constraint graph to, defaults to `<DATASET>_constraint_graph`",
+    )
+    group.add_argument(
+        "--collapse-yaml",
+        action="store_true",
+        help="use aliases and anchors in the output metadata yaml, this will make it much more compact",
+    )
+    group.add_argument(
+        "--missingness",
+        type=str,
+        default="augment",
+        choices=MISSINGNESS_STRATEGIES,
+        help="how to handle missing values in the dataset",
+    )
+    group.add_argument(
+        "--impute",
+        type=str,
+        default=None,
+        help="the imputation strategy to use, ONLY USED if <MISSINGNESS> is set to 'impute', choose from: 'mean', 'median', 'mode', or any specific value (e.g. '0')",
+    )
+    group.add_argument(
+        "--write-csv",
+        action="store_true",
+        help="write the transformed real data to a csv file",
+    )
 
@@ -2722,8 +2722,7 @@

Source code in src/nhssynth/cli/module_arguments.py -
181
-182
+            
182
 183
 184
 185
@@ -2778,63 +2777,64 @@ 

234 235 236 -237

def add_evaluation_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:
-    """Adds arguments to an existing evaluation module sub-parser instance."""
-    group = parser.add_argument_group(title=group_title)
-    group.add_argument(
-        "--downstream-tasks",
-        "--tasks",
-        action="store_true",
-        help="run the downstream tasks evaluation",
-    )
-    group.add_argument(
-        "--tasks-dir",
-        type=str,
-        default="./tasks",
-        help="the directory containing the downstream tasks to run, this directory must contain a folder called <DATASET> containing the tasks to run",
-    )
-    group.add_argument(
-        "--aequitas",
-        action="store_true",
-        help="run the aequitas fairness evaluation (note this runs for each of the downstream tasks)",
-    )
-    group.add_argument(
-        "--aequitas-attributes",
-        type=str,
-        nargs="+",
-        default=None,
-        help="the attributes to use for the aequitas fairness evaluation, defaults to all attributes",
-    )
-    group.add_argument(
-        "--key-numerical-fields",
-        type=str,
-        nargs="+",
-        default=None,
-        help="the numerical key field attributes to use for SDV privacy evaluations",
-    )
-    group.add_argument(
-        "--sensitive-numerical-fields",
-        type=str,
-        nargs="+",
-        default=None,
-        help="the numerical sensitive field attributes to use for SDV privacy evaluations",
-    )
-    group.add_argument(
-        "--key-categorical-fields",
-        type=str,
-        nargs="+",
-        default=None,
-        help="the categorical key field attributes to use for SDV privacy evaluations",
-    )
-    group.add_argument(
-        "--sensitive-categorical-fields",
-        type=str,
-        nargs="+",
-        default=None,
-        help="the categorical sensitive field attributes to use for SDV privacy evaluations",
-    )
-    for name in METRIC_CHOICES:
-        generate_evaluation_arg(group, name)
+237
+238
def add_evaluation_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:
+    """Adds arguments to an existing evaluation module sub-parser instance."""
+    group = parser.add_argument_group(title=group_title)
+    group.add_argument(
+        "--downstream-tasks",
+        "--tasks",
+        action="store_true",
+        help="run the downstream tasks evaluation",
+    )
+    group.add_argument(
+        "--tasks-dir",
+        type=str,
+        default="./tasks",
+        help="the directory containing the downstream tasks to run, this directory must contain a folder called <DATASET> containing the tasks to run",
+    )
+    group.add_argument(
+        "--aequitas",
+        action="store_true",
+        help="run the aequitas fairness evaluation (note this runs for each of the downstream tasks)",
+    )
+    group.add_argument(
+        "--aequitas-attributes",
+        type=str,
+        nargs="+",
+        default=None,
+        help="the attributes to use for the aequitas fairness evaluation, defaults to all attributes",
+    )
+    group.add_argument(
+        "--key-numerical-fields",
+        type=str,
+        nargs="+",
+        default=None,
+        help="the numerical key field attributes to use for SDV privacy evaluations",
+    )
+    group.add_argument(
+        "--sensitive-numerical-fields",
+        type=str,
+        nargs="+",
+        default=None,
+        help="the numerical sensitive field attributes to use for SDV privacy evaluations",
+    )
+    group.add_argument(
+        "--key-categorical-fields",
+        type=str,
+        nargs="+",
+        default=None,
+        help="the categorical key field attributes to use for SDV privacy evaluations",
+    )
+    group.add_argument(
+        "--sensitive-categorical-fields",
+        type=str,
+        nargs="+",
+        default=None,
+        help="the categorical sensitive field attributes to use for SDV privacy evaluations",
+    )
+    for name in METRIC_CHOICES:
+        generate_evaluation_arg(group, name)
 
@@ -2858,8 +2858,7 @@

Source code in src/nhssynth/cli/module_arguments.py -
 81
- 82
+            
 82
  83
  84
  85
@@ -2942,91 +2941,92 @@ 

162 163 164 -165

def add_model_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:
-    """Adds arguments to an existing model module sub-parser instance."""
-    group = parser.add_argument_group(title=group_title)
-    group.add_argument(
-        "--architecture",
-        type=str,
-        nargs="+",
-        default=["VAE"],
-        choices=MODELS,
-        help="the model architecture(s) to train",
-    )
-    group.add_argument(
-        "--repeats",
-        type=int,
-        default=1,
-        help="how many times to repeat the training process per model architecture (<SEED> is incremented each time)",
-    )
-    group.add_argument(
-        "--batch-size",
-        type=int,
-        nargs="+",
-        default=32,
-        help="the batch size for the model",
-    )
-    group.add_argument(
-        "--num-epochs",
-        type=int,
-        nargs="+",
-        default=100,
-        help="number of epochs to train for",
-    )
-    group.add_argument(
-        "--patience",
-        type=int,
-        nargs="+",
-        default=5,
-        help="how many epochs the model is allowed to train for without improvement",
-    )
-    group.add_argument(
-        "--displayed-metrics",
-        type=str,
-        nargs="+",
-        default=[],
-        help="metrics to display during training of the model, when set to `None`, all metrics are displayed",
-    )
-    group.add_argument(
-        "--use-gpu",
-        action="store_true",
-        help="use the GPU for training",
-    )
-    group.add_argument(
-        "--num-samples",
-        type=int,
-        default=None,
-        help="the number of samples to generate from the model, defaults to the size of the original dataset",
-    )
-    privacy_group = parser.add_argument_group(title="model privacy options")
-    privacy_group.add_argument(
-        "--target-epsilon",
-        type=float,
-        nargs="+",
-        default=1.0,
-        help="the target epsilon for differential privacy",
-    )
-    privacy_group.add_argument(
-        "--target-delta",
-        type=float,
-        nargs="+",
-        help="the target delta for differential privacy, defaults to `1 / len(dataset)` if not specified",
-    )
-    privacy_group.add_argument(
-        "--max-grad-norm",
-        type=float,
-        nargs="+",
-        default=5.0,
-        help="the clipping threshold for gradients (only relevant under differential privacy)",
-    )
-    privacy_group.add_argument(
-        "--secure-mode",
-        action="store_true",
-        help="Enable secure RNG via the `csprng` package to make privacy guarantees more robust, comes at a cost of performance and reproducibility",
-    )
-    for model_name in MODELS.keys():
-        model_group = parser.add_argument_group(title=f"{model_name}-specific options")
-        add_model_specific_args(model_group, model_name, overrides=overrides)
+165
+166
def add_model_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:
+    """Adds arguments to an existing model module sub-parser instance."""
+    group = parser.add_argument_group(title=group_title)
+    group.add_argument(
+        "--architecture",
+        type=str,
+        nargs="+",
+        default=["VAE"],
+        choices=MODELS,
+        help="the model architecture(s) to train",
+    )
+    group.add_argument(
+        "--repeats",
+        type=int,
+        default=1,
+        help="how many times to repeat the training process per model architecture (<SEED> is incremented each time)",
+    )
+    group.add_argument(
+        "--batch-size",
+        type=int,
+        nargs="+",
+        default=32,
+        help="the batch size for the model",
+    )
+    group.add_argument(
+        "--num-epochs",
+        type=int,
+        nargs="+",
+        default=100,
+        help="number of epochs to train for",
+    )
+    group.add_argument(
+        "--patience",
+        type=int,
+        nargs="+",
+        default=5,
+        help="how many epochs the model is allowed to train for without improvement",
+    )
+    group.add_argument(
+        "--displayed-metrics",
+        type=str,
+        nargs="+",
+        default=[],
+        help="metrics to display during training of the model, when set to `None`, all metrics are displayed",
+    )
+    group.add_argument(
+        "--use-gpu",
+        action="store_true",
+        help="use the GPU for training",
+    )
+    group.add_argument(
+        "--num-samples",
+        type=int,
+        default=None,
+        help="the number of samples to generate from the model, defaults to the size of the original dataset",
+    )
+    privacy_group = parser.add_argument_group(title="model privacy options")
+    privacy_group.add_argument(
+        "--target-epsilon",
+        type=float,
+        nargs="+",
+        default=1.0,
+        help="the target epsilon for differential privacy",
+    )
+    privacy_group.add_argument(
+        "--target-delta",
+        type=float,
+        nargs="+",
+        help="the target delta for differential privacy, defaults to `1 / len(dataset)` if not specified",
+    )
+    privacy_group.add_argument(
+        "--max-grad-norm",
+        type=float,
+        nargs="+",
+        default=5.0,
+        help="the clipping threshold for gradients (only relevant under differential privacy)",
+    )
+    privacy_group.add_argument(
+        "--secure-mode",
+        action="store_true",
+        help="Enable secure RNG via the `csprng` package to make privacy guarantees more robust, comes at a cost of performance and reproducibility",
+    )
+    for model_name in MODELS.keys():
+        model_group = parser.add_argument_group(title=f"{model_name}-specific options")
+        add_model_specific_args(model_group, model_name, overrides=overrides)
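A hedged example of parsing the options registered above; it assumes the package is installed and that "VAE" and "GAN" are keys of `MODELS`, as the model-specific helpers suggest.

import argparse

from nhssynth.cli.module_arguments import add_model_args  # assumed import path

parser = argparse.ArgumentParser(prog="nhssynth model")
add_model_args(parser, "model options")
args = parser.parse_args(["--architecture", "VAE", "GAN", "--num-epochs", "50", "--use-gpu"])
print(args.architecture, args.num_epochs, args.use_gpu)  # ['VAE', 'GAN'] [50] True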
 
@@ -3050,8 +3050,7 @@

Source code in src/nhssynth/cli/module_arguments.py -
240
-241
+            
241
 242
 243
 244
@@ -3072,29 +3071,30 @@ 

259 260 261 -262

def add_plotting_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:
-    """Adds arguments to an existing plotting module sub-parser instance."""
-    group = parser.add_argument_group(title=group_title)
-    group.add_argument(
-        "--plot-quality",
-        action="store_true",
-        help="plot the SDV quality report",
-    )
-    group.add_argument(
-        "--plot-diagnostic",
-        action="store_true",
-        help="plot the SDV diagnostic report",
-    )
-    group.add_argument(
-        "--plot-sdv-report",
-        action="store_true",
-        help="plot the SDV report",
-    )
-    group.add_argument(
-        "--plot-tsne",
-        action="store_true",
-        help="plot the t-SNE embeddings of the real and synthetic data",
-    )
+262
+263
def add_plotting_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:
+    """Adds arguments to an existing plotting module sub-parser instance."""
+    group = parser.add_argument_group(title=group_title)
+    group.add_argument(
+        "--plot-quality",
+        action="store_true",
+        help="plot the SDV quality report",
+    )
+    group.add_argument(
+        "--plot-diagnostic",
+        action="store_true",
+        help="plot the SDV diagnostic report",
+    )
+    group.add_argument(
+        "--plot-sdv-report",
+        action="store_true",
+        help="plot the SDV report",
+    )
+    group.add_argument(
+        "--plot-tsne",
+        action="store_true",
+        help="plot the t-SNE embeddings of the real and synthetic data",
+    )
 
diff --git a/reference/cli/module_setup/index.html b/reference/cli/module_setup/index.html index d1efd388..94e8e197 100644 --- a/reference/cli/module_setup/index.html +++ b/reference/cli/module_setup/index.html @@ -2597,8 +2597,7 @@

Source code in src/nhssynth/cli/module_setup.py -
10
-11
+              
11
 12
 13
 14
@@ -2632,42 +2631,43 @@ 

42 43 44 -45

class ModuleConfig:
-    """
-    Represents a module's configuration, containing the following attributes:
-
-    Attributes:
-        func: A callable that executes the module's functionality.
-        add_args: A callable that populates the module's sub-parser arguments.
-        description: A description of the module's functionality.
-        help: A help message for the module's command-line interface.
-        common_parsers: A list of common parsers to add to the module's sub-parser, appending the 'dataset' and 'core' parsers to those passed.
-    """
-
-    def __init__(
-        self,
-        func: Callable[..., argparse.Namespace],
-        add_args: Callable[..., None],
-        description: str,
-        help: str,
-        common_parsers: Optional[list[str]] = None,
-        no_seed: bool = False,
-    ) -> None:
-        self.func = func
-        self.add_args = add_args
-        self.description = description
-        self.help = help
-        self.common_parsers = ["core", "seed"] if not no_seed else ["core"]
-        if common_parsers:
-            assert set(common_parsers) <= COMMON_PARSERS.keys(), "Invalid common parser(s) specified."
-            # merge the below two assert statements
-            assert (
-                "core" not in common_parsers and "seed" not in common_parsers
-            ), "The 'seed' and 'core' parser groups are automatically added to all modules, remove the from `ModuleConfig`s."
-            self.common_parsers += common_parsers
-
-    def __call__(self, args: argparse.Namespace) -> argparse.Namespace:
-        return self.func(args)
+45
+46
class ModuleConfig:
+    """
+    Represents a module's configuration, containing the following attributes:
+
+    Attributes:
+        func: A callable that executes the module's functionality.
+        add_args: A callable that populates the module's sub-parser arguments.
+        description: A description of the module's functionality.
+        help: A help message for the module's command-line interface.
+        common_parsers: A list of common parsers to add to the module's sub-parser, appending the 'core' and 'seed' parsers to those passed.
+    """
+
+    def __init__(
+        self,
+        func: Callable[..., argparse.Namespace],
+        add_args: Callable[..., None],
+        description: str,
+        help: str,
+        common_parsers: Optional[list[str]] = None,
+        no_seed: bool = False,
+    ) -> None:
+        self.func = func
+        self.add_args = add_args
+        self.description = description
+        self.help = help
+        self.common_parsers = ["core", "seed"] if not no_seed else ["core"]
+        if common_parsers:
+            assert set(common_parsers) <= COMMON_PARSERS.keys(), "Invalid common parser(s) specified."
+            # merge the below two assert statements
+            assert (
+                "core" not in common_parsers and "seed" not in common_parsers
+            ), "The 'seed' and 'core' parser groups are automatically added to all modules, remove the from `ModuleConfig`s."
+            self.common_parsers += common_parsers
+
+    def __call__(self, args: argparse.Namespace) -> argparse.Namespace:
+        return self.func(args)
 
@@ -2710,8 +2710,7 @@

Source code in src/nhssynth/cli/module_setup.py -
62
-63
+            
63
 64
 65
 66
@@ -2727,24 +2726,25 @@ 

76 77 78 -79

def add_config_args(parser: argparse.ArgumentParser) -> None:
-    """Adds arguments to `parser` relating to configuration file handling and module-specific config overrides."""
-    parser.add_argument(
-        "-c",
-        "--input-config",
-        required=True,
-        help="specify the config file name",
-    )
-    parser.add_argument(
-        "-cp",
-        "--custom-pipeline",
-        action="store_true",
-        help="infer a custom pipeline running order of modules from the config",
-    )
-    for module_name in PIPELINE:
-        MODULE_MAP[module_name].add_args(parser, f"{module_name} option overrides", overrides=True)
-    for module_name in VALID_MODULES - set(PIPELINE):
-        MODULE_MAP[module_name].add_args(parser, f"{module_name} options overrides", overrides=True)
+79
+80
def add_config_args(parser: argparse.ArgumentParser) -> None:
+    """Adds arguments to `parser` relating to configuration file handling and module-specific config overrides."""
+    parser.add_argument(
+        "-c",
+        "--input-config",
+        required=True,
+        help="specify the config file name",
+    )
+    parser.add_argument(
+        "-cp",
+        "--custom-pipeline",
+        action="store_true",
+        help="infer a custom pipeline running order of modules from the config",
+    )
+    for module_name in PIPELINE:
+        MODULE_MAP[module_name].add_args(parser, f"{module_name} option overrides", overrides=True)
+    for module_name in VALID_MODULES - set(PIPELINE):
+        MODULE_MAP[module_name].add_args(parser, f"{module_name} options overrides", overrides=True)
 
@@ -2768,13 +2768,13 @@

Source code in src/nhssynth/cli/module_setup.py -
56
-57
+            
def add_pipeline_args(parser: argparse.ArgumentParser) -> None:
-    """Adds arguments to `parser` for each module in the pipeline."""
-    for module_name in PIPELINE:
-        MODULE_MAP[module_name].add_args(parser, f"{module_name} options")
+59
+60
def add_pipeline_args(parser: argparse.ArgumentParser) -> None:
+    """Adds arguments to `parser` for each module in the pipeline."""
+    for module_name in PIPELINE:
+        MODULE_MAP[module_name].add_args(parser, f"{module_name} options")
 
@@ -2880,8 +2880,7 @@

Source code in src/nhssynth/cli/module_setup.py -
167
-168
+            
168
 169
 170
 171
@@ -2909,36 +2908,37 @@ 

193 194 195 -196

def add_subparser(
-    subparsers: argparse._SubParsersAction,
-    name: str,
-    module_config: ModuleConfig,
-) -> argparse.ArgumentParser:
-    """
-    Add a subparser to an argparse argument parser.
-
-    Args:
-        subparsers: The subparsers action to which the subparser will be added.
-        name: The name of the subparser.
-        module_config: A [`ModuleConfig`][nhssynth.cli.module_setup.ModuleConfig] object containing information about the subparser, including a function to execute and a function to add arguments.
-
-    Returns:
-        The newly created subparser.
-    """
-    parent_parsers = get_parent_parsers(name, module_config.common_parsers)
-    parser = subparsers.add_parser(
-        name=name,
-        description=module_config.description,
-        help=module_config.help,
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-        parents=parent_parsers,
-    )
-    if name not in {"pipeline", "config"}:
-        module_config.add_args(parser, f"{name} options")
-    else:
-        module_config.add_args(parser)
-    parser.set_defaults(func=module_config.func)
-    return parser
+196
+197
def add_subparser(
+    subparsers: argparse._SubParsersAction,
+    name: str,
+    module_config: ModuleConfig,
+) -> argparse.ArgumentParser:
+    """
+    Add a subparser to an argparse argument parser.
+
+    Args:
+        subparsers: The subparsers action to which the subparser will be added.
+        name: The name of the subparser.
+        module_config: A [`ModuleConfig`][nhssynth.cli.module_setup.ModuleConfig] object containing information about the subparser, including a function to execute and a function to add arguments.
+
+    Returns:
+        The newly created subparser.
+    """
+    parent_parsers = get_parent_parsers(name, module_config.common_parsers)
+    parser = subparsers.add_parser(
+        name=name,
+        description=module_config.description,
+        help=module_config.help,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        parents=parent_parsers,
+    )
+    if name not in {"pipeline", "config"}:
+        module_config.add_args(parser, f"{name} options")
+    else:
+        module_config.add_args(parser)
+    parser.set_defaults(func=module_config.func)
+    return parser
 
@@ -2962,21 +2962,21 @@

Source code in src/nhssynth/cli/module_setup.py -
157
-158
+            
def get_parent_parsers(name: str, module_parsers: list[str]) -> list[argparse.ArgumentParser]:
-    """Get a list of parent parsers for a given module, based on the module's `common_parsers` attribute."""
-    if name in {"pipeline", "config"}:
-        return [p(name == "config") for p in COMMON_PARSERS.values()]
-    elif name == "dashboard":
-        return [COMMON_PARSERS[pn](True) for pn in module_parsers]
-    else:
-        return [COMMON_PARSERS[pn]() for pn in module_parsers]
+164
+165
def get_parent_parsers(name: str, module_parsers: list[str]) -> list[argparse.ArgumentParser]:
+    """Get a list of parent parsers for a given module, based on the module's `common_parsers` attribute."""
+    if name in {"pipeline", "config"}:
+        return [p(name == "config") for p in COMMON_PARSERS.values()]
+    elif name == "dashboard":
+        return [COMMON_PARSERS[pn](True) for pn in module_parsers]
+    else:
+        return [COMMON_PARSERS[pn]() for pn in module_parsers]
 
@@ -3000,17 +3000,17 @@

Source code in src/nhssynth/cli/module_setup.py -
48
-49
+            
def run_pipeline(args: argparse.Namespace) -> None:
-    """Runs the specified pipeline of modules with the passed configuration `args`."""
-    print("Running full pipeline...")
-    args.modules_to_run = PIPELINE
-    for module_name in PIPELINE:
-        args = MODULE_MAP[module_name](args)
+53
+54
def run_pipeline(args: argparse.Namespace) -> None:
+    """Runs the specified pipeline of modules with the passed configuration `args`."""
+    print("Running full pipeline...")
+    args.modules_to_run = PIPELINE
+    for module_name in PIPELINE:
+        args = MODULE_MAP[module_name](args)
 
diff --git a/reference/common/common/index.html b/reference/common/common/index.html index e6a95c6c..42968cd6 100644 --- a/reference/common/common/index.html +++ b/reference/common/common/index.html @@ -2517,8 +2517,7 @@

Source code in src/nhssynth/common/common.py -
 9
-10
+            
10
 11
 12
 13
@@ -2527,17 +2526,18 @@ 

16 17 18 -19

def set_seed(seed: Optional[int] = None) -> None:
-    """
-    (Potentially) set the seed for numpy, torch and random. If no seed is provided, nothing happens.
-
-    Args:
-        seed: The seed to set.
-    """
-    if seed:
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-        random.seed(seed)
+19
+20
def set_seed(seed: Optional[int] = None) -> None:
+    """
+    (Potentially) set the seed for numpy, torch and random. If no seed is provided, nothing happens.
+
+    Args:
+        seed: The seed to set.
+    """
+    if seed:
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+        random.seed(seed)
 
diff --git a/reference/common/dicts/index.html b/reference/common/dicts/index.html index 5662fc95..2c1e122e 100644 --- a/reference/common/dicts/index.html +++ b/reference/common/dicts/index.html @@ -2597,8 +2597,7 @@

Source code in src/nhssynth/common/dicts.py -
 5
- 6
+            
 6
  7
  8
  9
@@ -2620,30 +2619,31 @@ 

25 26 27 -28

def filter_dict(d: dict, filter_keys: Union[set, list], include: bool = False) -> dict:
-    """
-    Given a dictionary, return a new dictionary either including or excluding keys in a given `filter` set.
-
-    Args:
-        d: A dictionary to filter.
-        filter_keys: A list or set of keys to either include or exclude.
-        include: Determine whether to return a dictionary including or excluding keys in `filter`.
-
-    Returns:
-        A filtered dictionary.
-
-    Examples:
-        >>> d = {'a': 1, 'b': 2, 'c': 3}
-        >>> filter_dict(d, {'a', 'b'})
-        {'c': 3}
-        >>> filter_dict(d, {'a', 'b'}, include=True)
-        {'a': 1, 'b': 2}
-    """
-    if include:
-        filtered_keys = set(filter_keys) & set(d.keys())
-    else:
-        filtered_keys = set(d.keys()) - set(filter_keys)
-    return {k: v for k, v in d.items() if k in filtered_keys}
+28
+29
def filter_dict(d: dict, filter_keys: Union[set, list], include: bool = False) -> dict:
+    """
+    Given a dictionary, return a new dictionary either including or excluding keys in a given `filter` set.
+
+    Args:
+        d: A dictionary to filter.
+        filter_keys: A list or set of keys to either include or exclude.
+        include: Determine whether to return a dictionary including or excluding keys in `filter`.
+
+    Returns:
+        A filtered dictionary.
+
+    Examples:
+        >>> d = {'a': 1, 'b': 2, 'c': 3}
+        >>> filter_dict(d, {'a', 'b'})
+        {'c': 3}
+        >>> filter_dict(d, {'a', 'b'}, include=True)
+        {'a': 1, 'b': 2}
+    """
+    if include:
+        filtered_keys = set(filter_keys) & set(d.keys())
+    else:
+        filtered_keys = set(d.keys()) - set(filter_keys)
+    return {k: v for k, v in d.items() if k in filtered_keys}
 
@@ -2753,8 +2753,7 @@

Source code in src/nhssynth/common/dicts.py -
56
-57
+            
57
 58
 59
 60
@@ -2779,33 +2778,34 @@ 

79 80 81 -82

def flatten_dict(d: dict[str, Any]) -> dict[str, Any]:
-    """
-    Flatten a dictionary by recursively combining nested keys into a single dictionary until no nested keys remain.
-
-    Args:
-        d: A dictionary with potentially nested keys.
-
-    Returns:
-        A flattened dictionary.
-
-    Raises:
-        ValueError: If duplicate keys are found in the flattened dictionary.
-
-    Examples:
-        >>> d = {'a': 1, 'b': {'c': 2, 'd': {'e': 3}}}
-        >>> flatten_dict(d)
-        {'a': 1, 'c': 2, 'e': 3}
-    """
-    items = []
-    for k, v in d.items():
-        if isinstance(v, dict):
-            items.extend(flatten_dict(v).items())
-        else:
-            items.append((k, v))
-    if len(set([p[0] for p in items])) != len(items):
-        raise ValueError("Duplicate keys found in flattened dictionary")
-    return dict(items)
+82
+83
def flatten_dict(d: dict[str, Any]) -> dict[str, Any]:
+    """
+    Flatten a dictionary by recursively combining nested keys into a single dictionary until no nested keys remain.
+
+    Args:
+        d: A dictionary with potentially nested keys.
+
+    Returns:
+        A flattened dictionary.
+
+    Raises:
+        ValueError: If duplicate keys are found in the flattened dictionary.
+
+    Examples:
+        >>> d = {'a': 1, 'b': {'c': 2, 'd': {'e': 3}}}
+        >>> flatten_dict(d)
+        {'a': 1, 'c': 2, 'e': 3}
+    """
+    items = []
+    for k, v in d.items():
+        if isinstance(v, dict):
+            items.extend(flatten_dict(v).items())
+        else:
+            items.append((k, v))
+    if len(set([p[0] for p in items])) != len(items):
+        raise ValueError("Duplicate keys found in flattened dictionary")
+    return dict(items)
 
@@ -2907,8 +2907,7 @@

Source code in src/nhssynth/common/dicts.py -
31
-32
+            
32
 33
 34
 35
@@ -2929,29 +2928,30 @@ 

50 51 52 -53

def get_key_by_value(d: dict, value: Any) -> Union[Any, None]:
-    """
-    Find the first key in a dictionary with a given value.
-
-    Args:
-        d: A dictionary to search through.
-        value: The value to search for.
-
-    Returns:
-        The first key in `d` with the value `value`, or `None` if no such key exists.
-
-    Examples:
-        >>> d = {'a': 1, 'b': 2, 'c': 1}
-        >>> get_key_by_value(d, 2)
-        'b'
-        >>> get_key_by_value(d, 3)
-        None
-
-    """
-    for key, val in d.items():
-        if val == value:
-            return key
-    return None
+53
+54
def get_key_by_value(d: dict, value: Any) -> Union[Any, None]:
+    """
+    Find the first key in a dictionary with a given value.
+
+    Args:
+        d: A dictionary to search through.
+        value: The value to search for.
+
+    Returns:
+        The first key in `d` with the value `value`, or `None` if no such key exists.
+
+    Examples:
+        >>> d = {'a': 1, 'b': 2, 'c': 1}
+        >>> get_key_by_value(d, 2)
+        'b'
+        >>> get_key_by_value(d, 3)
+        None
+
+    """
+    for key, val in d.items():
+        if val == value:
+            return key
+    return None
 
diff --git a/reference/common/io/index.html b/reference/common/io/index.html index 859eaef8..a3307984 100644 --- a/reference/common/io/index.html +++ b/reference/common/io/index.html @@ -2609,8 +2609,7 @@

Source code in src/nhssynth/common/io.py -
81
-82
+            
82
 83
 84
 85
@@ -2622,20 +2621,21 @@ 

91 92 93 -94

def check_exists(fns: list[str], dir: Path) -> None:
-    """
-    Checks if the files in `fns` exist in `dir`.
-
-    Args:
-        fns: The list of files to check.
-        dir: The directory the files should exist in.
-
-    Raises:
-        FileNotFoundError: If any of the files in `fns` do not exist in `dir`.
-    """
-    for fn in fns:
-        if not (dir / fn).exists():
-            raise FileNotFoundError(f"File {fn} does not exist at {dir}.")
+94
+95
def check_exists(fns: list[str], dir: Path) -> None:
+    """
+    Checks if the files in `fns` exist in `dir`.
+
+    Args:
+        fns: The list of files to check.
+        dir: The directory the files should exist in.
+
+    Raises:
+        FileNotFoundError: If any of the files in `fns` do not exist in `dir`.
+    """
+    for fn in fns:
+        if not (dir / fn).exists():
+            raise FileNotFoundError(f"File {fn} does not exist at {dir}.")
 
@@ -2741,8 +2741,7 @@

Source code in src/nhssynth/common/io.py -
23
-24
+            
24
 25
 26
 27
@@ -2754,20 +2753,21 @@ 

33 34 35 -36

def consistent_ending(fn: str, ending: str = ".pkl", suffix: str = "") -> str:
-    """
-    Ensures that the filename `fn` ends with `ending`. If not, removes any existing ending and appends `ending`.
-
-    Args:
-        fn: The filename to check.
-        ending: The desired ending to check for. Default is ".pkl".
-        suffix: A suffix to append to the filename before the ending.
-
-    Returns:
-        The filename with the correct ending and potentially an inserted suffix.
-    """
-    path_fn = Path(fn)
-    return str(path_fn.parent / path_fn.stem) + ("_" if suffix else "") + suffix + ending
+36
+37
def consistent_ending(fn: str, ending: str = ".pkl", suffix: str = "") -> str:
+    """
+    Ensures that the filename `fn` ends with `ending`. If not, removes any existing ending and appends `ending`.
+
+    Args:
+        fn: The filename to check.
+        ending: The desired ending to check for. Default is ".pkl".
+        suffix: A suffix to append to the filename before the ending.
+
+    Returns:
+        The filename with the correct ending and potentially an inserted suffix.
+    """
+    path_fn = Path(fn)
+    return str(path_fn.parent / path_fn.stem) + ("_" if suffix else "") + suffix + ending
 
@@ -2845,8 +2845,7 @@

Source code in src/nhssynth/common/io.py -
39
-40
+            
40
 41
 42
 43
@@ -2855,17 +2854,18 @@ 

46 47 48 -49

def consistent_endings(args: list[Union[str, tuple[str, str], tuple[str, str, str]]]) -> list[str]:
-    """
-    Wrapper around `consistent_ending` to apply it to a list of filenames.
-
-    Args:
-        args: The list of filenames to check. Can take the form of a single filename, a pair of a filename and an ending, or a triple of a filename, an ending and a suffix.
-
-    Returns:
-        The list of filenames with the correct endings.
-    """
-    return list(consistent_ending(arg) if isinstance(arg, str) else consistent_ending(*arg) for arg in args)
+49
+50
def consistent_endings(args: list[Union[str, tuple[str, str], tuple[str, str, str]]]) -> list[str]:
+    """
+    Wrapper around `consistent_ending` to apply it to a list of filenames.
+
+    Args:
+        args: The list of filenames to check. Can take the form of a single filename, a pair of a filename and an ending, or a triple of a filename, an ending and a suffix.
+
+    Returns:
+        The list of filenames with the correct endings.
+    """
+    return list(consistent_ending(arg) if isinstance(arg, str) else consistent_ending(*arg) for arg in args)
 
@@ -2957,8 +2957,7 @@

Source code in src/nhssynth/common/io.py -
 7
- 8
+            
 8
  9
 10
 11
@@ -2970,20 +2969,21 @@ 

17 18 19 -20

def experiment_io(experiment_name: str, dir_experiments: str = "experiments") -> str:
-    """
-    Create an experiment's directory and return the path.
-
-    Args:
-        experiment_name: The name of the experiment.
-        dir_experiments: The name of the directory containing all experiments.
-
-    Returns:
-        The path to the experiment directory.
-    """
-    dir_experiment = Path(dir_experiments) / experiment_name
-    dir_experiment.mkdir(parents=True, exist_ok=True)
-    return dir_experiment
+20
+21
def experiment_io(experiment_name: str, dir_experiments: str = "experiments") -> str:
+    """
+    Create an experiment's directory and return the path.
+
+    Args:
+        experiment_name: The name of the experiment.
+        dir_experiments: The name of the directory containing all experiments.
+
+    Returns:
+        The path to the experiment directory.
+    """
+    dir_experiment = Path(dir_experiments) / experiment_name
+    dir_experiment.mkdir(parents=True, exist_ok=True)
+    return dir_experiment
 
@@ -3075,8 +3075,7 @@

Source code in src/nhssynth/common/io.py -
52
-53
+            
53
 54
 55
 56
@@ -3090,22 +3089,23 @@ 

64 65 66 -67

def potential_suffix(fn: str, fn_base: str) -> str:
-    """
-    Checks if `fn` is a suffix (starts with an underscore) to append to `fn_base`, or a filename in its own right.
-
-    Args:
-        fn: The filename / potential suffix to append to `fn_base`.
-        fn_base: The name of the file the suffix would attach to.
-
-    Returns:
-        The appropriately processed `fn`
-    """
-    fn_base = Path(fn_base).stem
-    if fn[0] == "_":
-        return fn_base + fn
-    else:
-        return fn
+67
+68
def potential_suffix(fn: str, fn_base: str) -> str:
+    """
+    Checks if `fn` is a suffix (starts with an underscore) to append to `fn_base`, or a filename in its own right.
+
+    Args:
+        fn: The filename / potential suffix to append to `fn_base`.
+        fn_base: The name of the file the suffix would attach to.
+
+    Returns:
+        The appropriately processed `fn`.
+    """
+    fn_base = Path(fn_base).stem
+    if fn[0] == "_":
+        return fn_base + fn
+    else:
+        return fn
 
@@ -3173,23 +3173,23 @@

Source code in src/nhssynth/common/io.py -
70
-71
+            
def potential_suffixes(fns: list[str], fn_base: str) -> list[str]:
-    """
-    Wrapper around `potential_suffix` to apply it to a list of filenames.
-
-    Args:
-        fns: The list of filenames / potential suffixes to append to `fn_base`.
-        fn_base: The name of the file the suffixes would attach to.
-    """
-    return list(potential_suffix(fn, fn_base) for fn in fns)
+78
+79
def potential_suffixes(fns: list[str], fn_base: str) -> list[str]:
+    """
+    Wrapper around `potential_suffix` to apply it to a list of filenames.
+
+    Args:
+        fns: The list of filenames / potential suffixes to append to `fn_base`.
+        fn_base: The name of the file the suffixes would attach to.
+    """
+    return list(potential_suffix(fn, fn_base) for fn in fns)
 
@@ -3281,8 +3281,7 @@

Source code in src/nhssynth/common/io.py -
 97
- 98
+            
 98
  99
 100
 101
@@ -3297,23 +3296,24 @@ 

110 111 112 -113

def warn_if_path_supplied(fns: list[str], dir: Path) -> None:
-    """
-    Warns if the files in `fns` include directory separators.
-
-    Args:
-        fns: The list of files to check.
-        dir: The directory the files should exist in.
-
-    Warnings:
-        UserWarning: when the path to any of the files in `fns` includes directory separators, as this may lead to unintended consequences if the user doesn't realise default directories are pre-specified.
-    """
-    for fn in fns:
-        if "/" in fn:
-            warnings.warn(
-                f"Using the path supplied appended to {dir}, i.e. attempting to read data from {dir / fn}",
-                UserWarning,
-            )
+113
+114
def warn_if_path_supplied(fns: list[str], dir: Path) -> None:
+    """
+    Warns if the files in `fns` include directory separators.
+
+    Args:
+        fns: The list of files to check.
+        dir: The directory the files should exist in.
+
+    Warnings:
+        UserWarning: when the path to any of the files in `fns` includes directory separators, as this may lead to unintended consequences if the user doesn't realise default directories are pre-specified.
+    """
+    for fn in fns:
+        if "/" in fn:
+            warnings.warn(
+                f"Using the path supplied appended to {dir}, i.e. attempting to read data from {dir / fn}",
+                UserWarning,
+            )
 
diff --git a/reference/common/strings/index.html b/reference/common/strings/index.html index 9ccfcfa2..9eb9793d 100644 --- a/reference/common/strings/index.html +++ b/reference/common/strings/index.html @@ -2559,8 +2559,7 @@

Source code in src/nhssynth/common/strings.py -
 6
- 7
+            
 7
  8
  9
 10
@@ -2575,23 +2574,24 @@ 

19 20 21 -22

def add_spaces_before_caps(string: str) -> str:
-    """
-    Adds spaces before capital letters in a string if there is a lower-case letter following it.
-
-    Args:
-        string: The string to add spaces to.
-
-    Returns:
-        The string with spaces added before capital letters.
-
-    Examples:
-        >>> add_spaces_before_caps("HelloWorld")
-        'Hello World'
-        >>> add_spaces_before_caps("HelloWorldAGAIN")
-        'Hello World AGAIN'
-    """
-    return " ".join(re.findall(r"[a-z]?[A-Z][a-z]+|[A-Z]+(?=[A-Z][a-z]|\b)", string))
+22
+23
def add_spaces_before_caps(string: str) -> str:
+    """
+    Adds spaces before capital letters in a string if there is a lower-case letter following it.
+
+    Args:
+        string: The string to add spaces to.
+
+    Returns:
+        The string with spaces added before capital letters.
+
+    Examples:
+        >>> add_spaces_before_caps("HelloWorld")
+        'Hello World'
+        >>> add_spaces_before_caps("HelloWorldAGAIN")
+        'Hello World AGAIN'
+    """
+    return " ".join(re.findall(r"[a-z]?[A-Z][a-z]+|[A-Z]+(?=[A-Z][a-z]|\b)", string))
 
@@ -2683,8 +2683,7 @@

Source code in src/nhssynth/common/strings.py -
25
-26
+            
26
 27
 28
 29
@@ -2706,30 +2705,31 @@ 

45 46 47 -48

def format_timedelta(start: float, finish: float) -> str:
-    """
-    Calculate and prettily format the difference between two calls to `time.time()`.
-
-    Args:
-        start: The start time.
-        finish: The finish time.
-
-    Returns:
-        A string containing the time difference in a human-readable format.
-    """
-    total = datetime.timedelta(seconds=finish - start)
-    hours, remainder = divmod(total.seconds, 3600)
-    minutes, seconds = divmod(remainder, 60)
-
-    if total.days > 0:
-        delta_str = f"{total.days}d {hours}h {minutes}m {seconds}s"
-    elif hours > 0:
-        delta_str = f"{hours}h {minutes}m {seconds}s"
-    elif minutes > 0:
-        delta_str = f"{minutes}m {seconds}s"
-    else:
-        delta_str = f"{seconds}s"
-    return delta_str
+48
+49
def format_timedelta(start: float, finish: float) -> str:
+    """
+    Calculate and prettily format the difference between two calls to `time.time()`.
+
+    Args:
+        start: The start time.
+        finish: The finish time.
+
+    Returns:
+        A string containing the time difference in a human-readable format.
+    """
+    total = datetime.timedelta(seconds=finish - start)
+    hours, remainder = divmod(total.seconds, 3600)
+    minutes, seconds = divmod(remainder, 60)
+
+    if total.days > 0:
+        delta_str = f"{total.days}d {hours}h {minutes}m {seconds}s"
+    elif hours > 0:
+        delta_str = f"{hours}h {minutes}m {seconds}s"
+    elif minutes > 0:
+        delta_str = f"{minutes}m {seconds}s"
+    else:
+        delta_str = f"{seconds}s"
+    return delta_str
 
diff --git a/reference/modules/dataloader/metadata/index.html b/reference/modules/dataloader/metadata/index.html index 6f26b598..539d63df 100644 --- a/reference/modules/dataloader/metadata/index.html +++ b/reference/modules/dataloader/metadata/index.html @@ -3083,79 +3083,79 @@

assembled_metadata = { "columns": { cn: { - "dtype": cmd.dtype.name - if not hasattr(cmd, "datetime_config") - else {"name": cmd.dtype.name, **cmd.datetime_config}, - "categorical": cmd.categorical, - } - for cn, cmd in self._metadata.items() - } - } - # We loop through the base dict above to add other parts if they are present in the metadata - for cn, cmd in self._metadata.items(): - if cmd.missingness_strategy: - assembled_metadata["columns"][cn]["missingness"] = ( - cmd.missingness_strategy.name - if cmd.missingness_strategy.name != "impute" - else {"name": cmd.missingness_strategy.name, "impute": cmd.missingness_strategy.impute} - ) - if cmd.transformer_config: - assembled_metadata["columns"][cn]["transformer"] = { - **cmd.transformer_config, - "name": cmd.transformer.__class__.__name__, - } - - # Add back the dropped_columns not present in the metadata - if self.dropped_columns: - assembled_metadata["columns"].update({cn: "drop" for cn in self.dropped_columns}) - - if collapse_yaml: - assembled_metadata = self._collapse(assembled_metadata) - - # We add the constraints section after all of the formatting and processing above - # In general, the constraints are kept the same as the input (provided they passed validation) - # If `collapse_yaml` is specified, we output the minimum set of equivalent constraints - if self.constraints: - assembled_metadata["constraints"] = ( - [str(c) for c in self.constraints.minimal_constraints] - if collapse_yaml - else self.constraints.raw_constraint_strings - ) - return assembled_metadata - - def save(self, path: pathlib.Path, collapse_yaml: bool) -> None: - """ - Writes metadata to a YAML file. - - Args: - path: The path at which to write the metadata YAML file. - collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication. - """ - with open(path, "w") as yaml_file: - yaml.safe_dump( - self._assemble(collapse_yaml), - yaml_file, - default_flow_style=False, - sort_keys=False, - ) - - def get_sdv_metadata(self) -> dict[str, dict[str, dict[str, str]]]: - """ - Map combinations of our metadata implementation to SDV's as required by SDMetrics. - - Returns: - A dictionary containing the SDV metadata. 
- """ - sdv_metadata = { - "columns": { - cn: { - "sdtype": "boolean" - if cmd.boolean - else "categorical" - if cmd.categorical - else "datetime" - if cmd.dtype.kind == "M" - else "numerical", + "dtype": ( + cmd.dtype.name + if not hasattr(cmd, "datetime_config") + else {"name": cmd.dtype.name, **cmd.datetime_config} + ), + "categorical": cmd.categorical, + } + for cn, cmd in self._metadata.items() + } + } + # We loop through the base dict above to add other parts if they are present in the metadata + for cn, cmd in self._metadata.items(): + if cmd.missingness_strategy: + assembled_metadata["columns"][cn]["missingness"] = ( + cmd.missingness_strategy.name + if cmd.missingness_strategy.name != "impute" + else {"name": cmd.missingness_strategy.name, "impute": cmd.missingness_strategy.impute} + ) + if cmd.transformer_config: + assembled_metadata["columns"][cn]["transformer"] = { + **cmd.transformer_config, + "name": cmd.transformer.__class__.__name__, + } + + # Add back the dropped_columns not present in the metadata + if self.dropped_columns: + assembled_metadata["columns"].update({cn: "drop" for cn in self.dropped_columns}) + + if collapse_yaml: + assembled_metadata = self._collapse(assembled_metadata) + + # We add the constraints section after all of the formatting and processing above + # In general, the constraints are kept the same as the input (provided they passed validation) + # If `collapse_yaml` is specified, we output the minimum set of equivalent constraints + if self.constraints: + assembled_metadata["constraints"] = ( + [str(c) for c in self.constraints.minimal_constraints] + if collapse_yaml + else self.constraints.raw_constraint_strings + ) + return assembled_metadata + + def save(self, path: pathlib.Path, collapse_yaml: bool) -> None: + """ + Writes metadata to a YAML file. + + Args: + path: The path at which to write the metadata YAML file. + collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication. + """ + with open(path, "w") as yaml_file: + yaml.safe_dump( + self._assemble(collapse_yaml), + yaml_file, + default_flow_style=False, + sort_keys=False, + ) + + def get_sdv_metadata(self) -> dict[str, dict[str, dict[str, str]]]: + """ + Map combinations of our metadata implementation to SDV's as required by SDMetrics. + + Returns: + A dictionary containing the SDV metadata. + """ + sdv_metadata = { + "columns": { + cn: { + "sdtype": ( + "boolean" + if cmd.boolean + else "categorical" if cmd.categorical else "datetime" if cmd.dtype.kind == "M" else "numerical" + ), } for cn, cmd in self._metadata.items() } @@ -3669,9 +3669,7 @@

Source code in src/nhssynth/modules/dataloader/metadata.py -
309
-310
-311
+            
311
 312
 313
 314
@@ -3693,23 +3691,21 @@ 

330 331 332 -333

def get_sdv_metadata(self) -> dict[str, dict[str, dict[str, str]]]:
-    """
-    Map combinations of our metadata implementation to SDV's as required by SDMetrics.
-
-    Returns:
-        A dictionary containing the SDV metadata.
-    """
-    sdv_metadata = {
-        "columns": {
-            cn: {
-                "sdtype": "boolean"
-                if cmd.boolean
-                else "categorical"
-                if cmd.categorical
-                else "datetime"
-                if cmd.dtype.kind == "M"
-                else "numerical",
+333
def get_sdv_metadata(self) -> dict[str, dict[str, dict[str, str]]]:
+    """
+    Map combinations of our metadata implementation to SDV's as required by SDMetrics.
+
+    Returns:
+        A dictionary containing the SDV metadata.
+    """
+    sdv_metadata = {
+        "columns": {
+            cn: {
+                "sdtype": (
+                    "boolean"
+                    if cmd.boolean
+                    else "categorical" if cmd.categorical else "datetime" if cmd.dtype.kind == "M" else "numerical"
+                ),
             }
             for cn, cmd in self._metadata.items()
         }
@@ -3785,9 +3781,7 @@ 

Source code in src/nhssynth/modules/dataloader/metadata.py -
293
-294
-295
+            
295
 296
 297
 298
@@ -3799,21 +3793,23 @@ 

304 305 306 -307

def save(self, path: pathlib.Path, collapse_yaml: bool) -> None:
-    """
-    Writes metadata to a YAML file.
-
-    Args:
-        path: The path at which to write the metadata YAML file.
-        collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication.
-    """
-    with open(path, "w") as yaml_file:
-        yaml.safe_dump(
-            self._assemble(collapse_yaml),
-            yaml_file,
-            default_flow_style=False,
-            sort_keys=False,
-        )
+307
+308
+309
def save(self, path: pathlib.Path, collapse_yaml: bool) -> None:
+    """
+    Writes metadata to a YAML file.
+
+    Args:
+        path: The path at which to write the metadata YAML file.
+        collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication.
+    """
+    with open(path, "w") as yaml_file:
+        yaml.safe_dump(
+            self._assemble(collapse_yaml),
+            yaml_file,
+            default_flow_style=False,
+            sort_keys=False,
+        )
 
diff --git a/search/search_index.json b/search/search_index.json index 5a04c0d3..db8c3be1 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"NHS Synth","text":"

This is a package for generating useful synthetic data, audited and assessed along the dimensions of utility, privacy and fairness. Currently, the main focus of the package in its beta stage is to experiment with different model architectures to find which are the most promising for real-world usage.

See the User Guide to get started with running an experiment with the package.

See the Development Guide and Code Reference to get started with contributing to the package.

"},{"location":"development_guide/","title":"Development guide","text":"

This document aims to provide a comprehensive set of instructions for continuing development of this package. Good knowledge of Python development is assumed. Some ways of working are subjective and preferential; as such, we aim to proscribe other ways of working as little as possible.

"},{"location":"development_guide/#development-environment-setup","title":"Development environment setup","text":""},{"location":"development_guide/#python","title":"Python","text":"

The package currently supports versions 3.9, 3.10 and 3.11 of Python. We recommend installing all of these versions; at minimum the latest supported version of Python should be used. Many people use pyenv for managing multiple Python versions. On macOS, Homebrew is a good, less invasive option for this (provided you then use a virtual environment manager too). For virtual environment management, we recommend Python's in-built venv functionality, but conda or some similar system would suffice (note that in the section below it may not be necessary to use any specific virtual environment management at all depending on the setup of Poetry).

"},{"location":"development_guide/#poetry","title":"Poetry","text":"

We use Poetry to manage dependencies and the actual packaging and publishing of NHSSynth to PyPI. Poetry is a more robust alternative to a requirements.txt file, allowing for grouped dependencies and advanced build options. Rather than freezing a specific pip state, Poetry only specifies the top-level dependencies and then handles the resolution and installation of the latest compatible versions of the full dependency tree per these top-level dependencies. See the pyproject.toml in the GitHub repository and Poetry's documentation for further context.

Once Poetry is installed (in your preferred way per the instructions on their website), you can choose one of two options:

  1. Allow poetry to control virtual environments in its proprietary way, such that when you install and develop the package, poetry will automatically create a virtual environment for you.

  2. Change poetry's configuration to manage your own virtual environments:

    poetry config virtualenvs.create false\npoetry config virtualenvs.in-project false\n

    In this setup, a virtual environment can be instantiated and activated in whichever way you prefer. For example, using venv:

    python3.11 -m venv nhssynth-3.11\nsource nhssynth-3.11/bin/activate\n
"},{"location":"development_guide/#package-installation","title":"Package installation","text":"

At this point, the project dependencies can be installed via poetry install --with dev (add optional flags: --with aux to work with the auxiliary notebooks, --with docs to work with the documentation). This will install the package in editable mode, meaning that changes to the source code will be reflected in the installed package without needing to reinstall it. Note that if you are using your own virtual environment, you will need to activate it before running this command.

You can then interact with the package in one of two ways:

  1. Via the CLI module, which is accessed using the nhssynth command, e.g.

    poetry run nhssynth ...\n

    Note that you can omit the poetry run part and just type nhssynth if you followed the optional steps above to manage and activate your own virtual environment, or if you have executed poetry shell beforehand.

  2. Through directly importing parts of the package to use in an existing project (from nhssynth.modules... import ...).

"},{"location":"development_guide/#secure-mode","title":"Secure mode","text":"

Note that in order to train a generator in secure mode (see the documentation for details), the PyTorch extension package csprng must be installed separately. Currently this package's dependencies are not compatible with recent versions of PyTorch (the authors plan to rectify this - watch this space), so you will need to install it manually; you can do this in your environment by running:

git clone git@github.com:pytorch/csprng.git\ncd csprng\ngit branch release \"v0.2.2-rc1\"\ngit checkout release\npython setup.py install\n
"},{"location":"development_guide/#coding-practices","title":"Coding practices","text":""},{"location":"development_guide/#style","title":"Style","text":"

We use black for code formatting. This is a fairly opinionated formatter, but it is widely used and has a good reputation. We also use ruff to manage imports and lint the code. Both of these tools are run automatically via pre-commit hooks. Ensure you have installed the package with the dev group of dependencies and then run the following command to install the hooks:

pre-commit install\n

Note that you may need to prepend this command with poetry run if you are not using your own virtual environment.

This will ensure that your code conforms to the two formatters' / linters' requirements each time you commit to a branch. black and ruff are also run as part of the CI workflow discussed below, such that even without these hooks, the code will be checked and raise an error on GitHub if it is not formatted consistently.

Configuration for both packages can be found in the pyproject.toml; it should be picked up automatically by the pre-commit hooks, your IDE, and manual runs from the command line. The main configuration is as follows:

[tool.black]\nline-length = 120\n\n[tool.ruff]\ninclude = [\"*.py\", \"*.pyi\", \"**/pyproject.toml\", \"*.ipynb\"]\nselect = [\"E4\", \"E7\", \"E9\", \"F\", \"C90\", \"I\"]\n\n[tool.ruff.per-file-ignores]\n\"src/nhssynth/common/constants.py\" = [\"F403\", \"F405\"]\n\n[tool.ruff.isort]\nknown-first-party = [\"nhssynth\"]\n

This ensures that absolute imports from NHSSynth are sorted separately from the rest of the imports in a file.

There are a number of other hooks used as part of this repository's pre-commit configuration, including one that automatically mirrors the Poetry versions of these packages in the dev group per the list of supported packages and .poetry-sync-db.json. Roughly, these other hooks ensure correct formatting of .yaml and .toml files, check for large files being added to a commit, strip notebook output from the files, and fix whitespace and end-of-file issues. These are mostly consistent with the NHSx analytics project template's hooks.

"},{"location":"development_guide/#documentation","title":"Documentation","text":"

There should be Google-style docstrings on all non-trivial functions and classes. Ideally a docstring should take the form:

def func(arg1: type1, arg2: type2) -> returntype:\n    \"\"\"\n    One-line summary of the function.\n    AND / OR\n    Longer description of the function, including any caveats or assumptions where appropriate.\n\n    Args:\n        arg1: Description of arg1.\n        arg2: Description of arg2.\n\n    Returns:\n        Description of the return value.\n    \"\"\"\n    ...\n

These docstrings are then compiled into a full API documentation tree as part of a larger MkDocs documentation site hosted via GitHub (the one you are reading right now!). This process is derived from this tutorial.

The MkDocs page is built using the mkdocs-material theme. The documentation is built and hosted automatically via GitHub Pages.

The other parts of this site comprise markdown documents in the docs folder. Adding new pages is handled in the mkdocs.yml file as in any other Material MkDocs site. See their documentation if more complex changes to the site are required.

"},{"location":"development_guide/#testing","title":"Testing","text":"

We use tox to manage the execution of tests for the package against multiple versions of Python, and to ensure that they are being run in a clean environment. To run the tests, simply execute tox in the root directory of the repository. This will run the tests against all supported versions of Python. To run the tests against a specific version of Python, use tox -e py311 (or py310 or py39).

"},{"location":"development_guide/#configuration","title":"Configuration","text":"

See the tox.ini file for more information on the testing configuration. We follow the Poetry documentation on tox support to ensure that for each version of Python, tox will create an sdist package of the project and use pip to install it in a fresh environment. Thus, dependencies are resolved by pip in the first place and then updated to the locked dependencies in poetry.lock by running poetry install ... in this fresh environment. The tests are then run with pytest, which is configured in the pyproject.toml file. This configuration is fairly minimal: simply specifying the testing directory as the tests folder and filtering some known warnings.

[tool.pytest.ini_options]\ntestpaths = \"tests\"\nfilterwarnings = [\"ignore::DeprecationWarning:pkg_resources\"]\n

We can also use coverage to check the test coverage of the package. This is configured in the pyproject.toml file as follows:

[tool.coverage.run]\nsource = [\"src/nhssynth/cli\", \"src/nhssynth/common\", \"src/nhssynth/modules\"]\nomit = [\n    \"src/nhssynth/common/debugging.py\",\n]\n

We omit debugging.py as it is a wrapper for reading full trace-backs of warnings and not to be imported directly.

"},{"location":"development_guide/#adding-tests","title":"Adding Tests","text":"

We use the pytest framework for testing. The testing directory structure mirrors that of src. The usual testing practices apply.
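For instance, a test module for the dictionary helpers might look like the following minimal sketch (the file would live somewhere like tests/common/test_dicts.py to mirror src; the expected values are taken from the docstring examples in the code reference, and the exact file name is illustrative):

from nhssynth.common.dicts import filter_dict, get_key_by_value


def test_filter_dict_excludes_by_default():
    d = {"a": 1, "b": 2, "c": 3}
    assert filter_dict(d, {"a", "b"}) == {"c": 3}


def test_filter_dict_include():
    d = {"a": 1, "b": 2, "c": 3}
    assert filter_dict(d, {"a", "b"}, include=True) == {"a": 1, "b": 2}


def test_get_key_by_value():
    d = {"a": 1, "b": 2, "c": 1}
    assert get_key_by_value(d, 2) == "b"
    assert get_key_by_value(d, 3) is None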

"},{"location":"development_guide/#releases","title":"Releases","text":""},{"location":"development_guide/#version-management","title":"Version management","text":"

The package's version should be updated following the semantic versioning framework. The package is currently in a pre-release state, such that major version 1.0.0 should only be tagged once the package is functionally complete and stable.

To update the package's metadata, we can use Poetry's version command:

poetry version <version>\n

We can then commit and push the changes to the version file, and create a new tag:

git add pyproject.toml\ngit commit -m \"Bump version to <version>\"\ngit push\n

We should then tag the release using GitHub's CLI (or manually via git if you prefer):

gh release create <version> --generate-notes\n

This will create a new release on GitHub, and will automatically generate a changelog based on the commit messages and PRs closed since the last release. This changelog can then be edited to add more detail if necessary.

"},{"location":"development_guide/#building-and-publishing-to-pypi","title":"Building and publishing to PyPI","text":"

Poetry offers not only dependency management, but also a simple way to build and distribute the package.

After tagging a release per the section above, we can build the package using Poetry's build command:

poetry build\n

This will create a dist folder containing the built package. To publish this to PyPI, we can use the publish command:

poetry publish\n

This will prompt for PyPI credentials, and then publish the package. Note that this will only work if you have been added as a Maintainer of the package on PyPI.

It might be preferable at some point in the future to set up Trusted Publisher Management via OpenID Connect (OIDC) to allow for automated publishing of the package via a GitHub workflow. See the \"Publishing\" tab of NHSSynth's project management panel on PyPI to set this up.

"},{"location":"development_guide/#github","title":"GitHub","text":""},{"location":"development_guide/#continuous-integration","title":"Continuous integration","text":"

We use GitHub Actions for continuous integration. The different workflows comprising this can be found in the .github/workflows folder. In general, the CI workflow is triggered on every push to the main or a feature branch - as appropriate - and runs tests against all supported versions of Python. It also runs black and ruff to check that the code is formatted correctly, and builds the documentation site.

There are also scripts to update the dynamic badges in the README. These work via a gist associated with the repository. It is not easy to transfer ownership of this process, so if they break please feel free to contact me.

"},{"location":"development_guide/#branching","title":"Branching","text":"

We encourage the use of the Gitflow branching model for development. This means that the main branch is always in a stable state, and that all development work is done on feature branches. These feature branches are then merged into main via pull requests. The main branch is protected, such that pull requests must be reviewed and approved before they can be merged.

At minimum, the main branch's protection should be maintained, and roughly one branch per issue should be used. Ensure that all of the CI checks pass before merging.

"},{"location":"development_guide/#security-and-vulnerability-management","title":"Security and vulnerability management","text":"

The GitHub repository for the package has Dependabot, code scanning, and other security features enabled. These should be monitored continuously and any issues resolved as soon as possible. When issues of this type require a specific version of a dependency to be specified (and it is one that is not already amongst the dependency groups of the package), the version should be referenced as part of the security group of dependencies (i.e. with poetry add <package> --group security) and a new release created (see above).

"},{"location":"downstream_tasks/","title":"Defining a downstream task","text":"

A synthetic dataset is likely to be associated with specific modelling efforts or metrics that are not included in the general suite of evaluation tools supported more explicitly by this package. Additionally, analyses of model outputs for bias and fairness provided via Aequitas require some basis of predictions on which to perform the analysis. For these reasons, we provide a simple interface for defining a custom downstream task.

All downstream tasks are to be located in a folder named tasks in the working directory of the project, with subfolders for each dataset, i.e. the tasks associated with the support dataset should be located in the tasks/support directory.

The interface is then quite simple:

  • There should be a function called run that takes a single argument: dataset (additional arguments could be provided with some further configuration if there is a need for this)
  • The run function should fit a model and / or calculate some metric(s) on the dataset.
  • It should then return predicted probabilities for the outcome variable(s) in the dataset and a dictionary of metrics.
  • The file should contain a top-level variable containing an instantiation of the nhssynth Task class.

See the example below of a logistic regression model fit on the support dataset with the event variable as the outcome and rocauc as the metric of interest:

import pandas as pd\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.metrics import roc_auc_score\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\n\nfrom nhssynth.modules.evaluation.tasks import Task\n\n\ndef run(dataset: pd.DataFrame) -> tuple[pd.DataFrame, dict]:\n    # Split the dataset into features and target\n    target = \"event\"\n\n    data = dataset.dropna()\n    X, y = data.drop([\"dob\", \"x3\", target], axis=1), data[target]\n    X_train, X_test, y_train, y_test = train_test_split(\n        StandardScaler().fit_transform(X), y, test_size=0.33, random_state=42\n    )\n\n    lr = LogisticRegression()\n    lr.fit(X_train, y_train)\n\n    # Get the predicted probabilities and predictions\n    probs = pd.DataFrame(lr.predict_proba(X_test)[:, 1], columns=[f\"lr_{target}_prob\"])\n\n    rocauc = roc_auc_score(y_test, probs)\n\n    return probs, {\"rocauc_lr\": rocauc}\n\n\ntask = Task(\"Logistic Regression on 'event'\", run, supports_aequitas=True)\n

Note the highlighted lines above:

  1. The Task class has been imported from nhssynth.modules.evaluation.tasks
  2. The run function should accept one argument and return a tuple
  3. The second element of this tuple should be a dictionary labelling each metric of interest (this name will be used in the dashboard as identification so ensure it is unique to the experiment)
  4. The task should be instantiated with a name, the run function and a boolean indicating whether the task supports Aequitas analysis; if the task does not support Aequitas analysis, then the first element of the tuple will not be used and None can be returned instead.

The rest of this file can contain any arbitrary code that runs within these constraints; this could be a simple model as above, or a more complex pipeline of transformations and models to match a pre-existing workflow.

"},{"location":"getting_started/","title":"Getting Started","text":""},{"location":"getting_started/#running-an-experiment","title":"Running an experiment","text":"

This package offers two easy ways to run reproducible and highly-configurable experiments. The following sections describe how to use each of these two methods.

"},{"location":"getting_started/#via-the-cli","title":"Via the CLI","text":"

The CLI is the easiest way to quickly run an experiment. It is designed to be as simple as possible, whilst still offering a high degree of configurability. An example command to run a full pipeline experiment is:

nhssynth pipeline \\\n    --experiment-name test \\\n    --dataset support \\\n    --seed 123 \\\n    --architecture DPVAE PATEGAN DECAF \\\n    --repeats 3 \\\n    --downstream-tasks \\\n    --column-similarity-metrics CorrelationSimilarity ContingencySimilarity \\\n    --column-shape-metrics KSComplement TVComplement \\\n    --boundary-metrics BoundaryAdherence \\\n    --synthesis-metrics NewRowSynthesis \\\n    --divergence-metrics ContinuousKLDivergence DiscreteKLDivergence\n

This will run a full pipeline experiment on the support dataset in the data directory. The outputs of the experiment will be recorded in a folder named test (corresponding to the experiment name) in the experiments directory.

In total, three different model architectures will be trained three times each with their default configurations. The resulting generated synthetic datasets will be evaluated via the downstream tasks in tasks/support alongside the metrics specified in the command. A dashboard will then be built automatically to exhibit the results.

The components of the run are persisted to the experiment's folder. Suppose you have already run this experiment and want to add some new evaluations. You do not have to re-run the entire experiment; you can simply run:

nhssynth evaluation -e test -d support -s 123 --coverage-metrics RangeCoverage CategoryCoverage\nnhssynth dashboard -e test -d support\n

This will regenerate the dashboard with a different set of metrics corresponding to the arguments passed to evaluation. Note that the --experiment-name and --dataset arguments are required for all commands, as they are used to identify the experiment and ensure reproducibility.

"},{"location":"getting_started/#via-a-configuration-file","title":"Via a configuration file","text":"

A yaml configuration file placed in the config folder can be used to get the same result as the above:

seed: 123\nexperiment_name: test\nrun_type: pipeline\nmodel:\n  architecture:\n    - DPVAE\n    - DPGAN\n    - DECAF\n  max_grad_norm: 5.0\n  secure_mode: false\n  repeats: 4\nevaluation:\n  downstream_tasks: true\n  column_shape_metrics:\n  - KSComplement\n  - TVComplement\n  column_similarity_metrics:\n  - CorrelationSimilarity\n  - ContingencySimilarity\n  boundary_metrics:\n  - BoundaryAdherence\n  synthesis_metrics:\n  - NewRowSynthesis\n  divergence_metrics:\n  - ContinuousKLDivergence\n  - DiscreteKLDivergence\n

Once saved as run_pipeline.yaml in the config directory, the package can be run under the configuration laid out in the file via:

nhssynth config -c run_pipeline\n

Note that if you run via the CLI, you can add the --save-config flag to your command to save the configuration file in the experiments/test (or whatever the --experiment-name has been set to) directory. This allows for easy reproduction of an experiment at a later date or on someone else's computer through sharing the configuration file with them.

"},{"location":"getting_started/#setting-up-a-datasets-metadata","title":"Setting up a dataset's metadata","text":"

For each dataset you wish to work with, it is advisable to set up a corresponding metadata file. The package will infer this when information is missing (and you can then tweak it). The reason we suggest specifying metadata in this way is that pandas / Python are in general bad at interpreting CSV files, particularly the specifics of data types, date objects and so on.

To do this, we must create a metadata YAML file in the dataset's directory. For example, for the support dataset, this file is located at data/support_metadata.yaml. By default, the package will look for a file with the same name as the dataset in the dataset's directory, but with _metadata appended to the end. Like most other file-naming conventions, this is configurable via the CLI.

The metadata file is split into two sections: columns and constraints. The former specifies the nature of each column in the dataset, whilst the latter specifies any constraints that should be enforced on the dataset.

"},{"location":"getting_started/#column-metadata","title":"Column metadata","text":"

Again, we refer to the support dataset's metadata file as an example:

columns:\n  dob:\n    dtype:\n      name: datetime64\n      floor: S\n  x1:\n    categorical: true\n    dtype: int64\n  x2:\n    categorical: true\n    dtype: int64\n  x3:\n    categorical: true\n  x4:\n    categorical: true\n    dtype: int64\n  x5:\n    categorical: true\n    dtype: int64\n  x6:\n    categorical: true\n    dtype: int64\n  x7:\n    dtype: int64\n  x8:\n    dtype: float64\n    missingness:\n      impute: mean\n  x9:\n    dtype: int64\n  x10:\n    dtype:\n      name: float64\n      rounding_scheme: 0.1\n  x11:\n    dtype: int64\n  x12:\n    dtype: float64\n  x13:\n    dtype: float64\n  x14:\n    dtype: float64\n  duration:\n    dtype: int64\n  event:\n    categorical: true\n    dtype: int64\n

For each column in the dataset, we specify the following:

  • Its dtype; this can be any numpy data type or a datetime type.
  • In the case of a datetime type, we also specify the floor (i.e. the smallest unit of time that we care about). In general this should be set to match the smallest unit of time in the dataset.
  • In the case of a float type, we can also specify a rounding_scheme to round the values to a certain number of decimal places; this should be set to match the rounding applied to the column in the real data, or used if you want to round the values for some other reason.
  • Whether it is categorical or not. If a column is not categorical, you don't need to specify this. A column is inferred as categorical if it has fewer than 10 unique values or is a string type.
  • If the column has missing values, we can specify how to deal with them via a missingness strategy. In the case of the x8 column, we impute the missing values with the column's mean. If you don't specify this, the global missingness strategy specified via the CLI or configuration file will be applied instead (this defaults to the augment strategy, which models the missingness as a separate level in the case of categorical features, or as a separate cluster in the case of continuous features).
"},{"location":"getting_started/#constraints","title":"Constraints","text":"

The second part of the metadata file specifies any constraints that should be enforced on the dataset. A constraint can be relative (between two columns) or fixed (comparing a single column against a constant). For example, the support dataset's constraints are as follows (note that these are arbitrarily defined and do not necessarily reflect the real data):

constraints:\n  - \"x10 in (0,100)\"\n  - \"x12 in (0,100)\"\n  - \"x13 in (0,100)\"\n  - \"x10 <= x12\"\n  - \"x12 < x13\"\n  - \"x10 < x13\"\n  - \"x8 > x10\"\n  - \"x8 > x12\"\n  - \"x8 > x13\"\n  - \"x11 > 100\"\n  - \"x12 > 10\"\n

The function of these constraints is fairly self-explanatory: the package ensures the constraints are feasible and minimises them before applying transformations to ensure that they will also be satisfied in the synthetic data. When a column does not meet a feasible constraint in the real data, we assume that this is intentional and use the violation as a feature upon which to generate synthetic data that also violates the constraint.

There is a further constraint, fixcombo, that only applies to categorical columns. This specifies that only existing combinations of two or more categorical columns should be generated, i.e. the columns can be collapsed into a single composite feature. For example, if we have a column for pregnancy and another for sex, we may only want to allow the three categories 'male:not-pregnant', 'female:pregnant' and 'female:not-pregnant'. This is specified as follows:

constraints:\n  - \"pregnancy fixcombo sex\"\n

In summary, we support the following constraint types:

  • fixcombo for categorical columns
  • > and < for non-categorical columns
  • >= and <= for non-categorical columns
  • in for non-categorical columns, which is effectively two of the above constraints combined, i.e. x in [a, b) is equivalent to x >= a and x < b. This is purely a UX feature and is treated as two separate constraints internally.

Once this metadata is set up, you are ready to run your experiment.

"},{"location":"getting_started/#evaluation","title":"Evaluation","text":"

Once models have been trained and synthetic datasets generated, we leverage evaluations from SDMetrics, Aequitas and the NHS's internal SynAdvSuite (at the time of writing you must request access to this repository to use the privacy-related attacks it implements), and also offer a facility for the custom specification of downstream tasks. These evaluations are then aggregated into a dashboard for ease of comparison and analysis.

See the relevant documentation for each of these packages for more information on the metrics they offer.

"},{"location":"model_card/","title":"Model Card: Variational AutoEncoder with Differential Privacy","text":""},{"location":"model_card/#model-details","title":"Model Details","text":"

The implementation of the Variational AutoEncoder (VAE) with Differential Privacy within this repository is based on work done by Dominic Danks during an NHSX Analytics Unit PhD internship (last commit to the original SynthVAE repository: commit 88a4bdf). This model card describes an updated and extended version of the model, by Harrison Wilde. Further information about the previous version created by Dom and its model implementation can be found in Section 5.4 of the associated report.

"},{"location":"model_card/#model-use","title":"Model Use","text":""},{"location":"model_card/#intended-use","title":"Intended Use","text":"

This model is intended for use in experimenting with differential privacy and VAEs.

"},{"location":"model_card/#training-data","title":"Training Data","text":"

Experiments in this repository are run against the Study to Understand Prognoses and Preferences for Outcomes and Risks of Treatments (SUPPORT) dataset, accessed via the pycox Python library. We also performed further analysis on a single table that we extracted from MIMIC-III.

"},{"location":"model_card/#performance-and-limitations","title":"Performance and Limitations","text":"

A from-scratch VAE implementation was compared against various models available within the SDV framework using a variety of quality and privacy metrics on the SUPPORT dataset. The VAE was found to be competitive with all of these models across the various metrics. Differential Privacy (DP) was introduced via DP-SGD and the performance of the VAE for different levels of privacy was evaluated. It was found that as the level of Differential Privacy introduced by DP-SGD was increased, it became easier to distinguish between synthetic and real data.

Proper evaluation of quality and privacy of synthetic data is challenging. In this work, we utilised metrics from the SDV library due to their natural integration with the rest of the codebase. A valuable extension of this work would be to apply a variety of external metrics, including more advanced adversarial attacks to more thoroughly evaluate the privacy of the considered methods, including as the level of DP is varied. It would also be of interest to apply DP-SGD and/or PATE to all of the considered methods and evaluate whether the performance drop as a function of implemented privacy is similar or different across the models.

Currently the SynthVAE model only works for data which is 'clean', i.e. data that has no missingness or NaNs in its input. It can handle continuous, categorical and datetime variables. Special types such as nominal data cannot be handled properly; however, the model may still run. Column names have to be specified in the code for the variable group they belong to.

Hyperparameter tuning of the model can result in errors if certain parameter values are selected. Most commonly, changing the learning rate in our example results in errors during training. An extensive evaluation of plausible ranges has not yet been performed. If you get errors during tuning, consider your hyperparameter values and adjust them accordingly.

"},{"location":"model_card/#acknowledgements","title":"Acknowledgements","text":"

This documentation is inspired by Model Cards for Model Reporting (Mitchell et al.) and Lessons from Archives (Jo & Gebru).

"},{"location":"models/","title":"Adding new models","text":"

The model module contains all of the architectures implemented as part of this package. We offer GAN- and VAE-based architectures with a number of adjustments to achieve privacy and other augmented functionality. The module handles the training and generation of synthetic data using these architectures, per a user's choice of model(s) and configuration.

It is likely that, as the literature matures, more effective architectures will emerge for application to the type of tabular data NHSSynth is designed for. Below we discuss how to add new models to the package.

"},{"location":"models/#model-design","title":"Model design","text":"

The models in this package are built entirely in PyTorch and use Opacus for differential privacy.

We have built the VAE and (Tabular)GAN implementations in this package to serve as the foundations for a number of other architectures. As such, we try to maintain a somewhat modular design for building up more complex differentially private (or otherwise augmented) architectures. Each model inherits from either the GAN or VAE class (in files of the same name), which in turn inherit from a generic Model class found in the common folder. This folder contains components of models which are not to be instantiated themselves, e.g. a mixin class for differential privacy, the MLP underlying the GAN, and so on.

The Model class from which all of the models derive handles the general attributes. Roughly, these are the specifics of the dataset the model instance is tied to, the device that training is to be carried out on, and other training parameters such as the total number of epochs to execute.

We define these things at the model level because, when using differential privacy or other privacy-accountant methods, we must know the data and the length of training exposure ahead of time in order to calculate the level of noise required to reach a given privacy guarantee, and so on.

"},{"location":"models/#implementing-a-new-model","title":"Implementing a new model","text":"

To add a new architecture, first investigate the modular parts already implemented to ensure that what you want to build is not already possible through composition of these existing parts. Your architecture should then inherit from GAN or VAE, or from Model if you wish to implement a different type of generative model.

In all of these cases, the interface expects the implementation to provide the following methods (a minimal skeleton illustrating them follows the list):

  • get_args: a class method that lists the architecture-specific arguments that the model requires. This is used to facilitate default arguments in the Python API whilst still allowing arguments in the CLI to be propagated and recorded automatically in the experiment output. It should return a list of variable names equal to the concatenation of the arguments of all of the non-Model parent classes (e.g. DPVAE has DP and VAE args) plus any architecture-specific arguments in the __init__ method of the model in question.
  • get_metrics: another class method that behaves similarly to the above; it should return a list of valid metrics to track during training for this model.
  • train: a method handling the training loop for the model. This should take num_epochs, patience and displayed_metrics as arguments and return a tuple containing the number of epochs that were executed plus a bundle of training metrics (the values over time of the metrics returned by get_metrics on the class). During execution of this method, the utility methods defined in Model should be called in order: _start_training at the beginning, _record_metrics at each training step of the data loader, and finally _finish_training to clean up progress bars and so on. displayed_metrics determines which metrics are actively displayed during training.
  • generate: a method called on the trained model which generates N samples of data and uses the model's associated MetaTransformer to return a valid pandas DataFrame of synthetic data ready for output.
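
To make this interface concrete, the following is a minimal, hypothetical skeleton of a new architecture. It is a sketch only: the base-class import path, the helper-method signatures and the argument/metric names are assumptions inferred from the description above rather than verified API.

from nhssynth.modules.model.models.gan import GAN  # assumed import path\n\n\nclass MyNewGAN(GAN):\n    # hypothetical architecture extending the foundational GAN class\n\n    def __init__(self, *args, my_new_param: float = 0.1, **kwargs) -> None:\n        super().__init__(*args, **kwargs)\n        self.my_new_param = my_new_param\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        # the parent's args plus this architecture's own __init__ arguments\n        return GAN.get_args() + ['my_new_param']\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return ['GLoss', 'DLoss']\n\n    def train(self, num_epochs, patience, displayed_metrics):\n        # call self._start_training here, loop over the data loader calling\n        # self._record_metrics at each step, then self._finish_training, and\n        # return (epochs_executed, metrics_bundle)\n        ...\n\n    def generate(self, N):\n        # sample N rows and pass them through the associated MetaTransformer\n        # to return a pandas DataFrame of synthetic data\n        ...\n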
"},{"location":"models/#adding-a-new-model-to-the-cli","title":"Adding a new model to the CLI","text":"

Once you have implemented your new model, you must add it to the CLI. To do this, first export the model's class into the MODELS constant in the __init__ file of the models subfolder. You can then add a new function and option in model_arguments.py to list the arguments and their types unique to this type of architecture.

Note that you should not duplicate arguments that are already defined in the Model class or in foundational model architectures such as the GAN if you are implementing an extension to one of them. If you have set up get_args correctly, all of this will be propagated automatically.
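
As a rough illustration of these registration steps, here is a hypothetical sketch; the file contents, class and argument names are illustrative rather than taken from the package.

import argparse\n\n# hypothetical sketch of the two registration steps above; names are illustrative\n\n# 1. in src/nhssynth/modules/model/models/__init__.py, expose the new class:\n#        MODELS = {..., 'MyNewGAN': MyNewGAN}\n\n# 2. in src/nhssynth/cli/model_arguments.py, declare its architecture-specific arguments\n#    (mirroring the existing add_gan_args / add_vae_args functions):\ndef add_mynewgan_args(group: argparse._ArgumentGroup, overrides: bool = False) -> None:\n    group.add_argument(\n        '--my-new-param',\n        type=float,\n        help='an architecture-specific hyperparameter of MyNewGAN',\n    )\n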

"},{"location":"modules/","title":"Adding new modules","text":"

The package is designed such that each module can be used as part of a pipeline (via the CLI or a configuration file) or independently (by importing it into an existing codebase).

In the future it may be desirable to add or adjust the modules of the package; this guide offers a high-level overview of how to do so.

"},{"location":"modules/#importing-a-module-from-this-package","title":"Importing a module from this package","text":"

After installing the package, you can simply do:

from nhssynth.modules import <module>\n
and you will be able to use it in your code!

"},{"location":"modules/#creating-a-new-module-and-folding-it-into-the-cli","title":"Creating a new module and folding it into the CLI","text":"

The following instructions specify how to extend this package with a new module:

  1. Create a folder for your module within the package, e.g. src/nhssynth/modules/mymodule
  2. Include within it a main executor function that accepts arguments from the CLI, i.e.

    def myexecutor(args):\n    ...\n

    Place this in mymodule/executor.py and export it by adding from .executor import myexecutor to mymodule/__init__.py. Check the existing modules for examples of what a typical executor function looks like.

  3. In the cli folder, add a corresponding function to module_arguments.py and populate it with the arguments you want to expose in the CLI:

    def add_mymodule_args(parser: argparse.ArgumentParser, group_title: str, overrides=False):\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(...)\n    group.add_argument(...)\n    ...\n
  4. Next, in module_setup.py make the following adjustments to the MODULE_MAP code:

    MODULE_MAP = {\n    ...\n    \"mymodule\": ModuleConfig(\n        func=m.mymodule.myexecutor,\n        add_args=ma.add_mymodule_args,\n        description=\"...\",\n        help=\"...\",\n        common_parsers=[...]\n    ),\n    ...\n}\n

    Where common_parsers is a subset of COMMON_PARSERS defined in common_arguments.py. Note that the \"seed\" and \"core\" parsers are added automatically, so you don't need to specify them. These parsers can be used to add arguments to your module that are common to multiple modules, e.g. the dataloader and evaluation modules both use --typed to specify the path of the typed input dataset.

  5. You can (optionally) also edit the following block if you want your module to be included in a full pipeline run:

    PIPELINE = [..., mymodule, ...]  # NOTE this determines the order of a pipeline run\n
  6. Congratulations, your module is now implemented within the CLI; its documentation will be built automatically and it can be referenced in configuration files!

"},{"location":"secure_mode/","title":"Opacus' secure mode","text":"

Part of the process for achieving a differential privacy guarantee under Opacus involves generating noise according to a Gaussian distribution with mean 0 in Opacus' _generate_noise() function.

Enabling secure_mode when using the NHSSynth package ensures that the generated noise is also secure against floating point representation attacks, such as the ones in https://arxiv.org/abs/2107.10138 and https://arxiv.org/abs/2112.05307.

This attack first appeared in https://arxiv.org/abs/2112.05307; the fix via the csprng package is based on https://arxiv.org/abs/2107.10138 and involves calling the Gaussian noise function $2n$ times, where $n=2$ (see section 5.1 in https://arxiv.org/abs/2107.10138).

The reason for choosing $n=2$ is that $n$ can be any number greater than $1$, but the bigger $n$ is, the more computation is needed to generate the Gaussian samples. The choice of $n=2$ is justified by the fact that the attack has a complexity of $2^{p(2n-1)}$. In PyTorch, $p=53$, so with $n=2$ the exponent is $p(2n-1) = 159$ and the complexity is $2^{159}$, which is deemed sufficiently hard for an attacker to break.

"},{"location":"reference/SUMMARY/","title":"SUMMARY","text":"
  • cli
    • common_arguments
    • config
    • model_arguments
    • module_arguments
    • module_setup
    • run
  • common
    • common
    • constants
    • debugging
    • dicts
    • io
    • strings
  • modules
    • dashboard
      • Upload
      • io
      • pages
        • 1_Tables
        • 2_Plots
        • 3_Experiment_Configurations
      • run
      • utils
    • dataloader
      • constraints
      • io
      • metadata
      • metatransformer
      • missingness
      • run
      • transformers
        • base
        • categorical
        • continuous
        • datetime
    • evaluation
      • aequitas
      • io
      • metrics
      • run
      • tasks
      • utils
    • model
      • common
        • dp
        • mlp
        • model
      • io
      • models
        • dpvae
        • gan
        • vae
      • run
      • utils
    • plotting
      • io
      • plots
      • run
    • structure
      • run
"},{"location":"reference/cli/","title":"cli","text":""},{"location":"reference/cli/common_arguments/","title":"common_arguments","text":"

Functions to define the CLI's \"common\" arguments, i.e. those that can be applied to either all module argument lists (e.g. --dataset, --seed, etc.) or a subset of modules' argument lists (e.g. --synthetic, --typed, etc.).

"},{"location":"reference/cli/common_arguments/#nhssynth.cli.common_arguments.get_core_parser","title":"get_core_parser(overrides=False)","text":"

Create the core common parser group applied to all modules (and the pipeline and config options). Note that we leverage common titling of the argument group to ensure arguments appear together even if declared separately.

Parameters:

Name Type Description Default overrides

whether the arguments declared within are required or not.

False

Returns:

Type Description ArgumentParser

The parser with the group containing the core arguments attached.

Source code in src/nhssynth/cli/common_arguments.py
def get_core_parser(overrides=False) -> argparse.ArgumentParser:\n    \"\"\"\n    Create the core common parser group applied to all modules (and the `pipeline` and `config` options).\n    Note that we leverage common titling of the argument group to ensure arguments appear together even if declared separately.\n\n    Args:\n        overrides: whether the arguments declared within are required or not.\n\n    Returns:\n        The parser with the group containing the core arguments attached.\n    \"\"\"\n    \"\"\"\"\"\"\n    core = argparse.ArgumentParser(add_help=False)\n    core_grp = core.add_argument_group(title=\"options\")\n    core_grp.add_argument(\n        \"-d\",\n        \"--dataset\",\n        required=(not overrides),\n        type=str,\n        help=\"the name of the dataset to experiment with, should be present in `<DATA_DIR>`\",\n    )\n    core_grp.add_argument(\n        \"-e\",\n        \"--experiment-name\",\n        type=str,\n        default=TIME,\n        help=\"name the experiment run to affect logging, config, and default-behaviour i/o\",\n    )\n    core_grp.add_argument(\n        \"--save-config\",\n        action=\"store_true\",\n        help=\"save the config provided via the cli, this is a recommended option for reproducibility\",\n    )\n    return core\n
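
For illustration, here is a hypothetical way of reusing this parser when composing a module-level parser; the composition and argument values below are illustrative rather than taken from the package.

import argparse\n\nfrom nhssynth.cli.common_arguments import get_core_parser\n\n# hypothetical usage: supplying the core parser via parents= attaches its\n# argument group to a module-level parser\ncore = get_core_parser()\nmodule_parser = argparse.ArgumentParser(parents=[core])\nargs = module_parser.parse_args(['--dataset', 'support'])\nprint(args.dataset, args.experiment_name, args.save_config)\n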
"},{"location":"reference/cli/common_arguments/#nhssynth.cli.common_arguments.get_seed_parser","title":"get_seed_parser(overrides=False)","text":"

Create the common parser group for the seed. NB This is separate to the rest of the core arguments as it does not apply to the dashboard module.

Parameters:

Name Type Description Default overrides

whether the arguments declared within are required or not.

False

Returns:

Type Description ArgumentParser

The parser with the group containing the seed argument attached.

Source code in src/nhssynth/cli/common_arguments.py
def get_seed_parser(overrides=False) -> argparse.ArgumentParser:\n    \"\"\"\n    Create the common parser group for the seed.\n    NB This is separate to the rest of the core arguments as it does not apply to the dashboard module.\n\n    Args:\n        overrides: whether the arguments declared within are required or not.\n\n    Returns:\n        The parser with the group containing the seed argument attached.\n    \"\"\"\n    parser = argparse.ArgumentParser(add_help=False)\n    parser_grp = parser.add_argument_group(title=\"options\")\n    parser_grp.add_argument(\n        \"-s\",\n        \"--seed\",\n        type=int,\n        help=\"specify a seed for reproducibility, this is a recommended option for reproducibility\",\n    )\n    return parser\n
"},{"location":"reference/cli/common_arguments/#nhssynth.cli.common_arguments.suffix_parser_generator","title":"suffix_parser_generator(name, help, required=False)","text":"

Generator function for creating parsers following a common template. These parsers are all suffixes to the --dataset / -d / DATASET argument, see COMMON_TITLE.

Parameters:

Name Type Description Default name str

the name / label of the argument to add to the CLI options.

required help str

the help message when the CLI is run with --help / -h.

required required bool

whether the argument must be provided or not.

False Source code in src/nhssynth/cli/common_arguments.py
def suffix_parser_generator(name: str, help: str, required: bool = False) -> argparse.ArgumentParser:\n    \"\"\"Generator function for creating parsers following a common template.\n    These parsers are all suffixes to the --dataset / -d / DATASET argument, see `COMMON_TITLE`.\n\n    Args:\n        name: the name / label of the argument to add to the CLI options.\n        help: the help message when the CLI is run with --help / -h.\n        required: whether the argument must be provided or not.\n    \"\"\"\n\n    def get_parser(overrides: bool = False) -> argparse.ArgumentParser:\n        parser = argparse.ArgumentParser(add_help=False)\n        parser_grp = parser.add_argument_group(title=COMMON_TITLE)\n        parser_grp.add_argument(\n            f\"--{name.replace('_', '-')}\",\n            required=required and not overrides,\n            type=str,\n            default=f\"_{name}\",\n            help=help,\n        )\n        return parser\n\n    return get_parser\n
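
For illustration, a hypothetical invocation of this generator; the suffix name and help text are made up, though --typed is one of the common arguments mentioned elsewhere in these docs.

from nhssynth.cli.common_arguments import suffix_parser_generator\n\n# hypothetical usage; the suffix name and help text are illustrative\nget_typed_parser = suffix_parser_generator(\n    name='typed',\n    help='the name of the typed version of the dataset',\n)\ntyped_parser = get_typed_parser()  # exposes --typed with the default value '_typed'\n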
"},{"location":"reference/cli/config/","title":"config","text":"

Read, write and process config files, including handling of module-specific / common config overrides.

"},{"location":"reference/cli/config/#nhssynth.cli.config.assemble_config","title":"assemble_config(args, all_subparsers)","text":"

Assemble and arrange a nested-via-module configuration dictionary from parsed command-line arguments to be output as a YAML record.

Parameters:

Name Type Description Default args Namespace

A namespace object containing all parsed command-line arguments.

required all_subparsers dict[str, ArgumentParser]

A dictionary mapping module names to subparser objects.

required

Returns:

Type Description dict[str, Any]

A dictionary containing configuration information extracted from args in a module-wise nested format that is YAML-friendly.

Raises:

Type Description ValueError

If a module specified in args.modules_to_run is not in all_subparsers.

Source code in src/nhssynth/cli/config.py
def assemble_config(\n    args: argparse.Namespace,\n    all_subparsers: dict[str, argparse.ArgumentParser],\n) -> dict[str, Any]:\n    \"\"\"\n    Assemble and arrange a nested-via-module configuration dictionary from parsed command-line arguments to be output as a YAML record.\n\n    Args:\n        args: A namespace object containing all parsed command-line arguments.\n        all_subparsers: A dictionary mapping module names to subparser objects.\n\n    Returns:\n        A dictionary containing configuration information extracted from `args` in a module-wise nested format that is YAML-friendly.\n\n    Raises:\n        ValueError: If a module specified in `args.modules_to_run` is not in `all_subparsers`.\n    \"\"\"\n    args_dict = vars(args)\n\n    # Filter out the keys that are not relevant to the config file\n    args_dict = filter_dict(\n        args_dict, {\"func\", \"experiment_name\", \"save_config\", \"save_config_path\", \"module_handover\"}\n    )\n    for k in args_dict.copy().keys():\n        # Remove empty metric lists from the config\n        if \"_metrics\" in k and not args_dict[k]:\n            args_dict.pop(k)\n\n    modules_to_run = args_dict.pop(\"modules_to_run\")\n    if len(modules_to_run) == 1:\n        run_type = modules_to_run[0]\n    elif modules_to_run == PIPELINE:\n        run_type = \"pipeline\"\n    else:\n        raise ValueError(f\"Invalid value for `modules_to_run`: {modules_to_run}\")\n\n    # Generate a dictionary containing each module's name from the run, with all of its possible corresponding config args\n    module_args = {\n        module_name: [action.dest for action in all_subparsers[module_name]._actions if action.dest != \"help\"]\n        for module_name in modules_to_run\n    }\n\n    # Use the flat namespace to populate a nested (by module) dictionary of config args and values\n    out_dict = {}\n    for module_name in modules_to_run:\n        for k in args_dict.copy().keys():\n            # We want to keep dataset, experiment_name, seed and save_config at the top-level as they are core args\n            if k in module_args[module_name] and k not in {\n                \"version\",\n                \"dataset\",\n                \"experiment_name\",\n                \"seed\",\n                \"save_config\",\n            }:\n                if module_name not in out_dict:\n                    out_dict[module_name] = {}\n                v = args_dict.pop(k)\n                if v is not None:\n                    out_dict[module_name][k] = v\n\n    # Assemble the final dictionary in YAML-compliant form\n    return {**({\"run_type\": run_type} if run_type else {}), **args_dict, **out_dict}\n
"},{"location":"reference/cli/config/#nhssynth.cli.config.get_default_and_required_args","title":"get_default_and_required_args(top_parser, module_parsers)","text":"

Get the default and required arguments for the top-level parser and the current run's corresponding list of module parsers.

Parameters:

Name Type Description Default top_parser ArgumentParser

The top-level parser (contains common arguments).

required module_parsers dict[str, ArgumentParser]

The dict of module-level parsers mapped to their names.

required

Returns:

Type Description tuple[dict[str, Any], list[str]]

A tuple containing two elements: - A dictionary containing all arguments and their default values. - A list of key-value-pairs of the required arguments and their associated module.

Source code in src/nhssynth/cli/config.py
def get_default_and_required_args(\n    top_parser: argparse.ArgumentParser,\n    module_parsers: dict[str, argparse.ArgumentParser],\n) -> tuple[dict[str, Any], list[str]]:\n    \"\"\"\n    Get the default and required arguments for the top-level parser and the current run's corresponding list of module parsers.\n\n    Args:\n        top_parser: The top-level parser (contains common arguments).\n        module_parsers: The dict of module-level parsers mapped to their names.\n\n    Returns:\n        A tuple containing two elements:\n            - A dictionary containing all arguments and their default values.\n            - A list of key-value-pairs of the required arguments and their associated module.\n    \"\"\"\n    all_actions = {\"top-level\": top_parser._actions} | {m: p._actions for m, p in module_parsers.items()}\n    defaults = {}\n    required_args = []\n    for module, actions in all_actions.items():\n        for action in actions:\n            if action.dest not in [\"help\", \"==SUPPRESS==\"]:\n                defaults[action.dest] = action.default\n                if action.required:\n                    required_args.append({\"arg\": action.dest, \"module\": module})\n    return defaults, required_args\n
"},{"location":"reference/cli/config/#nhssynth.cli.config.get_modules_to_run","title":"get_modules_to_run(executor)","text":"

Get the list of modules to run from the passed executor function.

Parameters:

Name Type Description Default executor Callable

The executor function to run.

required

Returns:

Type Description list[str]

A list of module names to run.

Source code in src/nhssynth/cli/config.py
def get_modules_to_run(executor: Callable) -> list[str]:\n    \"\"\"\n    Get the list of modules to run from the passed executor function.\n\n    Args:\n        executor: The executor function to run.\n\n    Returns:\n        A list of module names to run.\n    \"\"\"\n    if executor == run_pipeline:\n        return PIPELINE\n    else:\n        return [get_key_by_value({mn: mc.func for mn, mc in MODULE_MAP.items()}, executor)]\n
"},{"location":"reference/cli/config/#nhssynth.cli.config.read_config","title":"read_config(args, parser, all_subparsers)","text":"

Hierarchically assembles a config argparse.Namespace object for the inferred modules to run and execute, given a file.

  1. Load the YAML file containing the config to read from
  2. Check a valid run_type is specified or infer it and determine the list of modules_to_run
  3. Establish the appropriate default configuration set of arguments from the parser and all_subparsers for the determined modules_to_run
  4. Overwrite these with the specified (sub)set of config in the YAML file
  5. Overwrite again with passed command-line args (these are considered 'overrides')
  6. Run the appropriate module(s) or pipeline with the resulting configuration Namespace object

Parameters:

Name Type Description Default args Namespace

Namespace object containing arguments from the command line

required parser ArgumentParser

top-level ArgumentParser object containing common arguments

required all_subparsers dict[str, ArgumentParser]

dictionary of ArgumentParser objects, one for each module

required

Returns:

Type Description Namespace

A Namespace object containing the assembled configuration settings

Raises:

Type Description AssertionError

if any required arguments are missing from the configuration file / overrides

Source code in src/nhssynth/cli/config.py
def read_config(\n    args: argparse.Namespace,\n    parser: argparse.ArgumentParser,\n    all_subparsers: dict[str, argparse.ArgumentParser],\n) -> argparse.Namespace:\n    \"\"\"\n    Hierarchically assembles a config `argparse.Namespace` object for the inferred modules to run and execute, given a file.\n\n    1. Load the YAML file containing the config to read from\n    2. Check a valid `run_type` is specified or infer it and determine the list of `modules_to_run`\n    3. Establish the appropriate default configuration set of arguments from the `parser` and `all_subparsers` for the determined `modules_to_run`\n    4. Overwrite these with the specified (sub)set of config in the YAML file\n    5. Overwrite again with passed command-line `args` (these are considered 'overrides')\n    6. Run the appropriate module(s) or pipeline with the resulting configuration `Namespace` object\n\n    Args:\n        args: Namespace object containing arguments from the command line\n        parser: top-level `ArgumentParser` object containing common arguments\n        all_subparsers: dictionary of `ArgumentParser` objects, one for each module\n\n    Returns:\n        A Namespace object containing the assembled configuration settings\n\n    Raises:\n        AssertionError: if any required arguments are missing from the configuration file / overrides\n    \"\"\"\n    # Open the passed yaml file and load into a dictionary\n    with open(f\"config/{args.input_config}.yaml\") as stream:\n        config_dict = yaml.safe_load(stream)\n\n    valid_run_types = [x for x in all_subparsers.keys() if x != \"config\"]\n\n    version = config_dict.pop(\"version\", None)\n    if version and version != version(\"nhssynth\"):\n        warnings.warn(\n            f\"This config file's specified version ({version}) does not match the currently installed version of nhssynth ({version('nhssynth')}), results may differ.\"\n        )\n    elif not version:\n        version = ver(\"nhssynth\")\n\n    run_type = config_dict.pop(\"run_type\", None)\n\n    if run_type == \"pipeline\":\n        modules_to_run = PIPELINE\n    else:\n        modules_to_run = [x for x in config_dict.keys() | {run_type} if x in valid_run_types]\n        if not args.custom_pipeline:\n            modules_to_run = sorted(modules_to_run, key=lambda x: PIPELINE.index(x))\n\n    if not modules_to_run:\n        warnings.warn(\n            \"Missing or invalid `run_type` and / or module specification hierarchy in `config/{args.input_config}.yaml`, defaulting to a full run of the pipeline\"\n        )\n        modules_to_run = PIPELINE\n\n    # Get all possible default arguments by scraping the top level `parser` and the appropriate sub-parser for the `run_type`\n    args_dict, required_args = get_default_and_required_args(\n        parser, filter_dict(all_subparsers, modules_to_run, include=True)\n    )\n\n    # Find the non-default arguments amongst passed `args` by seeing which of them are different to the entries of `args_dict`\n    non_default_passed_args_dict = {\n        k: v\n        for k, v in vars(args).items()\n        if k in [\"input_config\", \"custom_pipeline\"] or (k in args_dict and k != \"func\" and v != args_dict[k])\n    }\n\n    # Overwrite the default arguments with the ones from the yaml file\n    args_dict.update(flatten_dict(config_dict))\n\n    # Overwrite the result of the above with any non-default CLI args\n    args_dict.update(non_default_passed_args_dict)\n\n    # Create a new Namespace using the assembled dictionary\n    new_args = 
argparse.Namespace(**args_dict)\n    assert getattr(\n        new_args, \"dataset\"\n    ), \"No dataset specified in the passed config file, provide one with the `--dataset` argument or add it to the config file\"\n    assert all(\n        getattr(new_args, req_arg[\"arg\"]) for req_arg in required_args\n    ), f\"Required arguments are missing from the passed config file: {[ra['module'] + ':' + ra['arg'] for ra in required_args if not getattr(new_args, ra['arg'])]}\"\n\n    # Run the appropriate execution function(s)\n    if not new_args.seed:\n        warnings.warn(\"No seed has been specified, meaning the results of this run may not be reproducible.\")\n    new_args.version = version\n    new_args.modules_to_run = modules_to_run\n    new_args.module_handover = {}\n    for module in new_args.modules_to_run:\n        MODULE_MAP[module](new_args)\n\n    return new_args\n
"},{"location":"reference/cli/config/#nhssynth.cli.config.write_config","title":"write_config(args, all_subparsers)","text":"

Assembles a configuration dictionary from the run config and writes it to a YAML file at the location specified by args.save_config_path.

Parameters:

Name Type Description Default args Namespace

A namespace containing the run's configuration.

required all_subparsers dict[str, ArgumentParser]

A dictionary containing all subparsers for the config args.

required Source code in src/nhssynth/cli/config.py
def write_config(\n    args: argparse.Namespace,\n    all_subparsers: dict[str, argparse.ArgumentParser],\n) -> None:\n    \"\"\"\n    Assembles a configuration dictionary from the run config and writes it to a YAML file at the location specified by `args.save_config_path`.\n\n    Args:\n        args: A namespace containing the run's configuration.\n        all_subparsers: A dictionary containing all subparsers for the config args.\n    \"\"\"\n    experiment_name = args.experiment_name\n    args_dict = assemble_config(args, all_subparsers)\n    with open(f\"experiments/{experiment_name}/config_{experiment_name}.yaml\", \"w\") as yaml_file:\n        yaml.dump(args_dict, yaml_file, default_flow_style=False, sort_keys=False)\n
"},{"location":"reference/cli/model_arguments/","title":"model_arguments","text":"

Define arguments for each of the model classes.

"},{"location":"reference/cli/model_arguments/#nhssynth.cli.model_arguments.add_gan_args","title":"add_gan_args(group, overrides=False)","text":"

Adds arguments to an existing group for the GAN model.

Source code in src/nhssynth/cli/model_arguments.py
def add_gan_args(group: argparse._ArgumentGroup, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing group for the GAN model.\"\"\"\n    group.add_argument(\n        \"--n-units-conditional\",\n        type=int,\n        help=\"the number of units in the conditional layer\",\n    )\n    group.add_argument(\n        \"--generator-n-layers-hidden\",\n        type=int,\n        help=\"the number of hidden layers in the generator\",\n    )\n    group.add_argument(\n        \"--generator-n-units-hidden\",\n        type=int,\n        help=\"the number of units in each hidden layer of the generator\",\n    )\n    group.add_argument(\n        \"--generator-activation\",\n        type=str,\n        choices=list(ACTIVATION_FUNCTIONS.keys()),\n        help=\"the activation function of the generator\",\n    )\n    group.add_argument(\n        \"--generator-batch-norm\",\n        action=\"store_true\",\n        help=\"whether to use batch norm in the generator\",\n    )\n    group.add_argument(\n        \"--generator-dropout\",\n        type=float,\n        help=\"the dropout rate in the generator\",\n    )\n    group.add_argument(\n        \"--generator-lr\",\n        type=float,\n        help=\"the learning rate for the generator\",\n    )\n    group.add_argument(\n        \"--generator-residual\",\n        action=\"store_true\",\n        help=\"whether to use residual connections in the generator\",\n    )\n    group.add_argument(\n        \"--generator-opt-betas\",\n        type=float,\n        nargs=2,\n        help=\"the beta values for the generator optimizer\",\n    )\n    group.add_argument(\n        \"--discriminator-n-layers-hidden\",\n        type=int,\n        help=\"the number of hidden layers in the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-n-units-hidden\",\n        type=int,\n        help=\"the number of units in each hidden layer of the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-activation\",\n        type=str,\n        choices=list(ACTIVATION_FUNCTIONS.keys()),\n        help=\"the activation function of the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-batch-norm\",\n        action=\"store_true\",\n        help=\"whether to use batch norm in the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-dropout\",\n        type=float,\n        help=\"the dropout rate in the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-lr\",\n        type=float,\n        help=\"the learning rate for the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-opt-betas\",\n        type=float,\n        nargs=2,\n        help=\"the beta values for the discriminator optimizer\",\n    )\n    group.add_argument(\n        \"--clipping-value\",\n        type=float,\n        help=\"the clipping value for the discriminator\",\n    )\n    group.add_argument(\n        \"--lambda-gradient-penalty\",\n        type=float,\n        help=\"the gradient penalty coefficient\",\n    )\n
"},{"location":"reference/cli/model_arguments/#nhssynth.cli.model_arguments.add_model_specific_args","title":"add_model_specific_args(group, name, overrides=False)","text":"

Adds arguments to an existing group according to name.

Source code in src/nhssynth/cli/model_arguments.py
def add_model_specific_args(group: argparse._ArgumentGroup, name: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing group according to `name`.\"\"\"\n    if name == \"VAE\":\n        add_vae_args(group, overrides)\n    elif name == \"GAN\":\n        add_gan_args(group, overrides)\n    elif name == \"TabularGAN\":\n        add_tabular_gan_args(group, overrides)\n
"},{"location":"reference/cli/model_arguments/#nhssynth.cli.model_arguments.add_vae_args","title":"add_vae_args(group, overrides=False)","text":"

Adds arguments to an existing group for the VAE model.

Source code in src/nhssynth/cli/model_arguments.py
def add_vae_args(group: argparse._ArgumentGroup, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing group for the VAE model.\"\"\"\n    group.add_argument(\n        \"--encoder-latent-dim\",\n        type=int,\n        nargs=\"+\",\n        help=\"the latent dimension of the encoder\",\n    )\n    group.add_argument(\n        \"--encoder-hidden-dim\",\n        type=int,\n        nargs=\"+\",\n        help=\"the hidden dimension of the encoder\",\n    )\n    group.add_argument(\n        \"--encoder-activation\",\n        type=str,\n        nargs=\"+\",\n        choices=list(ACTIVATION_FUNCTIONS.keys()),\n        help=\"the activation function of the encoder\",\n    )\n    group.add_argument(\n        \"--encoder-learning-rate\",\n        type=float,\n        nargs=\"+\",\n        help=\"the learning rate for the encoder\",\n    )\n    group.add_argument(\n        \"--decoder-latent-dim\",\n        type=int,\n        nargs=\"+\",\n        help=\"the latent dimension of the decoder\",\n    )\n    group.add_argument(\n        \"--decoder-hidden-dim\",\n        type=int,\n        nargs=\"+\",\n        help=\"the hidden dimension of the decoder\",\n    )\n    group.add_argument(\n        \"--decoder-activation\",\n        type=str,\n        nargs=\"+\",\n        choices=list(ACTIVATION_FUNCTIONS.keys()),\n        help=\"the activation function of the decoder\",\n    )\n    group.add_argument(\n        \"--decoder-learning-rate\",\n        type=float,\n        nargs=\"+\",\n        help=\"the learning rate for the decoder\",\n    )\n    group.add_argument(\n        \"--shared-optimizer\",\n        action=\"store_true\",\n        help=\"whether to use a shared optimizer for the encoder and decoder\",\n    )\n
"},{"location":"reference/cli/module_arguments/","title":"module_arguments","text":"

Define arguments for each of the modules' CLI sub-parsers.

"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.AllChoicesDefault","title":"AllChoicesDefault","text":"

Bases: Action

Customised argparse action for defaulting to the full list of choices if only the argument's flag is supplied: (i.e. user passes --metrics with no follow up list of metric groups => all metric groups will be executed).

Notes

1) If no option_string is supplied: set to default value (self.default) 2) If option_string is supplied: a) If values are supplied, set to list of values b) If no values are supplied, set to self.const, if self.const is not set, set to self.default

Source code in src/nhssynth/cli/module_arguments.py
class AllChoicesDefault(argparse.Action):\n    \"\"\"\n    Customised argparse action for defaulting to the full list of choices if only the argument's flag is supplied:\n    (i.e. user passes `--metrics` with no follow up list of metric groups => all metric groups will be executed).\n\n    Notes:\n        1) If no `option_string` is supplied: set to default value (`self.default`)\n        2) If `option_string` is supplied:\n            a) If `values` are supplied, set to list of values\n            b) If no `values` are supplied, set to `self.const`, if `self.const` is not set, set to `self.default`\n    \"\"\"\n\n    def __call__(self, parser, namespace, values=None, option_string=None):\n        if values:\n            setattr(namespace, self.dest, values)\n        elif option_string:\n            setattr(namespace, self.dest, self.const if self.const else self.default)\n        else:\n            setattr(namespace, self.dest, self.default)\n
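
For illustration, a hypothetical parser using this action; the argument name and metric-group values are made up.

import argparse\n\nfrom nhssynth.cli.module_arguments import AllChoicesDefault\n\n# hypothetical parser; the argument name and metric groups are made up\nparser = argparse.ArgumentParser()\nparser.add_argument(\n    '--metrics',\n    nargs='*',\n    action=AllChoicesDefault,\n    default=None,\n    const=['quality', 'privacy'],  # used when the flag is supplied with no values\n)\nprint(parser.parse_args([]).metrics)                        # None (the default)\nprint(parser.parse_args(['--metrics']).metrics)             # ['quality', 'privacy'] (the const)\nprint(parser.parse_args(['--metrics', 'quality']).metrics)  # ['quality']\n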
"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.add_dataloader_args","title":"add_dataloader_args(parser, group_title, overrides=False)","text":"

Adds arguments to an existing dataloader module sub-parser instance.

Source code in src/nhssynth/cli/module_arguments.py
def add_dataloader_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing dataloader module sub-parser instance.\"\"\"\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(\n        \"--data-dir\",\n        type=str,\n        default=\"./data\",\n        help=\"the directory containing the chosen dataset\",\n    )\n    group.add_argument(\n        \"--index-col\",\n        default=None,\n        nargs=\"*\",\n        help=\"indicate the name of the index column(s) in the csv file, such that pandas can index by it\",\n    )\n    group.add_argument(\n        \"--constraint-graph\",\n        type=str,\n        default=\"_constraint_graph\",\n        help=\"the name of the html file to write the constraint graph to, defaults to `<DATASET>_constraint_graph`\",\n    )\n    group.add_argument(\n        \"--collapse-yaml\",\n        action=\"store_true\",\n        help=\"use aliases and anchors in the output metadata yaml, this will make it much more compact\",\n    )\n    group.add_argument(\n        \"--missingness\",\n        type=str,\n        default=\"augment\",\n        choices=MISSINGNESS_STRATEGIES,\n        help=\"how to handle missing values in the dataset\",\n    )\n    group.add_argument(\n        \"--impute\",\n        type=str,\n        default=None,\n        help=\"the imputation strategy to use, ONLY USED if <MISSINGNESS> is set to 'impute', choose from: 'mean', 'median', 'mode', or any specific value (e.g. '0')\",\n    )\n    group.add_argument(\n        \"--write-csv\",\n        action=\"store_true\",\n        help=\"write the transformed real data to a csv file\",\n    )\n
"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.add_evaluation_args","title":"add_evaluation_args(parser, group_title, overrides=False)","text":"

Adds arguments to an existing evaluation module sub-parser instance.

Source code in src/nhssynth/cli/module_arguments.py
def add_evaluation_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing evaluation module sub-parser instance.\"\"\"\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(\n        \"--downstream-tasks\",\n        \"--tasks\",\n        action=\"store_true\",\n        help=\"run the downstream tasks evaluation\",\n    )\n    group.add_argument(\n        \"--tasks-dir\",\n        type=str,\n        default=\"./tasks\",\n        help=\"the directory containing the downstream tasks to run, this directory must contain a folder called <DATASET> containing the tasks to run\",\n    )\n    group.add_argument(\n        \"--aequitas\",\n        action=\"store_true\",\n        help=\"run the aequitas fairness evaluation (note this runs for each of the downstream tasks)\",\n    )\n    group.add_argument(\n        \"--aequitas-attributes\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the attributes to use for the aequitas fairness evaluation, defaults to all attributes\",\n    )\n    group.add_argument(\n        \"--key-numerical-fields\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the numerical key field attributes to use for SDV privacy evaluations\",\n    )\n    group.add_argument(\n        \"--sensitive-numerical-fields\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the numerical sensitive field attributes to use for SDV privacy evaluations\",\n    )\n    group.add_argument(\n        \"--key-categorical-fields\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the categorical key field attributes to use for SDV privacy evaluations\",\n    )\n    group.add_argument(\n        \"--sensitive-categorical-fields\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the categorical sensitive field attributes to use for SDV privacy evaluations\",\n    )\n    for name in METRIC_CHOICES:\n        generate_evaluation_arg(group, name)\n
"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.add_model_args","title":"add_model_args(parser, group_title, overrides=False)","text":"

Adds arguments to an existing model module sub-parser instance.

Source code in src/nhssynth/cli/module_arguments.py
def add_model_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing model module sub-parser instance.\"\"\"\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(\n        \"--architecture\",\n        type=str,\n        nargs=\"+\",\n        default=[\"VAE\"],\n        choices=MODELS,\n        help=\"the model architecture(s) to train\",\n    )\n    group.add_argument(\n        \"--repeats\",\n        type=int,\n        default=1,\n        help=\"how many times to repeat the training process per model architecture (<SEED> is incremented each time)\",\n    )\n    group.add_argument(\n        \"--batch-size\",\n        type=int,\n        nargs=\"+\",\n        default=32,\n        help=\"the batch size for the model\",\n    )\n    group.add_argument(\n        \"--num-epochs\",\n        type=int,\n        nargs=\"+\",\n        default=100,\n        help=\"number of epochs to train for\",\n    )\n    group.add_argument(\n        \"--patience\",\n        type=int,\n        nargs=\"+\",\n        default=5,\n        help=\"how many epochs the model is allowed to train for without improvement\",\n    )\n    group.add_argument(\n        \"--displayed-metrics\",\n        type=str,\n        nargs=\"+\",\n        default=[],\n        help=\"metrics to display during training of the model, when set to `None`, all metrics are displayed\",\n    )\n    group.add_argument(\n        \"--use-gpu\",\n        action=\"store_true\",\n        help=\"use the GPU for training\",\n    )\n    group.add_argument(\n        \"--num-samples\",\n        type=int,\n        default=None,\n        help=\"the number of samples to generate from the model, defaults to the size of the original dataset\",\n    )\n    privacy_group = parser.add_argument_group(title=\"model privacy options\")\n    privacy_group.add_argument(\n        \"--target-epsilon\",\n        type=float,\n        nargs=\"+\",\n        default=1.0,\n        help=\"the target epsilon for differential privacy\",\n    )\n    privacy_group.add_argument(\n        \"--target-delta\",\n        type=float,\n        nargs=\"+\",\n        help=\"the target delta for differential privacy, defaults to `1 / len(dataset)` if not specified\",\n    )\n    privacy_group.add_argument(\n        \"--max-grad-norm\",\n        type=float,\n        nargs=\"+\",\n        default=5.0,\n        help=\"the clipping threshold for gradients (only relevant under differential privacy)\",\n    )\n    privacy_group.add_argument(\n        \"--secure-mode\",\n        action=\"store_true\",\n        help=\"Enable secure RNG via the `csprng` package to make privacy guarantees more robust, comes at a cost of performance and reproducibility\",\n    )\n    for model_name in MODELS.keys():\n        model_group = parser.add_argument_group(title=f\"{model_name}-specific options\")\n        add_model_specific_args(model_group, model_name, overrides=overrides)\n
"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.add_plotting_args","title":"add_plotting_args(parser, group_title, overrides=False)","text":"

Adds arguments to an existing plotting module sub-parser instance.

Source code in src/nhssynth/cli/module_arguments.py
def add_plotting_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing plotting module sub-parser instance.\"\"\"\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(\n        \"--plot-quality\",\n        action=\"store_true\",\n        help=\"plot the SDV quality report\",\n    )\n    group.add_argument(\n        \"--plot-diagnostic\",\n        action=\"store_true\",\n        help=\"plot the SDV diagnostic report\",\n    )\n    group.add_argument(\n        \"--plot-sdv-report\",\n        action=\"store_true\",\n        help=\"plot the SDV report\",\n    )\n    group.add_argument(\n        \"--plot-tsne\",\n        action=\"store_true\",\n        help=\"plot the t-SNE embeddings of the real and synthetic data\",\n    )\n
"},{"location":"reference/cli/module_setup/","title":"module_setup","text":"

Specify all CLI-accessible modules and their configurations, the pipeline to run by default, and define special functions for the config and pipeline CLI option trees.

"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.ModuleConfig","title":"ModuleConfig","text":"

Represents a module's configuration, containing the following attributes:

Attributes:

Name Type Description func

A callable that executes the module's functionality.

add_args

A callable that populates the module's sub-parser arguments.

description

A description of the module's functionality.

help

A help message for the module's command-line interface.

common_parsers

A list of common parsers to add to the module's sub-parser, appending the 'dataset' and 'core' parsers to those passed.

Source code in src/nhssynth/cli/module_setup.py
class ModuleConfig:\n    \"\"\"\n    Represents a module's configuration, containing the following attributes:\n\n    Attributes:\n        func: A callable that executes the module's functionality.\n        add_args: A callable that populates the module's sub-parser arguments.\n        description: A description of the module's functionality.\n        help: A help message for the module's command-line interface.\n        common_parsers: A list of common parsers to add to the module's sub-parser, appending the 'dataset' and 'core' parsers to those passed.\n    \"\"\"\n\n    def __init__(\n        self,\n        func: Callable[..., argparse.Namespace],\n        add_args: Callable[..., None],\n        description: str,\n        help: str,\n        common_parsers: Optional[list[str]] = None,\n        no_seed: bool = False,\n    ) -> None:\n        self.func = func\n        self.add_args = add_args\n        self.description = description\n        self.help = help\n        self.common_parsers = [\"core\", \"seed\"] if not no_seed else [\"core\"]\n        if common_parsers:\n            assert set(common_parsers) <= COMMON_PARSERS.keys(), \"Invalid common parser(s) specified.\"\n            # merge the below two assert statements\n            assert (\n                \"core\" not in common_parsers and \"seed\" not in common_parsers\n            ), \"The 'seed' and 'core' parser groups are automatically added to all modules, remove the from `ModuleConfig`s.\"\n            self.common_parsers += common_parsers\n\n    def __call__(self, args: argparse.Namespace) -> argparse.Namespace:\n        return self.func(args)\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.add_config_args","title":"add_config_args(parser)","text":"

Adds arguments to parser relating to configuration file handling and module-specific config overrides.

Source code in src/nhssynth/cli/module_setup.py
def add_config_args(parser: argparse.ArgumentParser) -> None:\n    \"\"\"Adds arguments to `parser` relating to configuration file handling and module-specific config overrides.\"\"\"\n    parser.add_argument(\n        \"-c\",\n        \"--input-config\",\n        required=True,\n        help=\"specify the config file name\",\n    )\n    parser.add_argument(\n        \"-cp\",\n        \"--custom-pipeline\",\n        action=\"store_true\",\n        help=\"infer a custom pipeline running order of modules from the config\",\n    )\n    for module_name in PIPELINE:\n        MODULE_MAP[module_name].add_args(parser, f\"{module_name} option overrides\", overrides=True)\n    for module_name in VALID_MODULES - set(PIPELINE):\n        MODULE_MAP[module_name].add_args(parser, f\"{module_name} options overrides\", overrides=True)\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.add_pipeline_args","title":"add_pipeline_args(parser)","text":"

Adds arguments to parser for each module in the pipeline.

Source code in src/nhssynth/cli/module_setup.py
def add_pipeline_args(parser: argparse.ArgumentParser) -> None:\n    \"\"\"Adds arguments to `parser` for each module in the pipeline.\"\"\"\n    for module_name in PIPELINE:\n        MODULE_MAP[module_name].add_args(parser, f\"{module_name} options\")\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.add_subparser","title":"add_subparser(subparsers, name, module_config)","text":"

Add a subparser to an argparse argument parser.

Parameters:

subparsers (_SubParsersAction): The subparsers action to which the subparser will be added. Required.
name (str): The name of the subparser. Required.
module_config (ModuleConfig): A ModuleConfig object containing information about the subparser, including a function to execute and a function to add arguments. Required.

Returns:

ArgumentParser: The newly created subparser.

Source code in src/nhssynth/cli/module_setup.py
def add_subparser(\n    subparsers: argparse._SubParsersAction,\n    name: str,\n    module_config: ModuleConfig,\n) -> argparse.ArgumentParser:\n    \"\"\"\n    Add a subparser to an argparse argument parser.\n\n    Args:\n        subparsers: The subparsers action to which the subparser will be added.\n        name: The name of the subparser.\n        module_config: A [`ModuleConfig`][nhssynth.cli.module_setup.ModuleConfig] object containing information about the subparser, including a function to execute and a function to add arguments.\n\n    Returns:\n        The newly created subparser.\n    \"\"\"\n    parent_parsers = get_parent_parsers(name, module_config.common_parsers)\n    parser = subparsers.add_parser(\n        name=name,\n        description=module_config.description,\n        help=module_config.help,\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n        parents=parent_parsers,\n    )\n    if name not in {\"pipeline\", \"config\"}:\n        module_config.add_args(parser, f\"{name} options\")\n    else:\n        module_config.add_args(parser)\n    parser.set_defaults(func=module_config.func)\n    return parser\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.get_parent_parsers","title":"get_parent_parsers(name, module_parsers)","text":"

Get a list of parent parsers for a given module, based on the module's common_parsers attribute.

Source code in src/nhssynth/cli/module_setup.py
def get_parent_parsers(name: str, module_parsers: list[str]) -> list[argparse.ArgumentParser]:\n    \"\"\"Get a list of parent parsers for a given module, based on the module's `common_parsers` attribute.\"\"\"\n    if name in {\"pipeline\", \"config\"}:\n        return [p(name == \"config\") for p in COMMON_PARSERS.values()]\n    elif name == \"dashboard\":\n        return [COMMON_PARSERS[pn](True) for pn in module_parsers]\n    else:\n        return [COMMON_PARSERS[pn]() for pn in module_parsers]\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.run_pipeline","title":"run_pipeline(args)","text":"

Runs the specified pipeline of modules with the passed configuration args.

Source code in src/nhssynth/cli/module_setup.py
def run_pipeline(args: argparse.Namespace) -> None:\n    \"\"\"Runs the specified pipeline of modules with the passed configuration `args`.\"\"\"\n    print(\"Running full pipeline...\")\n    args.modules_to_run = PIPELINE\n    for module_name in PIPELINE:\n        args = MODULE_MAP[module_name](args)\n
"},{"location":"reference/cli/run/","title":"run","text":""},{"location":"reference/common/","title":"common","text":""},{"location":"reference/common/common/","title":"common","text":"

Common functions for all modules.

"},{"location":"reference/common/common/#nhssynth.common.common.set_seed","title":"set_seed(seed=None)","text":"

(Potentially) set the seed for numpy, torch and random. If no seed is provided, nothing happens.

Parameters:

seed (Optional[int]): The seed to set. Default: None.

Source code in src/nhssynth/common/common.py
def set_seed(seed: Optional[int] = None) -> None:\n    \"\"\"\n    (Potentially) set the seed for numpy, torch and random. If no seed is provided, nothing happens.\n\n    Args:\n        seed: The seed to set.\n    \"\"\"\n    if seed:\n        np.random.seed(seed)\n        torch.manual_seed(seed)\n        random.seed(seed)\n
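A minimal usage sketch (the seed value is arbitrary):

import numpy as np

from nhssynth.common.common import set_seed

set_seed(42)
first = np.random.rand(3)
set_seed(42)
second = np.random.rand(3)
assert (first == second).all()  # reseeding reproduces the same draws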
"},{"location":"reference/common/constants/","title":"constants","text":"

Define all of the common constants used throughout the project.

"},{"location":"reference/common/debugging/","title":"debugging","text":"

Debugging utilities.

"},{"location":"reference/common/dicts/","title":"dicts","text":"

Common functions for working with dictionaries.

"},{"location":"reference/common/dicts/#nhssynth.common.dicts.filter_dict","title":"filter_dict(d, filter_keys, include=False)","text":"

Given a dictionary, return a new dictionary either including or excluding keys in a given filter set.

Parameters:

d (dict): A dictionary to filter. Required.
filter_keys (Union[set, list]): A list or set of keys to either include or exclude. Required.
include (bool): Whether to return a dictionary including or excluding the keys in filter_keys. Default: False.

Returns:

dict: A filtered dictionary.

Examples:

>>> d = {'a': 1, 'b': 2, 'c': 3}\n>>> filter_dict(d, {'a', 'b'})\n{'c': 3}\n>>> filter_dict(d, {'a', 'b'}, include=True)\n{'a': 1, 'b': 2}\n
Source code in src/nhssynth/common/dicts.py
def filter_dict(d: dict, filter_keys: Union[set, list], include: bool = False) -> dict:\n    \"\"\"\n    Given a dictionary, return a new dictionary either including or excluding keys in a given `filter` set.\n\n    Args:\n        d: A dictionary to filter.\n        filter_keys: A list or set of keys to either include or exclude.\n        include: Determine whether to return a dictionary including or excluding keys in `filter`.\n\n    Returns:\n        A filtered dictionary.\n\n    Examples:\n        >>> d = {'a': 1, 'b': 2, 'c': 3}\n        >>> filter_dict(d, {'a', 'b'})\n        {'c': 3}\n        >>> filter_dict(d, {'a', 'b'}, include=True)\n        {'a': 1, 'b': 2}\n    \"\"\"\n    if include:\n        filtered_keys = set(filter_keys) & set(d.keys())\n    else:\n        filtered_keys = set(d.keys()) - set(filter_keys)\n    return {k: v for k, v in d.items() if k in filtered_keys}\n
"},{"location":"reference/common/dicts/#nhssynth.common.dicts.flatten_dict","title":"flatten_dict(d)","text":"

Flatten a dictionary by recursively combining nested keys into a single dictionary until no nested keys remain.

Parameters:

d (dict[str, Any]): A dictionary with potentially nested keys. Required.

Returns:

dict[str, Any]: A flattened dictionary.

Raises:

ValueError: If duplicate keys are found in the flattened dictionary.

Examples:

>>> d = {'a': 1, 'b': {'c': 2, 'd': {'e': 3}}}\n>>> flatten_dict(d)\n{'a': 1, 'c': 2, 'e': 3}\n
Source code in src/nhssynth/common/dicts.py
def flatten_dict(d: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"\n    Flatten a dictionary by recursively combining nested keys into a single dictionary until no nested keys remain.\n\n    Args:\n        d: A dictionary with potentially nested keys.\n\n    Returns:\n        A flattened dictionary.\n\n    Raises:\n        ValueError: If duplicate keys are found in the flattened dictionary.\n\n    Examples:\n        >>> d = {'a': 1, 'b': {'c': 2, 'd': {'e': 3}}}\n        >>> flatten_dict(d)\n        {'a': 1, 'c': 2, 'e': 3}\n    \"\"\"\n    items = []\n    for k, v in d.items():\n        if isinstance(v, dict):\n            items.extend(flatten_dict(v).items())\n        else:\n            items.append((k, v))\n    if len(set([p[0] for p in items])) != len(items):\n        raise ValueError(\"Duplicate keys found in flattened dictionary\")\n    return dict(items)\n
"},{"location":"reference/common/dicts/#nhssynth.common.dicts.get_key_by_value","title":"get_key_by_value(d, value)","text":"

Find the first key in a dictionary with a given value.

Parameters:

d (dict): A dictionary to search through. Required.
value (Any): The value to search for. Required.

Returns:

Union[Any, None]: The first key in d with the given value, or None if no such key exists.

Examples:

>>> d = {'a': 1, 'b': 2, 'c': 1}\n>>> get_key_by_value(d, 2)\n'b'\n>>> get_key_by_value(d, 3)\nNone\n
Source code in src/nhssynth/common/dicts.py
def get_key_by_value(d: dict, value: Any) -> Union[Any, None]:\n    \"\"\"\n    Find the first key in a dictionary with a given value.\n\n    Args:\n        d: A dictionary to search through.\n        value: The value to search for.\n\n    Returns:\n        The first key in `d` with the value `value`, or `None` if no such key exists.\n\n    Examples:\n        >>> d = {'a': 1, 'b': 2, 'c': 1}\n        >>> get_key_by_value(d, 2)\n        'b'\n        >>> get_key_by_value(d, 3)\n        None\n\n    \"\"\"\n    for key, val in d.items():\n        if val == value:\n            return key\n    return None\n
"},{"location":"reference/common/io/","title":"io","text":"

Common building-block functions for handling module input and output.

"},{"location":"reference/common/io/#nhssynth.common.io.check_exists","title":"check_exists(fns, dir)","text":"

Checks if the files in fns exist in dir.

Parameters:

fns (list[str]): The list of files to check. Required.
dir (Path): The directory the files should exist in. Required.

Raises:

FileNotFoundError: If any of the files in fns do not exist in dir.

Source code in src/nhssynth/common/io.py
def check_exists(fns: list[str], dir: Path) -> None:\n    \"\"\"\n    Checks if the files in `fns` exist in `dir`.\n\n    Args:\n        fns: The list of files to check.\n        dir: The directory the files should exist in.\n\n    Raises:\n        FileNotFoundError: If any of the files in `fns` do not exist in `dir`.\n    \"\"\"\n    for fn in fns:\n        if not (dir / fn).exists():\n            raise FileNotFoundError(f\"File {fn} does not exist at {dir}.\")\n
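A brief usage sketch (the directory and filenames are invented for illustration):

from pathlib import Path

from nhssynth.common.io import check_exists

try:
    check_exists(["typed.pkl", "metadata.yaml"], Path("experiments/example"))
except FileNotFoundError as e:
    print(e)  # e.g. File typed.pkl does not exist at experiments/example.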
"},{"location":"reference/common/io/#nhssynth.common.io.consistent_ending","title":"consistent_ending(fn, ending='.pkl', suffix='')","text":"

Ensures that the filename fn ends with ending. If not, removes any existing ending and appends ending.

Parameters:

fn (str): The filename to check. Required.
ending (str): The desired ending to check for. Default: ".pkl".
suffix (str): A suffix to append to the filename before the ending. Default: "".

Returns:

str: The filename with the correct ending and potentially an inserted suffix.

Source code in src/nhssynth/common/io.py
def consistent_ending(fn: str, ending: str = \".pkl\", suffix: str = \"\") -> str:\n    \"\"\"\n    Ensures that the filename `fn` ends with `ending`. If not, removes any existing ending and appends `ending`.\n\n    Args:\n        fn: The filename to check.\n        ending: The desired ending to check for. Default is \".pkl\".\n        suffix: A suffix to append to the filename before the ending.\n\n    Returns:\n        The filename with the correct ending and potentially an inserted suffix.\n    \"\"\"\n    path_fn = Path(fn)\n    return str(path_fn.parent / path_fn.stem) + (\"_\" if suffix else \"\") + suffix + ending\n
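For instance (filenames invented for illustration):

from nhssynth.common.io import consistent_ending

print(consistent_ending("dataset.csv"))                  # dataset.pkl
print(consistent_ending("dataset", ending=".csv"))       # dataset.csv
print(consistent_ending("dataset.pkl", suffix="typed"))  # dataset_typed.pkl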
"},{"location":"reference/common/io/#nhssynth.common.io.consistent_endings","title":"consistent_endings(args)","text":"

Wrapper around consistent_ending to apply it to a list of filenames.

Parameters:

args (list[Union[str, tuple[str, str], tuple[str, str, str]]]): The list of filenames to check. Each element can be a single filename, a pair of a filename and an ending, or a triple of a filename, an ending and a suffix. Required.

Returns:

list[str]: The list of filenames with the correct endings.

Source code in src/nhssynth/common/io.py
def consistent_endings(args: list[Union[str, tuple[str, str], tuple[str, str, str]]]) -> list[str]:\n    \"\"\"\n    Wrapper around `consistent_ending` to apply it to a list of filenames.\n\n    Args:\n        args: The list of filenames to check. Can take the form of a single filename, a pair of a filename and an ending, or a triple of a filename, an ending and a suffix.\n\n    Returns:\n        The list of filenames with the correct endings.\n    \"\"\"\n    return list(consistent_ending(arg) if isinstance(arg, str) else consistent_ending(*arg) for arg in args)\n
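The three accepted element forms can be mixed freely, e.g. (filenames invented for illustration):

from nhssynth.common.io import consistent_endings

print(consistent_endings(["typed", ("input", ".csv"), ("dataset", ".pkl", "transformed")]))
# ['typed.pkl', 'input.csv', 'dataset_transformed.pkl']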
"},{"location":"reference/common/io/#nhssynth.common.io.experiment_io","title":"experiment_io(experiment_name, dir_experiments='experiments')","text":"

Create an experiment's directory and return the path.

Parameters:

experiment_name (str): The name of the experiment. Required.
dir_experiments (str): The name of the directory containing all experiments. Default: "experiments".

Returns:

str: The path to the experiment directory.

Source code in src/nhssynth/common/io.py
def experiment_io(experiment_name: str, dir_experiments: str = \"experiments\") -> str:\n    \"\"\"\n    Create an experiment's directory and return the path.\n\n    Args:\n        experiment_name: The name of the experiment.\n        dir_experiments: The name of the directory containing all experiments.\n\n    Returns:\n        The path to the experiment directory.\n    \"\"\"\n    dir_experiment = Path(dir_experiments) / experiment_name\n    dir_experiment.mkdir(parents=True, exist_ok=True)\n    return dir_experiment\n
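A minimal usage sketch (the experiment name is invented for illustration):

from nhssynth.common.io import experiment_io

dir_experiment = experiment_io("demo-run")  # creates experiments/demo-run if it does not already exist
print(dir_experiment)  # experiments/demo-run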
"},{"location":"reference/common/io/#nhssynth.common.io.potential_suffix","title":"potential_suffix(fn, fn_base)","text":"

Checks if fn is a suffix (starts with an underscore) to append to fn_base, or a filename in its own right.

Parameters:

fn (str): The filename / potential suffix to append to fn_base. Required.
fn_base (str): The name of the file the suffix would attach to. Required.

Returns:

str: The appropriately processed fn.

Source code in src/nhssynth/common/io.py
def potential_suffix(fn: str, fn_base: str) -> str:\n    \"\"\"\n    Checks if `fn` is a suffix (starts with an underscore) to append to `fn_base`, or a filename in its own right.\n\n    Args:\n        fn: The filename / potential suffix to append to `fn_base`.\n        fn_base: The name of the file the suffix would attach to.\n\n    Returns:\n        The appropriately processed `fn`\n    \"\"\"\n    fn_base = Path(fn_base).stem\n    if fn[0] == \"_\":\n        return fn_base + fn\n    else:\n        return fn\n
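For instance (filenames invented for illustration):

from nhssynth.common.io import potential_suffix

print(potential_suffix("_typed.pkl", "dataset.csv"))  # dataset_typed.pkl (treated as a suffix)
print(potential_suffix("other.pkl", "dataset.csv"))   # other.pkl (treated as a filename in its own right)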
"},{"location":"reference/common/io/#nhssynth.common.io.potential_suffixes","title":"potential_suffixes(fns, fn_base)","text":"

Wrapper around potential_suffix to apply it to a list of filenames.

Parameters:

fns (list[str]): The list of filenames / potential suffixes to append to fn_base. Required.
fn_base (str): The name of the file the suffixes would attach to. Required.

Source code in src/nhssynth/common/io.py
def potential_suffixes(fns: list[str], fn_base: str) -> list[str]:\n    \"\"\"\n    Wrapper around `potential_suffix` to apply it to a list of filenames.\n\n    Args:\n        fns: The list of filenames / potential suffixes to append to `fn_base`.\n        fn_base: The name of the file the suffixes would attach to.\n    \"\"\"\n    return list(potential_suffix(fn, fn_base) for fn in fns)\n
"},{"location":"reference/common/io/#nhssynth.common.io.warn_if_path_supplied","title":"warn_if_path_supplied(fns, dir)","text":"

Warns if the files in fns include directory separators.

Parameters:

fns (list[str]): The list of files to check. Required.
dir (Path): The directory the files should exist in. Required.

Warns:

UserWarning: When the path to any of the files in fns includes directory separators, as this may lead to unintended consequences if the user doesn't realise default directories are pre-specified.

Source code in src/nhssynth/common/io.py
def warn_if_path_supplied(fns: list[str], dir: Path) -> None:\n    \"\"\"\n    Warns if the files in `fns` include directory separators.\n\n    Args:\n        fns: The list of files to check.\n        dir: The directory the files should exist in.\n\n    Warnings:\n        UserWarning: when the path to any of the files in `fns` includes directory separators, as this may lead to unintended consequences if the user doesn't realise default directories are pre-specified.\n    \"\"\"\n    for fn in fns:\n        if \"/\" in fn:\n            warnings.warn(\n                f\"Using the path supplied appended to {dir}, i.e. attempting to read data from {dir / fn}\",\n                UserWarning,\n            )\n
"},{"location":"reference/common/strings/","title":"strings","text":"

String manipulation functions.

"},{"location":"reference/common/strings/#nhssynth.common.strings.add_spaces_before_caps","title":"add_spaces_before_caps(string)","text":"

Adds spaces before capital letters in a string if there is a lower-case letter following it.

Parameters:

string (str): The string to add spaces to. Required.

Returns:

str: The string with spaces added before capital letters.

Examples:

>>> add_spaces_before_caps(\"HelloWorld\")\n'Hello World'\n>>> add_spaces_before_caps(\"HelloWorldAGAIN\")\n'Hello World AGAIN'\n
Source code in src/nhssynth/common/strings.py
def add_spaces_before_caps(string: str) -> str:\n    \"\"\"\n    Adds spaces before capital letters in a string if there is a lower-case letter following it.\n\n    Args:\n        string: The string to add spaces to.\n\n    Returns:\n        The string with spaces added before capital letters.\n\n    Examples:\n        >>> add_spaces_before_caps(\"HelloWorld\")\n        'Hello World'\n        >>> add_spaces_before_caps(\"HelloWorldAGAIN\")\n        'Hello World AGAIN'\n    \"\"\"\n    return \" \".join(re.findall(r\"[a-z]?[A-Z][a-z]+|[A-Z]+(?=[A-Z][a-z]|\\b)\", string))\n
"},{"location":"reference/common/strings/#nhssynth.common.strings.format_timedelta","title":"format_timedelta(start, finish)","text":"

Calculate and prettily format the difference between two calls to time.time().

Parameters:

start (float): The start time. Required.
finish (float): The finish time. Required.

Returns:

str: A string containing the time difference in a human-readable format.

Source code in src/nhssynth/common/strings.py
def format_timedelta(start: float, finish: float) -> str:\n    \"\"\"\n    Calculate and prettily format the difference between two calls to `time.time()`.\n\n    Args:\n        start: The start time.\n        finish: The finish time.\n\n    Returns:\n        A string containing the time difference in a human-readable format.\n    \"\"\"\n    total = datetime.timedelta(seconds=finish - start)\n    hours, remainder = divmod(total.seconds, 3600)\n    minutes, seconds = divmod(remainder, 60)\n\n    if total.days > 0:\n        delta_str = f\"{total.days}d {hours}h {minutes}m {seconds}s\"\n    elif hours > 0:\n        delta_str = f\"{hours}h {minutes}m {seconds}s\"\n    elif minutes > 0:\n        delta_str = f\"{minutes}m {seconds}s\"\n    else:\n        delta_str = f\"{seconds}s\"\n    return delta_str\n
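A minimal usage sketch:

import time

from nhssynth.common.strings import format_timedelta

start = time.time()
time.sleep(1.5)  # stand-in for some real work
print(format_timedelta(start, time.time()))  # e.g. 1s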
"},{"location":"reference/modules/","title":"modules","text":""},{"location":"reference/modules/dashboard/","title":"dashboard","text":""},{"location":"reference/modules/dashboard/Upload/","title":"Upload","text":""},{"location":"reference/modules/dashboard/Upload/#nhssynth.modules.dashboard.Upload.get_component","title":"get_component(args, name, component_type, text)","text":"

Generate an upload field and its functionality for a given component of the evaluations.

Parameters:

args (Namespace): The parsed command-line arguments, used to load the component from disk when a path is supplied. Required.
name (str): The name of the component as it should be recorded in the session state and as it exists in the args. Required.
component_type (Any): The type of the component (to ensure that only the expected object can be uploaded). Required.
text (str): The human-readable text to display to the user as part of the element. Required.

Source code in src/nhssynth/modules/dashboard/Upload.py
def get_component(args: argparse.Namespace, name: str, component_type: Any, text: str) -> None:\n    \"\"\"\n    Generate an upload field and its functionality for a given component of the evaluations.\n\n    Args:\n        name: The name of the component as it should be recorded in the session state and as it exists in the args.\n        component_type: The type of the component (to ensure that only the expected object can be uploaded)\n        text: The human-readable text to display to the user as part of the element.\n    \"\"\"\n    uploaded = st.file_uploader(f\"Upload a pickle file containing a {text}\", type=\"pkl\")\n    if getattr(args, name):\n        with open(os.getcwd() + \"/\" + getattr(args, name), \"rb\") as f:\n            loaded = pickle.load(f)\n    if uploaded is not None:\n        loaded = pickle.load(uploaded)\n    if loaded is not None:\n        assert isinstance(loaded, component_type), f\"Uploaded file does not contain a {text}!\"\n        st.session_state[name] = loaded.contents\n        st.success(f\"Loaded {text}!\")\n
"},{"location":"reference/modules/dashboard/Upload/#nhssynth.modules.dashboard.Upload.parse_args","title":"parse_args()","text":"

These arguments allow a user to automatically load the required data for the dashboard from disk.

Returns:

Namespace: The parsed arguments.

Source code in src/nhssynth/modules/dashboard/Upload.py
def parse_args() -> argparse.Namespace:\n    \"\"\"\n    These arguments allow a user to automatically load the required data for the dashboard from disk.\n\n    Returns:\n        The parsed arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"NHSSynth Evaluation Dashboard\")\n    parser.add_argument(\"--evaluations\", type=str, help=\"Path to a set of evaluations.\")\n    parser.add_argument(\"--experiments\", type=str, help=\"Path to a set of experiments.\")\n    parser.add_argument(\"--synthetic-datasets\", type=str, help=\"Path to a set of synthetic datasets.\")\n    parser.add_argument(\"--typed\", type=str, help=\"Path to a typed real dataset.\")\n    return parser.parse_args()\n
"},{"location":"reference/modules/dashboard/io/","title":"io","text":""},{"location":"reference/modules/dashboard/io/#nhssynth.modules.dashboard.io.check_input_paths","title":"check_input_paths(dir_experiment, fn_dataset, fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations)","text":"

Sets up the input paths for the files the dashboard requires.

Parameters:

dir_experiment (str): The path to the experiment directory. Required.
fn_dataset (str): The base name of the dataset. Required.
fn_typed (str): The filename of the typed dataset. Required.
fn_experiments (str): The filename of the collection of experiments. Required.
fn_synthetic_datasets (str): The filename of the collection of synthetic datasets. Required.
fn_evaluations (str): The filename of the collection of evaluations. Required.

Returns:

The paths to the typed dataset, experiments, synthetic datasets and evaluations files.

Source code in src/nhssynth/modules/dashboard/io.py
def check_input_paths(\n    dir_experiment: str,\n    fn_dataset: str,\n    fn_typed: str,\n    fn_experiments: str,\n    fn_synthetic_datasets: str,\n    fn_evaluations: str,\n) -> str:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        dir_experiment: The path to the experiment directory.\n        fn_dataset: The base name of the dataset.\n        fn_experiments: The filename of the collection of experiments.\n        fn_synthetic_datasets: The filename of the collection of synthetic datasets.\n        fn_evaluations: The filename of the collection of evaluations.\n\n    Returns:\n        The paths\n    \"\"\"\n    fn_dataset = Path(fn_dataset).stem\n    fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations = io.consistent_endings(\n        [fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations]\n    )\n    fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations = io.potential_suffixes(\n        [fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], fn_dataset\n    )\n    io.warn_if_path_supplied([fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], dir_experiment)\n    io.check_exists([fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], dir_experiment)\n    return (\n        dir_experiment / fn_typed,\n        dir_experiment / fn_experiments,\n        dir_experiment / fn_synthetic_datasets,\n        dir_experiment / fn_evaluations,\n    )\n
"},{"location":"reference/modules/dashboard/run/","title":"run","text":""},{"location":"reference/modules/dashboard/utils/","title":"utils","text":""},{"location":"reference/modules/dashboard/utils/#nhssynth.modules.dashboard.utils.hide_streamlit_content","title":"hide_streamlit_content()","text":"

Hide the footer message and deploy button in Streamlit.

Source code in src/nhssynth/modules/dashboard/utils.py
def hide_streamlit_content() -> None:\n    \"\"\"\n    Hide the footer message and deploy button in Streamlit.\n    \"\"\"\n    hide_streamlit_style = \"\"\"\n    <style>\n    footer {visibility: hidden;}\n    .stDeployButton {visibility: hidden;}\n    </style>\n    \"\"\"\n    st.markdown(hide_streamlit_style, unsafe_allow_html=True)\n
"},{"location":"reference/modules/dashboard/utils/#nhssynth.modules.dashboard.utils.id_selector","title":"id_selector(df)","text":"

Select an ID from the dataframe to then operate on.

Parameters:

df (DataFrame): The dataframe to select an ID from. Required.

Returns:

Series: The dataset subset to only the row corresponding to the ID.

Source code in src/nhssynth/modules/dashboard/utils.py
def id_selector(df: pd.DataFrame) -> pd.Series:\n    \"\"\"\n    Select an ID from the dataframe to then operate on.\n\n    Args:\n        df: The dataframe to select an ID from.\n\n    Returns:\n        The dataset subset to only the row corresponding to the ID.\n    \"\"\"\n    architecture = st.sidebar.selectbox(\n        \"Select architecture to display\", df.index.get_level_values(\"architecture\").unique()\n    )\n    # Different architectures may have different numbers of repeats and configs\n    repeats = df.loc[architecture].index.get_level_values(\"repeat\").astype(int).unique()\n    configs = df.loc[architecture].index.get_level_values(\"config\").astype(int).unique()\n    if len(repeats) > 1:\n        repeat = st.sidebar.selectbox(\"Select repeat to display\", repeats)\n    else:\n        repeat = repeats[0]\n    if len(configs) > 1:\n        config = st.sidebar.selectbox(\"Select configuration to display\", configs)\n    else:\n        config = configs[0]\n    return df.loc[(architecture, repeat, config)]\n
"},{"location":"reference/modules/dashboard/utils/#nhssynth.modules.dashboard.utils.subset_selector","title":"subset_selector(df)","text":"

Select a subset of the dataframe to then operate on.

Parameters:

df (DataFrame): The dataframe to select a subset of. Required.

Returns:

DataFrame: The subset of the dataframe.

Source code in src/nhssynth/modules/dashboard/utils.py
def subset_selector(df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Select a subset of the dataframe to then operate on.\n\n    Args:\n        df: The dataframe to select a subset of.\n\n    Returns:\n        The subset of the dataframe.\n    \"\"\"\n    architectures = df.index.get_level_values(\"architecture\").unique().tolist()\n    repeats = df.index.get_level_values(\"repeat\").astype(int).unique().tolist()\n    configs = df.index.get_level_values(\"config\").astype(int).unique().tolist()\n    selected_architectures = st.sidebar.multiselect(\n        \"Select architectures to display\", architectures, default=architectures\n    )\n    selected_repeats = st.sidebar.multiselect(\"Select repeats to display\", repeats, default=repeats[0])\n    selected_configs = st.sidebar.multiselect(\"Select configurations to display\", configs, default=configs)\n    return df.loc[(selected_architectures, selected_repeats, selected_configs)]\n
"},{"location":"reference/modules/dashboard/pages/","title":"pages","text":""},{"location":"reference/modules/dashboard/pages/1_Tables/","title":"1_Tables","text":""},{"location":"reference/modules/dashboard/pages/2_Plots/","title":"2_Plots","text":""},{"location":"reference/modules/dashboard/pages/2_Plots/#nhssynth.modules.dashboard.pages.2_Plots.prepare_for_dimensionality","title":"prepare_for_dimensionality(df)","text":"

Factorize all categorical columns in a dataframe, convert datetimes to numeric, and min-max scale every column to the range [0, 1].

Source code in src/nhssynth/modules/dashboard/pages/2_Plots.py
def prepare_for_dimensionality(df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"Factorize all categorical columns in a dataframe.\"\"\"\n    for col in df.columns:\n        if df[col].dtype == \"object\":\n            df[col] = pd.factorize(df[col])[0]\n        elif df[col].dtype == \"datetime64[ns]\":\n            df[col] = pd.to_numeric(df[col])\n        min_val = df[col].min()\n        max_val = df[col].max()\n        df[col] = (df[col] - min_val) / (max_val - min_val)\n    return df\n
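A small sketch of the effect on a toy dataframe (the data is invented for illustration; the loop simply reproduces the body of prepare_for_dimensionality so the snippet stands alone):

import pandas as pd

df = pd.DataFrame({"sex": ["F", "M", "F", "F"], "age": [20, 40, 60, 80]})

for col in df.columns:
    if df[col].dtype == "object":
        df[col] = pd.factorize(df[col])[0]  # encode categories as integers
    elif df[col].dtype == "datetime64[ns]":
        df[col] = pd.to_numeric(df[col])    # convert datetimes to numeric
    df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())  # min-max scale

print(df)
#    sex       age
# 0  0.0  0.000000
# 1  1.0  0.333333
# 2  0.0  0.666667
# 3  0.0  1.000000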
"},{"location":"reference/modules/dashboard/pages/3_Experiment_Configurations/","title":"3_Experiment_Configurations","text":""},{"location":"reference/modules/dataloader/","title":"dataloader","text":""},{"location":"reference/modules/dataloader/constraints/","title":"constraints","text":""},{"location":"reference/modules/dataloader/io/","title":"io","text":""},{"location":"reference/modules/dataloader/io/#nhssynth.modules.dataloader.io.check_input_paths","title":"check_input_paths(fn_input, fn_metadata, dir_data)","text":"

Formats the input filenames and directory for an experiment.

Parameters:

fn_input (str): The input data filename. Required.
fn_metadata (str): The metadata filename / suffix to append to fn_input. Required.
dir_data (str): The directory that should contain both of the above. Required.

Returns:

tuple[Path, str, str]: A tuple containing the correct directory path, input data filename and metadata filename (used for both in and out).

Warns:

UserWarning: When the path to fn_input includes directory separators, as this is not supported and may not work as intended.
UserWarning: When the path to fn_metadata includes directory separators, as this is not supported and may not work as intended.

Source code in src/nhssynth/modules/dataloader/io.py
def check_input_paths(\n    fn_input: str,\n    fn_metadata: str,\n    dir_data: str,\n) -> tuple[Path, str, str]:\n    \"\"\"\n    Formats the input filenames and directory for an experiment.\n\n    Args:\n        fn_input: The input data filename.\n        fn_metadata: The metadata filename / suffix to append to `fn_input`.\n        dir_data: The directory that should contain both of the above.\n\n    Returns:\n        A tuple containing the correct directory path, input data filename and metadata filename (used for both in and out).\n\n    Warnings:\n        UserWarning: When the path to `fn_input` includes directory separators, as this is not supported and may not work as intended.\n        UserWarning: When the path to `fn_metadata` includes directory separators, as this is not supported and may not work as intended.\n    \"\"\"\n    fn_input, fn_metadata = io.consistent_endings([(fn_input, \".csv\"), (fn_metadata, \".yaml\")])\n    dir_data = Path(dir_data)\n    fn_metadata = io.potential_suffix(fn_metadata, fn_input)\n    io.warn_if_path_supplied([fn_input, fn_metadata], dir_data)\n    io.check_exists([fn_input], dir_data)\n    return dir_data, fn_input, fn_metadata\n
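A usage sketch, assuming a file data/support.csv exists on disk (the dataset name and directory are invented for illustration):

from nhssynth.modules.dataloader.io import check_input_paths

dir_data, fn_input, fn_metadata = check_input_paths("support", "_metadata", "data")
print(dir_data, fn_input, fn_metadata)  # data support.csv support_metadata.yaml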
"},{"location":"reference/modules/dataloader/io/#nhssynth.modules.dataloader.io.check_output_paths","title":"check_output_paths(fn_dataset, fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata, dir_experiment)","text":"

Formats the output filenames for an experiment.

Parameters:

fn_dataset (str): The input data filename. Required.
fn_typed (str): The typed input data filename/suffix to append to fn_dataset. Required.
fn_transformed (str): The transformed output data filename/suffix to append to fn_dataset. Required.
fn_metatransformer (str): The metatransformer filename/suffix to append to fn_dataset. Required.
fn_constraint_graph (str): The constraint graph filename/suffix to append to fn_dataset. Required.
fn_sdv_metadata (str): The SDV metadata filename/suffix to append to fn_dataset. Required.
dir_experiment (Path): The experiment directory to write the outputs to. Required.

Returns:

tuple[str, str, str]: A tuple containing the formatted output filenames.

Warns:

UserWarning: When any of the filenames include directory separators, as this is not supported and may not work as intended.

Source code in src/nhssynth/modules/dataloader/io.py
def check_output_paths(\n    fn_dataset: str,\n    fn_typed: str,\n    fn_transformed: str,\n    fn_metatransformer: str,\n    fn_constraint_graph: str,\n    fn_sdv_metadata: str,\n    dir_experiment: Path,\n) -> tuple[str, str, str]:\n    \"\"\"\n    Formats the output filenames for an experiment.\n\n    Args:\n        fn_dataset: The input data filename.\n        fn_typed: The typed input data filename/suffix to append to `fn_dataset`.\n        fn_transformed: The transformed output data filename/suffix to append to `fn_dataset`.\n        fn_metatransformer: The metatransformer filename/suffix to append to `fn_dataset`.\n        fn_constraint_graph: The constraint graph filename/suffix to append to `fn_dataset`.\n        fn_sdv_metadata: The SDV metadata filename/suffix to append to `fn_dataset`.\n        dir_experiment: The experiment directory to write the outputs to.\n\n    Returns:\n        A tuple containing the formatted output filenames.\n\n    Warnings:\n        UserWarning: When any of the filenames include directory separators, as this is not supported and may not work as intended.\n    \"\"\"\n    fn_dataset = Path(fn_dataset).stem\n    fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata = io.consistent_endings(\n        [fn_typed, fn_transformed, fn_metatransformer, (fn_constraint_graph, \".html\"), fn_sdv_metadata]\n    )\n    fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata = io.potential_suffixes(\n        [fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata], fn_dataset\n    )\n    io.warn_if_path_supplied(\n        [fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata], dir_experiment\n    )\n    return fn_dataset, fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata\n
"},{"location":"reference/modules/dataloader/io/#nhssynth.modules.dataloader.io.write_data_outputs","title":"write_data_outputs(metatransformer, fn_dataset, fn_metadata, dir_experiment, args)","text":"

Writes the transformed data and metatransformer to disk.

Parameters:

metatransformer (MetaTransformer): The metatransformer used to transform the data into its model-ready state. Required.
fn_dataset (str): The base dataset filename. Required.
fn_metadata (str): The metadata filename. Required.
dir_experiment (Path): The experiment directory to write the outputs to. Required.
args (Namespace): The full set of parsed command line arguments. Required.

Returns:

The filename of the dataset used.

Source code in src/nhssynth/modules/dataloader/io.py
def write_data_outputs(\n    metatransformer: MetaTransformer,\n    fn_dataset: str,\n    fn_metadata: str,\n    dir_experiment: Path,\n    args: argparse.Namespace,\n) -> None:\n    \"\"\"\n    Writes the transformed data and metatransformer to disk.\n\n    Args:\n        metatransformer: The metatransformer used to transform the data into its model-ready state.\n        fn_dataset: The base dataset filename.\n        fn_metadata: The metadata filename.\n        dir_experiment: The experiment directory to write the outputs to.\n        args: The full set of parsed command line arguments.\n\n    Returns:\n        The filename of the dataset used.\n    \"\"\"\n    fn_dataset, fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata = check_output_paths(\n        fn_dataset,\n        args.typed,\n        args.transformed,\n        args.metatransformer,\n        args.constraint_graph,\n        args.sdv_metadata,\n        dir_experiment,\n    )\n    metatransformer.save_metadata(dir_experiment / fn_metadata, args.collapse_yaml)\n    metatransformer.save_constraint_graphs(dir_experiment / fn_constraint_graph)\n    with open(dir_experiment / fn_typed, \"wb\") as f:\n        pickle.dump(TypedDataset(metatransformer.get_typed_dataset()), f)\n    transformed_dataset = metatransformer.get_transformed_dataset()\n    transformed_dataset.to_pickle(dir_experiment / fn_transformed)\n    if args.write_csv:\n        chunks = np.array_split(transformed_dataset.index, 100)\n        for chunk, subset in enumerate(tqdm(chunks, desc=\"Writing transformed dataset to CSV\", unit=\"chunk\")):\n            if chunk == 0:\n                transformed_dataset.loc[subset].to_csv(\n                    dir_experiment / (fn_transformed[:-3] + \"csv\"), mode=\"w\", index=False\n                )\n            else:\n                transformed_dataset.loc[subset].to_csv(\n                    dir_experiment / (fn_transformed[:-3] + \"csv\"), mode=\"a\", index=False, header=False\n                )\n    with open(dir_experiment / fn_metatransformer, \"wb\") as f:\n        pickle.dump(metatransformer, f)\n    with open(dir_experiment / fn_sdv_metadata, \"wb\") as f:\n        pickle.dump(metatransformer.get_sdv_metadata(), f)\n\n    return fn_dataset\n
"},{"location":"reference/modules/dataloader/metadata/","title":"metadata","text":""},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData","title":"MetaData","text":"Source code in src/nhssynth/modules/dataloader/metadata.py
class MetaData:\n    class ColumnMetaData:\n        def __init__(self, name: str, data: pd.Series, raw: dict) -> None:\n            self.name = name\n            self.dtype: np.dtype = self._validate_dtype(data, raw.get(\"dtype\"))\n            self.categorical: bool = self._validate_categorical(data, raw.get(\"categorical\"))\n            self.missingness_strategy: GenericMissingnessStrategy = self._validate_missingness_strategy(\n                raw.get(\"missingness\")\n            )\n            self.transformer: ColumnTransformer = self._validate_transformer(raw.get(\"transformer\"))\n\n        def _validate_dtype(self, data: pd.Series, dtype_raw: Optional[Union[dict, str]] = None) -> np.dtype:\n            if isinstance(dtype_raw, dict):\n                dtype_name = dtype_raw.pop(\"name\", None)\n            elif isinstance(dtype_raw, str):\n                dtype_name = dtype_raw\n            else:\n                dtype_name = self._infer_dtype(data)\n            try:\n                dtype = np.dtype(dtype_name)\n            except TypeError:\n                warnings.warn(\n                    f\"Invalid dtype specification '{dtype_name}' for column '{self.name}', ignoring dtype for this column\"\n                )\n                dtype = self._infer_dtype(data)\n            if dtype.kind == \"M\":\n                self._setup_datetime_config(data, dtype_raw)\n            elif dtype.kind in [\"f\", \"i\", \"u\"]:\n                self.rounding_scheme = self._validate_rounding_scheme(data, dtype, dtype_raw)\n            return dtype\n\n        def _infer_dtype(self, data: pd.Series) -> np.dtype:\n            return data.dtype.name\n\n        def _infer_datetime_format(self, data: pd.Series) -> str:\n            return _guess_datetime_format_for_array(data[data.notna()].astype(str).to_numpy())\n\n        def _setup_datetime_config(self, data: pd.Series, datetime_config: dict) -> dict:\n            \"\"\"\n            Add keys to `datetime_config` corresponding to args from the `pd.to_datetime` function\n            (see [the docs](https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html))\n            \"\"\"\n            if not isinstance(datetime_config, dict):\n                datetime_config = {}\n            else:\n                datetime_config = filter_dict(datetime_config, {\"format\", \"floor\"}, include=True)\n            if \"format\" not in datetime_config:\n                datetime_config[\"format\"] = self._infer_datetime_format(data)\n            self.datetime_config = datetime_config\n\n        def _validate_rounding_scheme(self, data: pd.Series, dtype: np.dtype, dtype_dict: dict) -> int:\n            if dtype_dict and \"rounding_scheme\" in dtype_dict:\n                return dtype_dict[\"rounding_scheme\"]\n            else:\n                if dtype.kind != \"f\":\n                    return 1.0\n                roundable_data = data[data.notna()]\n                for i in range(np.finfo(dtype).precision):\n                    if (roundable_data.round(i) == roundable_data).all():\n                        return 10**-i\n            return None\n\n        def _validate_categorical(self, data: pd.Series, categorical: Optional[bool] = None) -> bool:\n            if categorical is None:\n                return self._infer_categorical(data)\n            elif not isinstance(categorical, bool):\n                warnings.warn(\n                    f\"Invalid categorical '{categorical}' for column '{self.name}', ignoring categorical for this column\"\n         
       )\n                return self._infer_categorical(data)\n            else:\n                self.boolean = data.nunique() <= 2\n                return categorical\n\n        def _infer_categorical(self, data: pd.Series) -> bool:\n            self.boolean = data.nunique() <= 2\n            return data.nunique() <= 10 or self.dtype.kind == \"O\"\n\n        def _validate_missingness_strategy(self, missingness_strategy: Optional[Union[dict, str]]) -> tuple[str, dict]:\n            if not missingness_strategy:\n                return None\n            if isinstance(missingness_strategy, dict):\n                impute = missingness_strategy.get(\"impute\", None)\n                strategy = \"impute\" if impute else missingness_strategy.get(\"strategy\", None)\n            else:\n                strategy = missingness_strategy\n            if (\n                strategy not in MISSINGNESS_STRATEGIES\n                or (strategy == \"impute\" and impute == \"mean\" and self.dtype.kind != \"f\")\n                or (strategy == \"impute\" and not impute)\n            ):\n                warnings.warn(\n                    f\"Invalid missingness strategy '{missingness_strategy}' for column '{self.name}', ignoring missingness strategy for this column\"\n                )\n                return None\n            return (\n                MISSINGNESS_STRATEGIES[strategy](impute) if strategy == \"impute\" else MISSINGNESS_STRATEGIES[strategy]()\n            )\n\n        def _validate_transformer(self, transformer: Optional[Union[dict, str]] = {}) -> tuple[str, dict]:\n            # if transformer is neither a dict nor a str statement below will raise a TypeError\n            if isinstance(transformer, dict):\n                self.transformer_name = transformer.get(\"name\")\n                self.transformer_config = filter_dict(transformer, \"name\")\n            elif isinstance(transformer, str):\n                self.transformer_name = transformer\n                self.transformer_config = {}\n            else:\n                if transformer is not None:\n                    warnings.warn(\n                        f\"Invalid transformer config '{transformer}' for column '{self.name}', ignoring transformer for this column\"\n                    )\n                self.transformer_name = None\n                self.transformer_config = {}\n            if not self.transformer_name:\n                return self._infer_transformer()\n            else:\n                try:\n                    return eval(self.transformer_name)(**self.transformer_config)\n                except NameError:\n                    warnings.warn(\n                        f\"Invalid transformer '{self.transformer_name}' or config '{self.transformer_config}' for column '{self.name}', ignoring transformer for this column\"\n                    )\n                    return self._infer_transformer()\n\n        def _infer_transformer(self) -> ColumnTransformer:\n            if self.categorical:\n                transformer = OHECategoricalTransformer(**self.transformer_config)\n            else:\n                transformer = ClusterContinuousTransformer(**self.transformer_config)\n            if self.dtype.kind == \"M\":\n                transformer = DatetimeTransformer(transformer)\n            return transformer\n\n    def __init__(self, data: pd.DataFrame, metadata: Optional[dict] = {}):\n        self.columns: pd.Index = data.columns\n        self.raw_metadata: dict = metadata\n        if 
set(self.raw_metadata[\"columns\"].keys()) - set(self.columns):\n            raise ValueError(\"Metadata contains keys that do not appear amongst the columns.\")\n        self.dropped_columns = [cn for cn in self.columns if self.raw_metadata[\"columns\"].get(cn, None) == \"drop\"]\n        self.columns = self.columns.drop(self.dropped_columns)\n        self._metadata = {\n            cn: self.ColumnMetaData(cn, data[cn], self.raw_metadata[\"columns\"].get(cn, {})) for cn in self.columns\n        }\n        self.constraints = ConstraintGraph(self.raw_metadata.get(\"constraints\", []), self.columns, self._metadata)\n\n    def __getitem__(self, key: str) -> dict[str, Any]:\n        return self._metadata[key]\n\n    def __iter__(self) -> Iterator:\n        return iter(self._metadata.values())\n\n    def __repr__(self) -> None:\n        return yaml.dump(self._metadata, default_flow_style=False, sort_keys=False)\n\n    @classmethod\n    def from_path(cls, data: pd.DataFrame, path_str: str):\n        \"\"\"\n        Instantiate a MetaData object from a YAML file via a specified path.\n\n        Args:\n            data: The data to be used to infer / validate the metadata.\n            path_str: The path to the metadata YAML file.\n\n        Returns:\n            The metadata object.\n        \"\"\"\n        path = pathlib.Path(path_str)\n        if path.exists():\n            with open(path) as stream:\n                metadata = yaml.safe_load(stream)\n            # Filter out the expanded alias/anchor group as it is not needed\n            metadata = filter_dict(metadata, {\"column_types\"})\n        else:\n            warnings.warn(f\"No metadata found at {path}...\")\n            metadata = {\"columns\": {}}\n        return cls(data, metadata)\n\n    def _collapse(self, metadata: dict) -> dict:\n        \"\"\"\n        Given a metadata dictionary, rewrite to collapse duplicate column types in order to leverage YAML anchors and shrink the file.\n\n        Args:\n            metadata: The metadata dictionary to be rewritten.\n\n        Returns:\n            A rewritten metadata dictionary with collapsed column types and transformers.\n                The returned dictionary has the following structure:\n                {\n                    \"column_types\": dict,\n                    **metadata  # one entry for each column in \"columns\" that now reference the dicts above\n                }\n                - \"column_types\" is a dictionary mapping column type indices to column type configurations.\n                - \"**metadata\" contains the original metadata dictionary, with column types rewritten to use the indices and \"column_types\".\n        \"\"\"\n        c_index = 1\n        column_types = {}\n        column_type_counts = {}\n        for cn, cd in metadata[\"columns\"].items():\n            if cd not in column_types.values():\n                column_types[c_index] = cd if isinstance(cd, str) else cd.copy()\n                column_type_counts[c_index] = 1\n                c_index += 1\n            else:\n                cix = get_key_by_value(column_types, cd)\n                column_type_counts[cix] += 1\n\n        for cn, cd in metadata[\"columns\"].items():\n            cix = get_key_by_value(column_types, cd)\n            if column_type_counts[cix] > 1:\n                metadata[\"columns\"][cn] = column_types[cix]\n            else:\n                column_types.pop(cix)\n\n        return {\"column_types\": {i + 1: x for i, x in enumerate(column_types.values())}, 
**metadata}\n\n    def _assemble(self, collapse_yaml: bool) -> dict[str, dict[str, Any]]:\n        \"\"\"\n        Rearrange the metadata into a dictionary that can be written to a YAML file.\n\n        Args:\n            collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication.\n\n        Returns:\n            A dictionary containing the assembled metadata.\n        \"\"\"\n        assembled_metadata = {\n            \"columns\": {\n                cn: {\n                    \"dtype\": cmd.dtype.name\n                    if not hasattr(cmd, \"datetime_config\")\n                    else {\"name\": cmd.dtype.name, **cmd.datetime_config},\n                    \"categorical\": cmd.categorical,\n                }\n                for cn, cmd in self._metadata.items()\n            }\n        }\n        # We loop through the base dict above to add other parts if they are present in the metadata\n        for cn, cmd in self._metadata.items():\n            if cmd.missingness_strategy:\n                assembled_metadata[\"columns\"][cn][\"missingness\"] = (\n                    cmd.missingness_strategy.name\n                    if cmd.missingness_strategy.name != \"impute\"\n                    else {\"name\": cmd.missingness_strategy.name, \"impute\": cmd.missingness_strategy.impute}\n                )\n            if cmd.transformer_config:\n                assembled_metadata[\"columns\"][cn][\"transformer\"] = {\n                    **cmd.transformer_config,\n                    \"name\": cmd.transformer.__class__.__name__,\n                }\n\n        # Add back the dropped_columns not present in the metadata\n        if self.dropped_columns:\n            assembled_metadata[\"columns\"].update({cn: \"drop\" for cn in self.dropped_columns})\n\n        if collapse_yaml:\n            assembled_metadata = self._collapse(assembled_metadata)\n\n        # We add the constraints section after all of the formatting and processing above\n        # In general, the constraints are kept the same as the input (provided they passed validation)\n        # If `collapse_yaml` is specified, we output the minimum set of equivalent constraints\n        if self.constraints:\n            assembled_metadata[\"constraints\"] = (\n                [str(c) for c in self.constraints.minimal_constraints]\n                if collapse_yaml\n                else self.constraints.raw_constraint_strings\n            )\n        return assembled_metadata\n\n    def save(self, path: pathlib.Path, collapse_yaml: bool) -> None:\n        \"\"\"\n        Writes metadata to a YAML file.\n\n        Args:\n            path: The path at which to write the metadata YAML file.\n            collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication.\n        \"\"\"\n        with open(path, \"w\") as yaml_file:\n            yaml.safe_dump(\n                self._assemble(collapse_yaml),\n                yaml_file,\n                default_flow_style=False,\n                sort_keys=False,\n            )\n\n    def get_sdv_metadata(self) -> dict[str, dict[str, dict[str, str]]]:\n        \"\"\"\n        Map combinations of our metadata implementation to SDV's as required by SDMetrics.\n\n        Returns:\n            A dictionary containing the SDV metadata.\n        \"\"\"\n        sdv_metadata = {\n            \"columns\": {\n                cn: {\n                    \"sdtype\": \"boolean\"\n                    if 
cmd.boolean\n                    else \"categorical\"\n                    if cmd.categorical\n                    else \"datetime\"\n                    if cmd.dtype.kind == \"M\"\n                    else \"numerical\",\n                }\n                for cn, cmd in self._metadata.items()\n            }\n        }\n        for cn, cmd in self._metadata.items():\n            if cmd.dtype.kind == \"M\":\n                sdv_metadata[\"columns\"][cn][\"format\"] = cmd.datetime_config[\"format\"]\n        return sdv_metadata\n\n    def save_constraint_graphs(self, path: pathlib.Path) -> None:\n        \"\"\"\n        Output the constraint graphs as HTML files.\n\n        Args:\n            path: The path at which to write the constraint graph HTML files.\n        \"\"\"\n        self.constraints._output_graphs_html(path)\n
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.ColumnMetaData","title":"ColumnMetaData","text":"Source code in src/nhssynth/modules/dataloader/metadata.py
class ColumnMetaData:\n    def __init__(self, name: str, data: pd.Series, raw: dict) -> None:\n        self.name = name\n        self.dtype: np.dtype = self._validate_dtype(data, raw.get(\"dtype\"))\n        self.categorical: bool = self._validate_categorical(data, raw.get(\"categorical\"))\n        self.missingness_strategy: GenericMissingnessStrategy = self._validate_missingness_strategy(\n            raw.get(\"missingness\")\n        )\n        self.transformer: ColumnTransformer = self._validate_transformer(raw.get(\"transformer\"))\n\n    def _validate_dtype(self, data: pd.Series, dtype_raw: Optional[Union[dict, str]] = None) -> np.dtype:\n        if isinstance(dtype_raw, dict):\n            dtype_name = dtype_raw.pop(\"name\", None)\n        elif isinstance(dtype_raw, str):\n            dtype_name = dtype_raw\n        else:\n            dtype_name = self._infer_dtype(data)\n        try:\n            dtype = np.dtype(dtype_name)\n        except TypeError:\n            warnings.warn(\n                f\"Invalid dtype specification '{dtype_name}' for column '{self.name}', ignoring dtype for this column\"\n            )\n            dtype = self._infer_dtype(data)\n        if dtype.kind == \"M\":\n            self._setup_datetime_config(data, dtype_raw)\n        elif dtype.kind in [\"f\", \"i\", \"u\"]:\n            self.rounding_scheme = self._validate_rounding_scheme(data, dtype, dtype_raw)\n        return dtype\n\n    def _infer_dtype(self, data: pd.Series) -> np.dtype:\n        return data.dtype.name\n\n    def _infer_datetime_format(self, data: pd.Series) -> str:\n        return _guess_datetime_format_for_array(data[data.notna()].astype(str).to_numpy())\n\n    def _setup_datetime_config(self, data: pd.Series, datetime_config: dict) -> dict:\n        \"\"\"\n        Add keys to `datetime_config` corresponding to args from the `pd.to_datetime` function\n        (see [the docs](https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html))\n        \"\"\"\n        if not isinstance(datetime_config, dict):\n            datetime_config = {}\n        else:\n            datetime_config = filter_dict(datetime_config, {\"format\", \"floor\"}, include=True)\n        if \"format\" not in datetime_config:\n            datetime_config[\"format\"] = self._infer_datetime_format(data)\n        self.datetime_config = datetime_config\n\n    def _validate_rounding_scheme(self, data: pd.Series, dtype: np.dtype, dtype_dict: dict) -> int:\n        if dtype_dict and \"rounding_scheme\" in dtype_dict:\n            return dtype_dict[\"rounding_scheme\"]\n        else:\n            if dtype.kind != \"f\":\n                return 1.0\n            roundable_data = data[data.notna()]\n            for i in range(np.finfo(dtype).precision):\n                if (roundable_data.round(i) == roundable_data).all():\n                    return 10**-i\n        return None\n\n    def _validate_categorical(self, data: pd.Series, categorical: Optional[bool] = None) -> bool:\n        if categorical is None:\n            return self._infer_categorical(data)\n        elif not isinstance(categorical, bool):\n            warnings.warn(\n                f\"Invalid categorical '{categorical}' for column '{self.name}', ignoring categorical for this column\"\n            )\n            return self._infer_categorical(data)\n        else:\n            self.boolean = data.nunique() <= 2\n            return categorical\n\n    def _infer_categorical(self, data: pd.Series) -> bool:\n        self.boolean = data.nunique() <= 2\n    
    return data.nunique() <= 10 or self.dtype.kind == \"O\"\n\n    def _validate_missingness_strategy(self, missingness_strategy: Optional[Union[dict, str]]) -> tuple[str, dict]:\n        if not missingness_strategy:\n            return None\n        if isinstance(missingness_strategy, dict):\n            impute = missingness_strategy.get(\"impute\", None)\n            strategy = \"impute\" if impute else missingness_strategy.get(\"strategy\", None)\n        else:\n            strategy = missingness_strategy\n        if (\n            strategy not in MISSINGNESS_STRATEGIES\n            or (strategy == \"impute\" and impute == \"mean\" and self.dtype.kind != \"f\")\n            or (strategy == \"impute\" and not impute)\n        ):\n            warnings.warn(\n                f\"Invalid missingness strategy '{missingness_strategy}' for column '{self.name}', ignoring missingness strategy for this column\"\n            )\n            return None\n        return (\n            MISSINGNESS_STRATEGIES[strategy](impute) if strategy == \"impute\" else MISSINGNESS_STRATEGIES[strategy]()\n        )\n\n    def _validate_transformer(self, transformer: Optional[Union[dict, str]] = {}) -> tuple[str, dict]:\n        # if transformer is neither a dict nor a str statement below will raise a TypeError\n        if isinstance(transformer, dict):\n            self.transformer_name = transformer.get(\"name\")\n            self.transformer_config = filter_dict(transformer, \"name\")\n        elif isinstance(transformer, str):\n            self.transformer_name = transformer\n            self.transformer_config = {}\n        else:\n            if transformer is not None:\n                warnings.warn(\n                    f\"Invalid transformer config '{transformer}' for column '{self.name}', ignoring transformer for this column\"\n                )\n            self.transformer_name = None\n            self.transformer_config = {}\n        if not self.transformer_name:\n            return self._infer_transformer()\n        else:\n            try:\n                return eval(self.transformer_name)(**self.transformer_config)\n            except NameError:\n                warnings.warn(\n                    f\"Invalid transformer '{self.transformer_name}' or config '{self.transformer_config}' for column '{self.name}', ignoring transformer for this column\"\n                )\n                return self._infer_transformer()\n\n    def _infer_transformer(self) -> ColumnTransformer:\n        if self.categorical:\n            transformer = OHECategoricalTransformer(**self.transformer_config)\n        else:\n            transformer = ClusterContinuousTransformer(**self.transformer_config)\n        if self.dtype.kind == \"M\":\n            transformer = DatetimeTransformer(transformer)\n        return transformer\n
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.from_path","title":"from_path(data, path_str) classmethod","text":"

Instantiate a MetaData object from a YAML file via a specified path.

Parameters:

  • data (DataFrame): The data to be used to infer / validate the metadata. Required.
  • path_str (str): The path to the metadata YAML file. Required.

Returns:

  • The metadata object.

Source code in src/nhssynth/modules/dataloader/metadata.py
@classmethod\ndef from_path(cls, data: pd.DataFrame, path_str: str):\n    \"\"\"\n    Instantiate a MetaData object from a YAML file via a specified path.\n\n    Args:\n        data: The data to be used to infer / validate the metadata.\n        path_str: The path to the metadata YAML file.\n\n    Returns:\n        The metadata object.\n    \"\"\"\n    path = pathlib.Path(path_str)\n    if path.exists():\n        with open(path) as stream:\n            metadata = yaml.safe_load(stream)\n        # Filter out the expanded alias/anchor group as it is not needed\n        metadata = filter_dict(metadata, {\"column_types\"})\n    else:\n        warnings.warn(f\"No metadata found at {path}...\")\n        metadata = {\"columns\": {}}\n    return cls(data, metadata)\n
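For orientation, a minimal usage sketch follows. It assumes MetaData is importable from nhssynth.modules.dataloader.metadata (the module path shown above); the CSV and YAML paths are hypothetical placeholders.

```python
import pandas as pd

from nhssynth.modules.dataloader.metadata import MetaData

# Hypothetical inputs: any tabular dataset plus a YAML file describing its columns.
df = pd.read_csv("data/my_dataset.csv")

# If no file exists at the path, a warning is emitted and the metadata is inferred from `df`.
metadata = MetaData.from_path(df, "data/my_dataset_metadata.yaml")
```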
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.get_sdv_metadata","title":"get_sdv_metadata()","text":"

Map combinations of our metadata implementation to SDV's as required by SDMetrics.

Returns:

  • dict[str, dict[str, dict[str, str]]]: A dictionary containing the SDV metadata.

Source code in src/nhssynth/modules/dataloader/metadata.py
def get_sdv_metadata(self) -> dict[str, dict[str, dict[str, str]]]:\n    \"\"\"\n    Map combinations of our metadata implementation to SDV's as required by SDMetrics.\n\n    Returns:\n        A dictionary containing the SDV metadata.\n    \"\"\"\n    sdv_metadata = {\n        \"columns\": {\n            cn: {\n                \"sdtype\": \"boolean\"\n                if cmd.boolean\n                else \"categorical\"\n                if cmd.categorical\n                else \"datetime\"\n                if cmd.dtype.kind == \"M\"\n                else \"numerical\",\n            }\n            for cn, cmd in self._metadata.items()\n        }\n    }\n    for cn, cmd in self._metadata.items():\n        if cmd.dtype.kind == \"M\":\n            sdv_metadata[\"columns\"][cn][\"format\"] = cmd.datetime_config[\"format\"]\n    return sdv_metadata\n
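To make the mapping concrete, the sketch below shows the shape of dictionary this method returns, following the source above; the column names and datetime format are hypothetical.

```python
# Illustrative return value for a boolean flag, a category, a datetime and a numeric column.
sdv_metadata = {
    "columns": {
        "is_smoker": {"sdtype": "boolean"},
        "region": {"sdtype": "categorical"},
        "admission_date": {"sdtype": "datetime", "format": "%Y-%m-%d"},
        "length_of_stay": {"sdtype": "numerical"},
    }
}
```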
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.save","title":"save(path, collapse_yaml)","text":"

Writes metadata to a YAML file.

Parameters:

  • path (Path): The path at which to write the metadata YAML file. Required.
  • collapse_yaml (bool): A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication. Required.

Source code in src/nhssynth/modules/dataloader/metadata.py
def save(self, path: pathlib.Path, collapse_yaml: bool) -> None:\n    \"\"\"\n    Writes metadata to a YAML file.\n\n    Args:\n        path: The path at which to write the metadata YAML file.\n        collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication.\n    \"\"\"\n    with open(path, \"w\") as yaml_file:\n        yaml.safe_dump(\n            self._assemble(collapse_yaml),\n            yaml_file,\n            default_flow_style=False,\n            sort_keys=False,\n        )\n
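Continuing the hypothetical `metadata` object from the `from_path` sketch above, writing it back out might look like the following; the output path is a placeholder.

```python
import pathlib

# collapse_yaml=True reduces duplication in the written YAML representation.
metadata.save(pathlib.Path("data/my_dataset_metadata_out.yaml"), collapse_yaml=True)
```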
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.save_constraint_graphs","title":"save_constraint_graphs(path)","text":"

Output the constraint graphs as HTML files.

Parameters:

  • path (Path): The path at which to write the constraint graph HTML files. Required.

Source code in src/nhssynth/modules/dataloader/metadata.py
def save_constraint_graphs(self, path: pathlib.Path) -> None:\n    \"\"\"\n    Output the constraint graphs as HTML files.\n\n    Args:\n        path: The path at which to write the constraint graph HTML files.\n    \"\"\"\n    self.constraints._output_graphs_html(path)\n
"},{"location":"reference/modules/dataloader/metatransformer/","title":"metatransformer","text":""},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer","title":"MetaTransformer","text":"

The metatransformer is responsible for transforming the input dataset into a format that can be used by the model module, and for transforming that module's output back into the original format of the input dataset.

Parameters:

  • dataset (DataFrame): The raw input DataFrame. Required.
  • metadata (Optional[MetaData]): Optionally, a MetaData object containing the metadata for the dataset. If this is not provided it will be inferred from the dataset. Default: None.
  • missingness_strategy (Optional[str]): The missingness strategy to use. Defaults to augmenting missing values in the data; see the missingness strategies for more information. Default: 'augment'.
  • impute_value (Optional[Any]): Only used when missingness_strategy is set to 'impute'. The value to use when imputing missing values in the data. Default: None.

After calling MetaTransformer.apply(), the following attributes and methods will be available:

Attributes:

  • typed_dataset (DataFrame): The dataset with the dtypes applied.
  • post_missingness_strategy_dataset (DataFrame): The dataset with the missingness strategies applied.
  • transformed_dataset (DataFrame): The transformed dataset.
  • single_column_indices (list[int]): The indices of the columns that were transformed into a single column.
  • multi_column_indices (list[list[int]]): The indices of the columns that were transformed into multiple columns.

Methods:

  • get_typed_dataset(): Returns the typed dataset.
  • get_prepared_dataset(): Returns the dataset with the missingness strategies applied.
  • get_transformed_dataset(): Returns the transformed dataset.
  • get_multi_and_single_column_indices(): Returns the indices of the columns that were transformed into one or multiple column(s).
  • get_sdv_metadata(): Returns the metadata in the correct format for SDMetrics.
  • save_metadata(): Saves the metadata to a file.
  • save_constraint_graphs(): Saves the constraint graphs to a file.

Note that mt.apply is a helper function that runs mt.apply_dtypes, mt.apply_missingness_strategy and mt.transform in sequence. This is the recommended way to use the MetaTransformer to ensure that it is fully instantiated for use downstream.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
class MetaTransformer:\n    \"\"\"\n    The metatransformer is responsible for transforming input dataset into a format that can be used by the `model` module, and for transforming\n    this module's output back to the original format of the input dataset.\n\n    Args:\n        dataset: The raw input DataFrame.\n        metadata: Optionally, a [`MetaData`][nhssynth.modules.dataloader.metadata.MetaData] object containing the metadata for the dataset. If this is not provided it will be inferred from the dataset.\n        missingness_strategy: The missingness strategy to use. Defaults to augmenting missing values in the data, see [the missingness strategies][nhssynth.modules.dataloader.missingness] for more information.\n        impute_value: Only used when `missingness_strategy` is set to 'impute'. The value to use when imputing missing values in the data.\n\n    After calling `MetaTransformer.apply()`, the following attributes and methods will be available:\n\n    Attributes:\n        typed_dataset (pd.DataFrame): The dataset with the dtypes applied.\n        post_missingness_strategy_dataset (pd.DataFrame): The dataset with the missingness strategies applied.\n        transformed_dataset (pd.DataFrame): The transformed dataset.\n        single_column_indices (list[int]): The indices of the columns that were transformed into a single column.\n        multi_column_indices (list[list[int]]): The indices of the columns that were transformed into multiple columns.\n\n    **Methods:**\n\n    - `get_typed_dataset()`: Returns the typed dataset.\n    - `get_prepared_dataset()`: Returns the dataset with the missingness strategies applied.\n    - `get_transformed_dataset()`: Returns the transformed dataset.\n    - `get_multi_and_single_column_indices()`: Returns the indices of the columns that were transformed into one or multiple column(s).\n    - `get_sdv_metadata()`: Returns the metadata in the correct format for SDMetrics.\n    - `save_metadata()`: Saves the metadata to a file.\n    - `save_constraint_graphs()`: Saves the constraint graphs to a file.\n\n    Note that `mt.apply` is a helper function that runs `mt.apply_dtypes`, `mt.apply_missingness_strategy` and `mt.transform` in sequence.\n    This is the recommended way to use the MetaTransformer to ensure that it is fully instantiated for use downstream.\n    \"\"\"\n\n    def __init__(\n        self,\n        dataset: pd.DataFrame,\n        metadata: Optional[MetaData] = None,\n        missingness_strategy: Optional[str] = \"augment\",\n        impute_value: Optional[Any] = None,\n    ):\n        self._raw_dataset: pd.DataFrame = dataset\n        self._metadata: MetaData = metadata or MetaData(dataset)\n        if missingness_strategy == \"impute\":\n            assert (\n                impute_value is not None\n            ), \"`impute_value` of the `MetaTransformer` must be specified (via the --impute flag) when using the imputation missingness strategy\"\n            self._impute_value = impute_value\n        self._missingness_strategy = MISSINGNESS_STRATEGIES[missingness_strategy]\n\n    @classmethod\n    def from_path(cls, dataset: pd.DataFrame, metadata_path: str, **kwargs) -> Self:\n        \"\"\"\n        Instantiates a MetaTransformer from a metadata file via a provided path.\n\n        Args:\n            dataset: The raw input DataFrame.\n            metadata_path: The path to the metadata file.\n\n        Returns:\n            A MetaTransformer object.\n        \"\"\"\n        return cls(dataset, MetaData.from_path(dataset, 
metadata_path), **kwargs)\n\n    @classmethod\n    def from_dict(cls, dataset: pd.DataFrame, metadata: dict, **kwargs) -> Self:\n        \"\"\"\n        Instantiates a MetaTransformer from a metadata dictionary.\n\n        Args:\n            dataset: The raw input DataFrame.\n            metadata: A dictionary of raw metadata.\n\n        Returns:\n            A MetaTransformer object.\n        \"\"\"\n        return cls(dataset, MetaData(dataset, metadata), **kwargs)\n\n    def drop_columns(self) -> None:\n        \"\"\"\n        Drops columns from the dataset that are not in the `MetaData`.\n        \"\"\"\n        self._raw_dataset = self._raw_dataset[self._metadata.columns]\n\n    def _apply_rounding_scheme(self, working_column: pd.Series, rounding_scheme: float) -> pd.Series:\n        \"\"\"\n        A rounding scheme takes the form of the smallest value that should be rounded to 0, i.e. 0.01 for 2dp.\n        We first round to the nearest multiple in the standard way, through dividing, rounding and then multiplying.\n        However, this can lead to floating point errors, so we then round to the number of decimal places required by the rounding scheme.\n\n        e.g. `np.round(0.15 / 0.1) * 0.1` will erroneously return 0.1.\n\n        Args:\n            working_column: The column to apply the rounding scheme to.\n            rounding_scheme: The rounding scheme to apply.\n\n        Returns:\n            The column with the rounding scheme applied.\n        \"\"\"\n        working_column = np.round(working_column / rounding_scheme) * rounding_scheme\n        return working_column.round(max(0, int(np.ceil(np.log10(1 / rounding_scheme)))))\n\n    def _apply_dtype(\n        self,\n        working_column: pd.Series,\n        column_metadata: MetaData.ColumnMetaData,\n    ) -> pd.Series:\n        \"\"\"\n        Given a `working_column`, the dtype specified in the `column_metadata` is applied to it.\n         - Datetime columns are floored, and their format is inferred.\n         - Rounding schemes are applied to numeric columns if specified.\n         - Columns with missing values have their dtype converted to the pandas equivalent to allow for NA values.\n\n        Args:\n            working_column: The column to apply the dtype to.\n            column_metadata: The metadata for the column.\n\n        Returns:\n            The column with the dtype applied.\n        \"\"\"\n        dtype = column_metadata.dtype\n        try:\n            if dtype.kind == \"M\":\n                working_column = pd.to_datetime(working_column, format=column_metadata.datetime_config.get(\"format\"))\n                if column_metadata.datetime_config.get(\"floor\"):\n                    working_column = working_column.dt.floor(column_metadata.datetime_config.get(\"floor\"))\n                    column_metadata.datetime_config[\"format\"] = column_metadata._infer_datetime_format(working_column)\n                return working_column\n            else:\n                if hasattr(column_metadata, \"rounding_scheme\") and column_metadata.rounding_scheme is not None:\n                    working_column = self._apply_rounding_scheme(working_column, column_metadata.rounding_scheme)\n                # If there are missing values in the column, we need to use the pandas equivalent of the dtype to allow for NA values\n                if working_column.isnull().any() and dtype.kind in [\"i\", \"u\", \"f\"]:\n                    return working_column.astype(dtype.name.capitalize())\n                else:\n            
        return working_column.astype(dtype)\n        except ValueError:\n            raise ValueError(f\"{sys.exc_info()[1]}\\nError applying dtype '{dtype}' to column '{working_column.name}'\")\n\n    def apply_dtypes(self, data: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"\n        Applies dtypes from the metadata to `dataset`.\n\n        Returns:\n            The dataset with the dtypes applied.\n        \"\"\"\n        working_data = data.copy()\n        for column_metadata in self._metadata:\n            working_data[column_metadata.name] = self._apply_dtype(working_data[column_metadata.name], column_metadata)\n        return working_data\n\n    def apply_missingness_strategy(self) -> pd.DataFrame:\n        \"\"\"\n        Resolves missingness in the dataset via the `MetaTransformer`'s global missingness strategy or\n        column-wise missingness strategies. In the case of the `AugmentMissingnessStrategy`, the missingness\n        is not resolved, instead a new column / value is added for later transformation.\n\n        Returns:\n            The dataset with the missingness strategies applied.\n        \"\"\"\n        working_data = self.typed_dataset.copy()\n        for column_metadata in self._metadata:\n            if not column_metadata.missingness_strategy:\n                column_metadata.missingness_strategy = (\n                    self._missingness_strategy(self._impute_value)\n                    if hasattr(self, \"_impute_value\")\n                    else self._missingness_strategy()\n                )\n            if not working_data[column_metadata.name].isnull().any():\n                continue\n            working_data = column_metadata.missingness_strategy.remove(working_data, column_metadata)\n        return working_data\n\n    # def apply_constraints(self) -> pd.DataFrame:\n    #     working_data = self.post_missingness_strategy_dataset.copy()\n    #     for constraint in self._metadata.constraints:\n    #         working_data = constraint.apply(working_data)\n    #     return working_data\n\n    def _get_missingness_carrier(self, column_metadata: MetaData.ColumnMetaData) -> Union[pd.Series, Any]:\n        \"\"\"\n        In the case of the `AugmentMissingnessStrategy`, a `missingness_carrier` has been determined for each column.\n        For continuous columns this is an indicator column for the presence of NaN values.\n        For categorical columns this is the value to be used to represent missingness as a category.\n\n        Args:\n            column_metadata: The metadata for the column.\n\n        Returns:\n            The missingness carrier for the column.\n        \"\"\"\n        missingness_carrier = getattr(column_metadata.missingness_strategy, \"missingness_carrier\", None)\n        if missingness_carrier in self.post_missingness_strategy_dataset.columns:\n            return self.post_missingness_strategy_dataset[missingness_carrier]\n        else:\n            return missingness_carrier\n\n    def transform(self) -> pd.DataFrame:\n        \"\"\"\n        Prepares the dataset by applying each of the columns' transformers and recording the indices of the single and multi columns.\n\n        Returns:\n            The transformed dataset.\n        \"\"\"\n        transformed_columns = []\n        self.single_column_indices = []\n        self.multi_column_indices = []\n        col_counter = 0\n        working_data = self.post_missingness_strategy_dataset.copy()\n\n        # iteratively build the transformed df\n        for column_metadata in tqdm(\n   
         self._metadata, desc=\"Transforming data\", unit=\"column\", total=len(self._metadata.columns)\n        ):\n            missingness_carrier = self._get_missingness_carrier(column_metadata)\n            transformed_data = column_metadata.transformer.apply(\n                working_data[column_metadata.name], missingness_carrier\n            )\n            transformed_columns.append(transformed_data)\n\n            # track single and multi column indices to supply to the model\n            if isinstance(transformed_data, pd.DataFrame) and transformed_data.shape[1] > 1:\n                num_to_add = transformed_data.shape[1]\n                if not column_metadata.categorical:\n                    self.single_column_indices.append(col_counter)\n                    col_counter += 1\n                    num_to_add -= 1\n                self.multi_column_indices.append(list(range(col_counter, col_counter + num_to_add)))\n                col_counter += num_to_add\n            else:\n                self.single_column_indices.append(col_counter)\n                col_counter += 1\n\n        return pd.concat(transformed_columns, axis=1)\n\n    def apply(self) -> pd.DataFrame:\n        \"\"\"\n        Applies the various steps of the MetaTransformer to a passed DataFrame.\n\n        Returns:\n            The transformed dataset.\n        \"\"\"\n        self.drop_columns()\n        self.typed_dataset = self.apply_dtypes(self._raw_dataset)\n        self.post_missingness_strategy_dataset = self.apply_missingness_strategy()\n        # self.constrained_dataset = self.apply_constraints()\n        self.transformed_dataset = self.transform()\n        return self.transformed_dataset\n\n    def inverse_apply(self, dataset: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"\n        Reverses the transformation applied by the MetaTransformer.\n\n        Args:\n            dataset: The transformed dataset.\n\n        Returns:\n            The original dataset.\n        \"\"\"\n        for column_metadata in self._metadata:\n            dataset = column_metadata.transformer.revert(dataset)\n        return self.apply_dtypes(dataset)\n\n    def get_typed_dataset(self) -> pd.DataFrame:\n        if not hasattr(self, \"typed_dataset\"):\n            raise ValueError(\n                \"The typed dataset has not yet been created. Call `mt.apply()` (or `mt.apply_dtypes()`) first.\"\n            )\n        return self.typed_dataset\n\n    def get_prepared_dataset(self) -> pd.DataFrame:\n        if not hasattr(self, \"prepared_dataset\"):\n            raise ValueError(\n                \"The prepared dataset has not yet been created. Call `mt.apply()` (or `mt.apply_missingness_strategy()`) first.\"\n            )\n        return self.prepared_dataset\n\n    def get_transformed_dataset(self) -> pd.DataFrame:\n        if not hasattr(self, \"transformed_dataset\"):\n            raise ValueError(\n                \"The prepared dataset has not yet been created. 
Call `mt.apply()` (or `mt.transform()`) first.\"\n            )\n        return self.transformed_dataset\n\n    def get_multi_and_single_column_indices(self) -> tuple[list[int], list[int]]:\n        \"\"\"\n        Returns the indices of the columns that were transformed into one or multiple column(s).\n\n        Returns:\n            A tuple containing the indices of the single and multi columns.\n        \"\"\"\n        if not hasattr(self, \"multi_column_indices\") or not hasattr(self, \"single_column_indices\"):\n            raise ValueError(\n                \"The single and multi column indices have not yet been created. Call `mt.apply()` (or `mt.transform()`) first.\"\n            )\n        return self.multi_column_indices, self.single_column_indices\n\n    def get_sdv_metadata(self) -> dict[str, dict[str, Any]]:\n        \"\"\"\n        Calls the `MetaData` method to reformat its contents into the correct format for use with SDMetrics.\n\n        Returns:\n            The metadata in the correct format for SDMetrics.\n        \"\"\"\n        return self._metadata.get_sdv_metadata()\n\n    def save_metadata(self, path: pathlib.Path, collapse_yaml: bool = False) -> None:\n        return self._metadata.save(path, collapse_yaml)\n\n    def save_constraint_graphs(self, path: pathlib.Path) -> None:\n        return self._metadata.constraints._output_graphs_html(path)\n
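A minimal end-to-end sketch of the workflow described above, assuming the class is importable from nhssynth.modules.dataloader.metatransformer and using hypothetical file paths; in a real run the reverted frame would come from the model module's synthetic output rather than the transformed data itself.

```python
import pandas as pd

from nhssynth.modules.dataloader.metatransformer import MetaTransformer

df = pd.read_csv("data/my_dataset.csv")  # hypothetical raw dataset

# Build the MetaTransformer from a metadata YAML (metadata is inferred if the file is absent).
mt = MetaTransformer.from_path(df, "data/my_dataset_metadata.yaml")

# Apply dtypes, missingness handling and column transformers in sequence.
transformed = mt.apply()

# Column index bookkeeping required by the model module.
multi_column_indices, single_column_indices = mt.get_multi_and_single_column_indices()

# Map data in the transformed space back to the original format (here, the transformed
# data itself stands in for the model module's output).
reverted = mt.inverse_apply(transformed.copy())
```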
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.apply","title":"apply()","text":"

Applies the various steps of the MetaTransformer to a passed DataFrame.

Returns:

  • DataFrame: The transformed dataset.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def apply(self) -> pd.DataFrame:\n    \"\"\"\n    Applies the various steps of the MetaTransformer to a passed DataFrame.\n\n    Returns:\n        The transformed dataset.\n    \"\"\"\n    self.drop_columns()\n    self.typed_dataset = self.apply_dtypes(self._raw_dataset)\n    self.post_missingness_strategy_dataset = self.apply_missingness_strategy()\n    # self.constrained_dataset = self.apply_constraints()\n    self.transformed_dataset = self.transform()\n    return self.transformed_dataset\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.apply_dtypes","title":"apply_dtypes(data)","text":"

Applies dtypes from the metadata to the dataset.

Returns:

  • DataFrame: The dataset with the dtypes applied.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def apply_dtypes(self, data: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Applies dtypes from the metadata to `dataset`.\n\n    Returns:\n        The dataset with the dtypes applied.\n    \"\"\"\n    working_data = data.copy()\n    for column_metadata in self._metadata:\n        working_data[column_metadata.name] = self._apply_dtype(working_data[column_metadata.name], column_metadata)\n    return working_data\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.apply_missingness_strategy","title":"apply_missingness_strategy()","text":"

Resolves missingness in the dataset via the MetaTransformer's global missingness strategy or column-wise missingness strategies. In the case of the AugmentMissingnessStrategy, the missingness is not resolved; instead, a new column / value is added for later transformation.

Returns:

  • DataFrame: The dataset with the missingness strategies applied.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def apply_missingness_strategy(self) -> pd.DataFrame:\n    \"\"\"\n    Resolves missingness in the dataset via the `MetaTransformer`'s global missingness strategy or\n    column-wise missingness strategies. In the case of the `AugmentMissingnessStrategy`, the missingness\n    is not resolved, instead a new column / value is added for later transformation.\n\n    Returns:\n        The dataset with the missingness strategies applied.\n    \"\"\"\n    working_data = self.typed_dataset.copy()\n    for column_metadata in self._metadata:\n        if not column_metadata.missingness_strategy:\n            column_metadata.missingness_strategy = (\n                self._missingness_strategy(self._impute_value)\n                if hasattr(self, \"_impute_value\")\n                else self._missingness_strategy()\n            )\n        if not working_data[column_metadata.name].isnull().any():\n            continue\n        working_data = column_metadata.missingness_strategy.remove(working_data, column_metadata)\n    return working_data\n
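To make the default 'augment' behaviour concrete for a continuous column, the sketch below reproduces in plain pandas what the AugmentMissingnessStrategy shown later in this reference does; the column name is hypothetical.

```python
import numpy as np
import pandas as pd

data = pd.DataFrame({"blood_pressure": [120.0, np.nan, 135.0, np.nan]})

# Continuous features gain a 0/1 indicator column; categorical features instead gain a
# dedicated 'missing' category or value. The NaNs themselves are handled at transform time.
data["blood_pressure_missing"] = data["blood_pressure"].isnull().astype(int)
print(data)
#    blood_pressure  blood_pressure_missing
# 0           120.0                       0
# 1             NaN                       1
# 2           135.0                       0
# 3             NaN                       1
```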
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.drop_columns","title":"drop_columns()","text":"

Drops columns from the dataset that are not in the MetaData.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def drop_columns(self) -> None:\n    \"\"\"\n    Drops columns from the dataset that are not in the `MetaData`.\n    \"\"\"\n    self._raw_dataset = self._raw_dataset[self._metadata.columns]\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.from_dict","title":"from_dict(dataset, metadata, **kwargs) classmethod","text":"

Instantiates a MetaTransformer from a metadata dictionary.

Parameters:

  • dataset (DataFrame): The raw input DataFrame. Required.
  • metadata (dict): A dictionary of raw metadata. Required.

Returns:

  • Self: A MetaTransformer object.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
@classmethod\ndef from_dict(cls, dataset: pd.DataFrame, metadata: dict, **kwargs) -> Self:\n    \"\"\"\n    Instantiates a MetaTransformer from a metadata dictionary.\n\n    Args:\n        dataset: The raw input DataFrame.\n        metadata: A dictionary of raw metadata.\n\n    Returns:\n        A MetaTransformer object.\n    \"\"\"\n    return cls(dataset, MetaData(dataset, metadata), **kwargs)\n
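A sketch of `from_dict` with an inline metadata dictionary. The top-level `columns` key and the per-column keys (`dtype`, `categorical`, `missingness`, `transformer`) mirror the fields read by `ColumnMetaData` and `MetaData.from_path` above; the example data and column choices are hypothetical.

```python
import pandas as pd

from nhssynth.modules.dataloader.metatransformer import MetaTransformer

df = pd.DataFrame(
    {
        "region": ["north", "south", None, "south", "north", "east"],
        "is_smoker": [0, 1, 1, 0, 0, 1],
    }
)

metadata = {
    "columns": {
        "region": {"dtype": "object", "categorical": True, "transformer": "OHECategoricalTransformer"},
        # The impute strategy only takes effect if the column actually contains missing values.
        "is_smoker": {"dtype": "int64", "categorical": True, "missingness": {"impute": "mode"}},
    }
}

mt = MetaTransformer.from_dict(df, metadata)
transformed = mt.apply()
```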
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.from_path","title":"from_path(dataset, metadata_path, **kwargs) classmethod","text":"

Instantiates a MetaTransformer from a metadata file via a provided path.

Parameters:

  • dataset (DataFrame): The raw input DataFrame. Required.
  • metadata_path (str): The path to the metadata file. Required.

Returns:

  • Self: A MetaTransformer object.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
@classmethod\ndef from_path(cls, dataset: pd.DataFrame, metadata_path: str, **kwargs) -> Self:\n    \"\"\"\n    Instantiates a MetaTransformer from a metadata file via a provided path.\n\n    Args:\n        dataset: The raw input DataFrame.\n        metadata_path: The path to the metadata file.\n\n    Returns:\n        A MetaTransformer object.\n    \"\"\"\n    return cls(dataset, MetaData.from_path(dataset, metadata_path), **kwargs)\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.get_multi_and_single_column_indices","title":"get_multi_and_single_column_indices()","text":"

Returns the indices of the columns that were transformed into one or multiple column(s).

Returns:

  • tuple[list[int], list[int]]: A tuple containing the indices of the multi and single columns, in that order.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def get_multi_and_single_column_indices(self) -> tuple[list[int], list[int]]:\n    \"\"\"\n    Returns the indices of the columns that were transformed into one or multiple column(s).\n\n    Returns:\n        A tuple containing the indices of the single and multi columns.\n    \"\"\"\n    if not hasattr(self, \"multi_column_indices\") or not hasattr(self, \"single_column_indices\"):\n        raise ValueError(\n            \"The single and multi column indices have not yet been created. Call `mt.apply()` (or `mt.transform()`) first.\"\n        )\n    return self.multi_column_indices, self.single_column_indices\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.get_sdv_metadata","title":"get_sdv_metadata()","text":"

Calls the MetaData method to reformat its contents into the correct format for use with SDMetrics.

Returns:

  • dict[str, dict[str, Any]]: The metadata in the correct format for SDMetrics.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def get_sdv_metadata(self) -> dict[str, dict[str, Any]]:\n    \"\"\"\n    Calls the `MetaData` method to reformat its contents into the correct format for use with SDMetrics.\n\n    Returns:\n        The metadata in the correct format for SDMetrics.\n    \"\"\"\n    return self._metadata.get_sdv_metadata()\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.inverse_apply","title":"inverse_apply(dataset)","text":"

Reverses the transformation applied by the MetaTransformer.

Parameters:

  • dataset (DataFrame): The transformed dataset. Required.

Returns:

  • DataFrame: The original dataset.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def inverse_apply(self, dataset: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Reverses the transformation applied by the MetaTransformer.\n\n    Args:\n        dataset: The transformed dataset.\n\n    Returns:\n        The original dataset.\n    \"\"\"\n    for column_metadata in self._metadata:\n        dataset = column_metadata.transformer.revert(dataset)\n    return self.apply_dtypes(dataset)\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.transform","title":"transform()","text":"

Prepares the dataset by applying each of the columns' transformers and recording the indices of the single and multi columns.

Returns:

  • DataFrame: The transformed dataset.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def transform(self) -> pd.DataFrame:\n    \"\"\"\n    Prepares the dataset by applying each of the columns' transformers and recording the indices of the single and multi columns.\n\n    Returns:\n        The transformed dataset.\n    \"\"\"\n    transformed_columns = []\n    self.single_column_indices = []\n    self.multi_column_indices = []\n    col_counter = 0\n    working_data = self.post_missingness_strategy_dataset.copy()\n\n    # iteratively build the transformed df\n    for column_metadata in tqdm(\n        self._metadata, desc=\"Transforming data\", unit=\"column\", total=len(self._metadata.columns)\n    ):\n        missingness_carrier = self._get_missingness_carrier(column_metadata)\n        transformed_data = column_metadata.transformer.apply(\n            working_data[column_metadata.name], missingness_carrier\n        )\n        transformed_columns.append(transformed_data)\n\n        # track single and multi column indices to supply to the model\n        if isinstance(transformed_data, pd.DataFrame) and transformed_data.shape[1] > 1:\n            num_to_add = transformed_data.shape[1]\n            if not column_metadata.categorical:\n                self.single_column_indices.append(col_counter)\n                col_counter += 1\n                num_to_add -= 1\n            self.multi_column_indices.append(list(range(col_counter, col_counter + num_to_add)))\n            col_counter += num_to_add\n        else:\n            self.single_column_indices.append(col_counter)\n            col_counter += 1\n\n    return pd.concat(transformed_columns, axis=1)\n
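The index bookkeeping above can be summarised with a worked, hypothetical layout: a continuous column expanded into one normalised column plus three cluster-component columns, followed by a categorical column one-hot encoded into two columns.

```python
# Hypothetical transformed layout:
#   index 0      -> 'age' normalised value          -> recorded in single_column_indices
#   indices 1-3  -> 'age' cluster-component columns -> one block in multi_column_indices
#   indices 4-5  -> 'region' one-hot categories     -> another block in multi_column_indices
multi_column_indices = [[1, 2, 3], [4, 5]]
single_column_indices = [0]
```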
"},{"location":"reference/modules/dataloader/missingness/","title":"missingness","text":""},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.AugmentMissingnessStrategy","title":"AugmentMissingnessStrategy","text":"

Bases: GenericMissingnessStrategy

Source code in src/nhssynth/modules/dataloader/missingness.py
class AugmentMissingnessStrategy(GenericMissingnessStrategy):\n    def __init__(self) -> None:\n        super().__init__(\"augment\")\n\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"\n        Impute missingness with the model. To do this we create a new column for continuous features and a new category for categorical features.\n\n        Args:\n            data: The dataset.\n            column_metadata: The column metadata enabling the correct set up of the missingness strategy.\n\n        Returns:\n            The dataset, potentially with a new column representing the missingness for the column added.\n        \"\"\"\n        if column_metadata.categorical:\n            if column_metadata.dtype.kind == \"O\":\n                self.missingness_carrier = column_metadata.name + \"_missing\"\n            else:\n                self.missingness_carrier = data[column_metadata.name].min() - 1\n        else:\n            self.missingness_carrier = column_metadata.name + \"_missing\"\n            data[self.missingness_carrier] = data[column_metadata.name].isnull().astype(int)\n        return data\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.AugmentMissingnessStrategy.remove","title":"remove(data, column_metadata)","text":"

Impute missingness with the model. To do this we create a new column for continuous features and a new category for categorical features.

Parameters:

  • data (DataFrame): The dataset. Required.
  • column_metadata (ColumnMetaData): The column metadata enabling the correct set up of the missingness strategy. Required.

Returns:

  • DataFrame: The dataset, potentially with a new column representing the missingness for the column added.

Source code in src/nhssynth/modules/dataloader/missingness.py
def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"\n    Impute missingness with the model. To do this we create a new column for continuous features and a new category for categorical features.\n\n    Args:\n        data: The dataset.\n        column_metadata: The column metadata enabling the correct set up of the missingness strategy.\n\n    Returns:\n        The dataset, potentially with a new column representing the missingness for the column added.\n    \"\"\"\n    if column_metadata.categorical:\n        if column_metadata.dtype.kind == \"O\":\n            self.missingness_carrier = column_metadata.name + \"_missing\"\n        else:\n            self.missingness_carrier = data[column_metadata.name].min() - 1\n    else:\n        self.missingness_carrier = column_metadata.name + \"_missing\"\n        data[self.missingness_carrier] = data[column_metadata.name].isnull().astype(int)\n    return data\n
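For the categorical branch, the carrier depends on the column's dtype, as the source above shows. A small pandas sketch with hypothetical columns:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"region": ["north", None, "south"], "score_band": [1, 2, np.nan]})

# Object-typed categoricals get a new category label as their carrier...
carrier_region = "region_missing"

# ...while numerically coded categoricals get a value one below the observed minimum.
carrier_score_band = df["score_band"].min() - 1  # 0.0 here

# In both cases the NaNs are only filled later, when the column's transformer is applied
# with the carrier (see OHECategoricalTransformer.apply below).
```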
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.DropMissingnessStrategy","title":"DropMissingnessStrategy","text":"

Bases: GenericMissingnessStrategy

Drop missingness strategy.

Source code in src/nhssynth/modules/dataloader/missingness.py
class DropMissingnessStrategy(GenericMissingnessStrategy):\n    \"\"\"Drop missingness strategy.\"\"\"\n\n    def __init__(self) -> None:\n        super().__init__(\"drop\")\n\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"\n        Drop rows containing missing values in the appropriate column.\n\n        Args:\n            data: The dataset.\n            column_metadata: The column metadata.\n\n        Returns:\n            The dataset with rows containing missing values in the appropriate column dropped.\n        \"\"\"\n        return data.dropna(subset=[column_metadata.name]).reset_index(drop=True)\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.DropMissingnessStrategy.remove","title":"remove(data, column_metadata)","text":"

Drop rows containing missing values in the appropriate column.

Parameters:

  • data (DataFrame): The dataset. Required.
  • column_metadata (ColumnMetaData): The column metadata. Required.

Returns:

  • DataFrame: The dataset with rows containing missing values in the appropriate column dropped.

Source code in src/nhssynth/modules/dataloader/missingness.py
def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"\n    Drop rows containing missing values in the appropriate column.\n\n    Args:\n        data: The dataset.\n        column_metadata: The column metadata.\n\n    Returns:\n        The dataset with rows containing missing values in the appropriate column dropped.\n    \"\"\"\n    return data.dropna(subset=[column_metadata.name]).reset_index(drop=True)\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.GenericMissingnessStrategy","title":"GenericMissingnessStrategy","text":"

Bases: ABC

Generic missingness strategy.

Source code in src/nhssynth/modules/dataloader/missingness.py
class GenericMissingnessStrategy(ABC):\n    \"\"\"Generic missingness strategy.\"\"\"\n\n    def __init__(self, name: str) -> None:\n        super().__init__()\n        self.name: str = name\n\n    @abstractmethod\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"Remove missingness.\"\"\"\n        pass\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.GenericMissingnessStrategy.remove","title":"remove(data, column_metadata) abstractmethod","text":"

Remove missingness.

Source code in src/nhssynth/modules/dataloader/missingness.py
@abstractmethod\ndef remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"Remove missingness.\"\"\"\n    pass\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.ImputeMissingnessStrategy","title":"ImputeMissingnessStrategy","text":"

Bases: GenericMissingnessStrategy

Impute missingness with a specified value or summary statistic (mean, median, or mode).

Source code in src/nhssynth/modules/dataloader/missingness.py
class ImputeMissingnessStrategy(GenericMissingnessStrategy):\n    \"\"\"Impute missingness with mean strategy.\"\"\"\n\n    def __init__(self, impute: Any) -> None:\n        super().__init__(\"impute\")\n        self.impute = impute.lower() if isinstance(impute, str) else impute\n\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"\n        Impute missingness in the data via the `impute` strategy. 'Special' values trigger specific behaviour.\n\n        Args:\n            data: The dataset.\n            column_metadata: The column metadata.\n\n        Returns:\n            The dataset with missing values in the appropriate column replaced with imputed ones.\n        \"\"\"\n        if (self.impute == \"mean\" or self.impute == \"median\") and column_metadata.categorical:\n            warnings.warn(\"Cannot impute mean or median for categorical data, using mode instead.\")\n            self.imputation_value = data[column_metadata.name].mode()[0]\n        elif self.impute == \"mean\":\n            self.imputation_value = data[column_metadata.name].mean()\n        elif self.impute == \"median\":\n            self.imputation_value = data[column_metadata.name].median()\n        elif self.impute == \"mode\":\n            self.imputation_value = data[column_metadata.name].mode()[0]\n        else:\n            self.imputation_value = self.impute\n        self.imputation_value = column_metadata.dtype.type(self.imputation_value)\n        try:\n            data[column_metadata.name].fillna(self.imputation_value, inplace=True)\n        except AssertionError:\n            raise ValueError(f\"Could not impute '{self.imputation_value}' into column: '{column_metadata.name}'.\")\n        return data\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.ImputeMissingnessStrategy.remove","title":"remove(data, column_metadata)","text":"

Impute missingness in the data via the impute strategy. 'Special' values trigger specific behaviour.

Parameters:

  • data (DataFrame): The dataset. Required.
  • column_metadata (ColumnMetaData): The column metadata. Required.

Returns:

  • DataFrame: The dataset with missing values in the appropriate column replaced with imputed ones.

Source code in src/nhssynth/modules/dataloader/missingness.py
def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"\n    Impute missingness in the data via the `impute` strategy. 'Special' values trigger specific behaviour.\n\n    Args:\n        data: The dataset.\n        column_metadata: The column metadata.\n\n    Returns:\n        The dataset with missing values in the appropriate column replaced with imputed ones.\n    \"\"\"\n    if (self.impute == \"mean\" or self.impute == \"median\") and column_metadata.categorical:\n        warnings.warn(\"Cannot impute mean or median for categorical data, using mode instead.\")\n        self.imputation_value = data[column_metadata.name].mode()[0]\n    elif self.impute == \"mean\":\n        self.imputation_value = data[column_metadata.name].mean()\n    elif self.impute == \"median\":\n        self.imputation_value = data[column_metadata.name].median()\n    elif self.impute == \"mode\":\n        self.imputation_value = data[column_metadata.name].mode()[0]\n    else:\n        self.imputation_value = self.impute\n    self.imputation_value = column_metadata.dtype.type(self.imputation_value)\n    try:\n        data[column_metadata.name].fillna(self.imputation_value, inplace=True)\n    except AssertionError:\n        raise ValueError(f\"Could not impute '{self.imputation_value}' into column: '{column_metadata.name}'.\")\n    return data\n
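A plain-pandas sketch of how the 'special' values resolve for a hypothetical numeric column; in normal use the strategy is driven by the MetaTransformer rather than called directly.

```python
import pandas as pd

s = pd.Series([4.0, None, 6.0, None], name="length_of_stay")

# impute='mean'   -> fill with s.mean()      (falls back to mode for categorical columns)
# impute='median' -> fill with s.median()    (same fallback)
# impute='mode'   -> fill with s.mode()[0]
# impute=0 (etc.) -> any other value is used as-is
filled = s.fillna(s.mean())  # what ImputeMissingnessStrategy('mean') would produce here
```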
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.NullMissingnessStrategy","title":"NullMissingnessStrategy","text":"

Bases: GenericMissingnessStrategy

Null missingness strategy.

Source code in src/nhssynth/modules/dataloader/missingness.py
class NullMissingnessStrategy(GenericMissingnessStrategy):\n    \"\"\"Null missingness strategy.\"\"\"\n\n    def __init__(self) -> None:\n        super().__init__(\"none\")\n\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"Do nothing.\"\"\"\n        return data\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.NullMissingnessStrategy.remove","title":"remove(data, column_metadata)","text":"

Do nothing.

Source code in src/nhssynth/modules/dataloader/missingness.py
def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"Do nothing.\"\"\"\n    return data\n
"},{"location":"reference/modules/dataloader/run/","title":"run","text":""},{"location":"reference/modules/dataloader/transformers/","title":"transformers","text":""},{"location":"reference/modules/dataloader/transformers/base/","title":"base","text":""},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.ColumnTransformer","title":"ColumnTransformer","text":"

Bases: ABC

A generic column transformer class to prototype all of the transformers applied via the MetaTransformer.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
class ColumnTransformer(ABC):\n    \"\"\"A generic column transformer class to prototype all of the transformers applied via the [`MetaTransformer`][nhssynth.modules.dataloader.metatransformer.MetaTransformer].\"\"\"\n\n    def __init__(self) -> None:\n        super().__init__()\n\n    @abstractmethod\n    def apply(self, data: pd.DataFrame, missingness_column: Optional[pd.Series]) -> None:\n        \"\"\"Apply the transformer to the data.\"\"\"\n        pass\n\n    @abstractmethod\n    def revert(self, data: pd.DataFrame) -> None:\n        \"\"\"Revert data to pre-transformer state.\"\"\"\n        pass\n
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.ColumnTransformer.apply","title":"apply(data, missingness_column) abstractmethod","text":"

Apply the transformer to the data.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
@abstractmethod\ndef apply(self, data: pd.DataFrame, missingness_column: Optional[pd.Series]) -> None:\n    \"\"\"Apply the transformer to the data.\"\"\"\n    pass\n
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.ColumnTransformer.revert","title":"revert(data) abstractmethod","text":"

Revert data to pre-transformer state.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
@abstractmethod\ndef revert(self, data: pd.DataFrame) -> None:\n    \"\"\"Revert data to pre-transformer state.\"\"\"\n    pass\n
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.TransformerWrapper","title":"TransformerWrapper","text":"

Bases: ABC

A class to facilitate nesting of ColumnTransformers.

Parameters:

  • wrapped_transformer (ColumnTransformer): The ColumnTransformer to wrap. Required.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
class TransformerWrapper(ABC):\n    \"\"\"\n    A class to facilitate nesting of [`ColumnTransformer`][nhssynth.modules.dataloader.transformers.base.ColumnTransformer]s.\n\n    Args:\n        wrapped_transformer: The [`ColumnTransformer`][nhssynth.modules.dataloader.transformers.base.ColumnTransformer] to wrap.\n    \"\"\"\n\n    def __init__(self, wrapped_transformer: ColumnTransformer) -> None:\n        super().__init__()\n        self._wrapped_transformer: ColumnTransformer = wrapped_transformer\n\n    def apply(self, data: pd.Series, missingness_column: Optional[pd.Series], **kwargs) -> pd.DataFrame:\n        \"\"\"Method for applying the wrapped transformer to the data.\"\"\"\n        return self._wrapped_transformer.apply(data, missingness_column, **kwargs)\n\n    def revert(self, data: pd.Series, **kwargs) -> pd.DataFrame:\n        \"\"\"Method for reverting the passed data via the wrapped transformer.\"\"\"\n        return self._wrapped_transformer.revert(data, **kwargs)\n
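A sketch of the nesting pattern, assuming the classes are importable from the module paths shown; the no-op subclass is purely hypothetical and exists only to illustrate that a wrapper delegates `apply`/`revert` to whatever transformer it wraps (this is how `DatetimeTransformer` layers datetime handling around a `ClusterContinuousTransformer` in `ColumnMetaData._infer_transformer`).

```python
from nhssynth.modules.dataloader.transformers.base import TransformerWrapper
from nhssynth.modules.dataloader.transformers.continuous import ClusterContinuousTransformer


class PassthroughWrapper(TransformerWrapper):
    """Hypothetical no-op wrapper: inherits the delegating apply() and revert() unchanged."""


# The wrapped transformer keeps its own configuration; the wrapper adds behaviour around it.
wrapped = PassthroughWrapper(ClusterContinuousTransformer(n_components=5))
```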
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.TransformerWrapper.apply","title":"apply(data, missingness_column, **kwargs)","text":"

Method for applying the wrapped transformer to the data.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
def apply(self, data: pd.Series, missingness_column: Optional[pd.Series], **kwargs) -> pd.DataFrame:\n    \"\"\"Method for applying the wrapped transformer to the data.\"\"\"\n    return self._wrapped_transformer.apply(data, missingness_column, **kwargs)\n
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.TransformerWrapper.revert","title":"revert(data, **kwargs)","text":"

Method for reverting the passed data via the wrapped transformer.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
def revert(self, data: pd.Series, **kwargs) -> pd.DataFrame:\n    \"\"\"Method for reverting the passed data via the wrapped transformer.\"\"\"\n    return self._wrapped_transformer.revert(data, **kwargs)\n
"},{"location":"reference/modules/dataloader/transformers/categorical/","title":"categorical","text":""},{"location":"reference/modules/dataloader/transformers/categorical/#nhssynth.modules.dataloader.transformers.categorical.OHECategoricalTransformer","title":"OHECategoricalTransformer","text":"

Bases: ColumnTransformer

A transformer to one-hot encode categorical features via sklearn's OneHotEncoder. Essentially wraps the fit_transform and inverse_transform methods of OneHotEncoder to comply with the ColumnTransformer interface.

Parameters:

  • drop (Optional[Union[list, str]]): str or list of str, to pass to OneHotEncoder's drop parameter. Default: None.

Attributes:

  • missing_value (Any): The value used to fill missing values in the data.

After applying the transformer, the following attributes will be populated:

Attributes:

  • original_column_name: The name of the original column.
  • new_column_names: The names of the columns generated by the transformer.

Source code in src/nhssynth/modules/dataloader/transformers/categorical.py
class OHECategoricalTransformer(ColumnTransformer):\n    \"\"\"\n    A transformer to one-hot encode categorical features via sklearn's `OneHotEncoder`.\n    Essentially wraps the `fit_transformer` and `inverse_transform` methods of `OneHotEncoder` to comply with the `ColumnTransformer` interface.\n\n    Args:\n        drop: str or list of str, to pass to `OneHotEncoder`'s `drop` parameter.\n\n    Attributes:\n        missing_value: The value used to fill missing values in the data.\n\n    After applying the transformer, the following attributes will be populated:\n\n    Attributes:\n        original_column_name: The name of the original column.\n        new_column_names: The names of the columns generated by the transformer.\n    \"\"\"\n\n    def __init__(self, drop: Optional[Union[list, str]] = None) -> None:\n        super().__init__()\n        self._drop: Union[list, str] = drop\n        self._transformer: OneHotEncoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=self._drop)\n        self.missing_value: Any = None\n\n    def apply(self, data: pd.Series, missing_value: Optional[Any] = None) -> pd.DataFrame:\n        \"\"\"\n        Apply the transformer to the data via sklearn's `OneHotEncoder`'s `fit_transform` method. Name the new columns via manipulation of the original column name.\n        If `missing_value` is provided, fill missing values with this value before applying the transformer to ensure a new category is added.\n\n        Args:\n            data: The column of data to transform.\n            missing_value: The value learned by the `MetaTransformer` to represent missingness, this is only used as part of the `AugmentMissingnessStrategy`.\n        \"\"\"\n        self.original_column_name = data.name\n        if missing_value:\n            data = data.fillna(missing_value)\n            self.missing_value = missing_value\n        transformed_data = pd.DataFrame(\n            self._transformer.fit_transform(data.values.reshape(-1, 1)),\n            columns=self._transformer.get_feature_names_out(input_features=[data.name]),\n        )\n        self.new_column_names = transformed_data.columns\n        return transformed_data\n\n    def revert(self, data: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"\n        Revert data to pre-transformer state via sklearn's `OneHotEncoder`'s `inverse_transform` method.\n        If `missing_value` is provided, replace instances of this value in the data with `np.nan` to ensure missing values are represented correctly in the case\n        where `missing_value` was 'modelled' and thus generated.\n\n        Args:\n            data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n        Returns:\n            The dataset with a single categorical column that is analogous to the original column, with the same name, and without the generated one-hot columns.\n        \"\"\"\n        data[self.original_column_name] = pd.Series(\n            self._transformer.inverse_transform(data[self.new_column_names].values).flatten(),\n            index=data.index,\n            name=self.original_column_name,\n        )\n        if self.missing_value:\n            data[self.original_column_name] = data[self.original_column_name].replace(self.missing_value, np.nan)\n        return data.drop(self.new_column_names, axis=1)\n
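A round-trip sketch, assuming the class is importable from nhssynth.modules.dataloader.transformers.categorical; the column and carrier value are hypothetical, and the exact one-hot column names depend on the observed categories.

```python
import pandas as pd

from nhssynth.modules.dataloader.transformers.categorical import OHECategoricalTransformer

s = pd.Series(["north", None, "south", "north"], name="region")

ohe = OHECategoricalTransformer()

# Passing a missing_value (the AugmentMissingnessStrategy carrier) adds a 'missing' category.
onehot = ohe.apply(s, missing_value="region_missing")
print(list(onehot.columns))  # e.g. ['region_north', 'region_region_missing', 'region_south']

# revert() expects the full transformed frame; it restores a single 'region' column and maps
# the carrier category back to NaN.
restored = ohe.revert(onehot.copy())
```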
"},{"location":"reference/modules/dataloader/transformers/categorical/#nhssynth.modules.dataloader.transformers.categorical.OHECategoricalTransformer.apply","title":"apply(data, missing_value=None)","text":"

Apply the transformer to the data via sklearn's OneHotEncoder's fit_transform method. Name the new columns via manipulation of the original column name. If missing_value is provided, fill missing values with this value before applying the transformer to ensure a new category is added.

Parameters:

  • data (Series): The column of data to transform. Required.
  • missing_value (Optional[Any]): The value learned by the MetaTransformer to represent missingness; this is only used as part of the AugmentMissingnessStrategy. Default: None.

Source code in src/nhssynth/modules/dataloader/transformers/categorical.py
def apply(self, data: pd.Series, missing_value: Optional[Any] = None) -> pd.DataFrame:\n    \"\"\"\n    Apply the transformer to the data via sklearn's `OneHotEncoder`'s `fit_transform` method. Name the new columns via manipulation of the original column name.\n    If `missing_value` is provided, fill missing values with this value before applying the transformer to ensure a new category is added.\n\n    Args:\n        data: The column of data to transform.\n        missing_value: The value learned by the `MetaTransformer` to represent missingness, this is only used as part of the `AugmentMissingnessStrategy`.\n    \"\"\"\n    self.original_column_name = data.name\n    if missing_value:\n        data = data.fillna(missing_value)\n        self.missing_value = missing_value\n    transformed_data = pd.DataFrame(\n        self._transformer.fit_transform(data.values.reshape(-1, 1)),\n        columns=self._transformer.get_feature_names_out(input_features=[data.name]),\n    )\n    self.new_column_names = transformed_data.columns\n    return transformed_data\n
"},{"location":"reference/modules/dataloader/transformers/categorical/#nhssynth.modules.dataloader.transformers.categorical.OHECategoricalTransformer.revert","title":"revert(data)","text":"

Revert data to pre-transformer state via sklearn's OneHotEncoder's inverse_transform method. If missing_value is provided, replace instances of this value in the data with np.nan to ensure missing values are represented correctly in the case where missing_value was 'modelled' and thus generated.

Parameters:

Name Type Description Default data DataFrame

The full dataset including the column(s) to be reverted to their pre-transformer state.

required

Returns:

Type Description DataFrame

The dataset with a single categorical column that is analogous to the original column, with the same name, and without the generated one-hot columns.

Source code in src/nhssynth/modules/dataloader/transformers/categorical.py
def revert(self, data: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Revert data to pre-transformer state via sklearn's `OneHotEncoder`'s `inverse_transform` method.\n    If `missing_value` is provided, replace instances of this value in the data with `np.nan` to ensure missing values are represented correctly in the case\n    where `missing_value` was 'modelled' and thus generated.\n\n    Args:\n        data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n    Returns:\n        The dataset with a single categorical column that is analogous to the original column, with the same name, and without the generated one-hot columns.\n    \"\"\"\n    data[self.original_column_name] = pd.Series(\n        self._transformer.inverse_transform(data[self.new_column_names].values).flatten(),\n        index=data.index,\n        name=self.original_column_name,\n    )\n    if self.missing_value:\n        data[self.original_column_name] = data[self.original_column_name].replace(self.missing_value, np.nan)\n    return data.drop(self.new_column_names, axis=1)\n
"},{"location":"reference/modules/dataloader/transformers/continuous/","title":"continuous","text":""},{"location":"reference/modules/dataloader/transformers/continuous/#nhssynth.modules.dataloader.transformers.continuous.ClusterContinuousTransformer","title":"ClusterContinuousTransformer","text":"

Bases: ColumnTransformer

A transformer to cluster continuous features via sklearn's BayesianGaussianMixture. Essentially wraps the process of fitting the BGM model and generating cluster assignments and normalised values for the data to comply with the ColumnTransformer interface.

Parameters:

Name Type Description Default n_components int

The number of components to use in the BGM model.

10 n_init int

The number of initialisations to use in the BGM model.

1 init_params str

The initialisation method to use in the BGM model.

'kmeans' random_state int

The random state to use in the BGM model.

0 max_iter int

The maximum number of iterations to use in the BGM model.

1000 remove_unused_components bool

Whether to remove components that have no data assigned to them (EXPERIMENTAL).

False clip_output bool

Whether to clip the output normalised values to the range [-1, 1].

False

After applying the transformer, the following attributes will be populated:

Attributes:

Name Type Description means

The means of the components in the BGM model.

stds

The standard deviations of the components in the BGM model.

new_column_names

The names of the columns generated by the transformer (one for the normalised values and one for each cluster component).

Source code in src/nhssynth/modules/dataloader/transformers/continuous.py
class ClusterContinuousTransformer(ColumnTransformer):\n    \"\"\"\n    A transformer to cluster continuous features via sklearn's `BayesianGaussianMixture`.\n    Essentially wraps the process of fitting the BGM model and generating cluster assignments and normalised values for the data to comply with the `ColumnTransformer` interface.\n\n    Args:\n        n_components: The number of components to use in the BGM model.\n        n_init: The number of initialisations to use in the BGM model.\n        init_params: The initialisation method to use in the BGM model.\n        random_state: The random state to use in the BGM model.\n        max_iter: The maximum number of iterations to use in the BGM model.\n        remove_unused_components: Whether to remove components that have no data assigned EXPERIMENTAL.\n        clip_output: Whether to clip the output normalised values to the range [-1, 1].\n\n    After applying the transformer, the following attributes will be populated:\n\n    Attributes:\n        means: The means of the components in the BGM model.\n        stds: The standard deviations of the components in the BGM model.\n        new_column_names: The names of the columns generated by the transformer (one for the normalised values and one for each cluster component).\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components: int = 10,\n        n_init: int = 1,\n        init_params: str = \"kmeans\",\n        random_state: int = 0,\n        max_iter: int = 1000,\n        remove_unused_components: bool = False,\n        clip_output: bool = False,\n    ) -> None:\n        super().__init__()\n        self._transformer = BayesianGaussianMixture(\n            n_components=n_components,\n            random_state=random_state,\n            n_init=n_init,\n            init_params=init_params,\n            max_iter=max_iter,\n            weight_concentration_prior=1e-3,\n        )\n        self._n_components = n_components\n        self._std_multiplier = 4\n        self._missingness_column_name = None\n        self._max_iter = max_iter\n        self.remove_unused_components = remove_unused_components\n        self.clip_output = clip_output\n\n    def apply(self, data: pd.Series, missingness_column: Optional[pd.Series] = None) -> pd.DataFrame:\n        \"\"\"\n        Apply the transformer to the data via sklearn's `BayesianGaussianMixture`'s `fit` and `predict_proba` methods.\n        Name the new columns via the original column name.\n\n        If `missingness_column` is provided, use this to extract the non-missing data; the missing values are assigned to a new pseudo-cluster with mean 0\n        (i.e. all values in the normalised column are 0.0). 
We do this by taking the full index before subsetting to non-missing data, then reindexing.\n\n        Args:\n            data: The column of data to transform.\n            missingness_column: The column of data representing missingness, this is only used as part of the `AugmentMissingnessStrategy`.\n\n        Returns:\n            The transformed data (will be multiple columns if `n_components` > 1 at initialisation).\n        \"\"\"\n        self.original_column_name = data.name\n        if missingness_column is not None:\n            self._missingness_column_name = missingness_column.name\n            full_index = data.index\n            data = data[missingness_column == 0]\n        index = data.index\n        data = np.array(data.values.reshape(-1, 1), dtype=data.dtype.name.lower())\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n            self._transformer.fit(data)\n\n        self.means = self._transformer.means_.reshape(-1)\n        self.stds = np.sqrt(self._transformer.covariances_).reshape(-1)\n\n        components = np.argmax(self._transformer.predict_proba(data), axis=1)\n        normalised_values = (data - self.means.reshape(1, -1)) / (self._std_multiplier * self.stds.reshape(1, -1))\n        normalised = normalised_values[np.arange(len(data)), components]\n        normalised = np.clip(normalised, -1.0, 1.0)\n        components = np.eye(self._n_components, dtype=int)[components]\n\n        transformed_data = pd.DataFrame(\n            np.hstack([normalised.reshape(-1, 1), components]),\n            index=index,\n            columns=[f\"{self.original_column_name}_normalised\"]\n            + [f\"{self.original_column_name}_c{i + 1}\" for i in range(self._n_components)],\n        )\n\n        # EXPERIMENTAL feature, removing components from the column matrix that have no data assigned to them\n        if self.remove_unused_components:\n            nunique = transformed_data.iloc[:, 1:].nunique(dropna=False)\n            unused_components = nunique[nunique == 1].index\n            unused_component_idx = [transformed_data.columns.get_loc(col_name) - 1 for col_name in unused_components]\n            self.means = np.delete(self.means, unused_component_idx)\n            self.stds = np.delete(self.stds, unused_component_idx)\n            transformed_data.drop(unused_components, axis=1, inplace=True)\n\n        if missingness_column is not None:\n            transformed_data = pd.concat([transformed_data.reindex(full_index).fillna(0.0), missingness_column], axis=1)\n\n        self.new_column_names = transformed_data.columns\n        return transformed_data.astype(\n            {col_name: int for col_name in transformed_data.columns if re.search(r\"_c\\d+\", col_name)}\n        )\n\n    def revert(self, data: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"\n        Revert data to pre-transformer state via the means and stds of the BGM. 
Extract the relevant columns from the data via the `new_column_names` attribute.\n        If `missingness_column` was provided to the `apply` method, drop the missing values from the data before reverting and use the `full_index` to\n        reintroduce missing values when `original_column_name` is constructed.\n\n        Args:\n            data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n        Returns:\n            The dataset with a single continuous column that is analogous to the original column, with the same name, and without the generated columns from which it is derived.\n        \"\"\"\n        working_data = data[self.new_column_names]\n        full_index = working_data.index\n        if self._missingness_column_name is not None:\n            working_data = working_data[working_data[self._missingness_column_name] == 0]\n            working_data = working_data.drop(self._missingness_column_name, axis=1)\n        index = working_data.index\n\n        components = np.argmax(working_data.filter(regex=r\".*_c\\d+\").values, axis=1)\n        working_data = working_data.filter(like=\"_normalised\").values.reshape(-1)\n        if self.clip_output:\n            working_data = np.clip(working_data, -1.0, 1.0)\n\n        mean_t = self.means[components]\n        std_t = self.stds[components]\n        data[self.original_column_name] = pd.Series(\n            working_data * self._std_multiplier * std_t + mean_t, index=index, name=self.original_column_name\n        ).reindex(full_index)\n        data.drop(self.new_column_names, axis=1, inplace=True)\n        return data\n
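A minimal usage sketch of the fit-and-normalise round trip (assumed import path, synthetic data):

import numpy as np
import pandas as pd

from nhssynth.modules.dataloader.transformers.continuous import ClusterContinuousTransformer

rng = np.random.default_rng(0)
age = pd.Series(rng.normal(50.0, 10.0, size=500), name="age")

transformer = ClusterContinuousTransformer(n_components=5)
encoded = transformer.apply(age)
# encoded holds "age_normalised" plus one 0/1 indicator column per component

restored = transformer.revert(encoded.copy())
# restored["age"] recovers the original values up to the clipping applied in apply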
"},{"location":"reference/modules/dataloader/transformers/continuous/#nhssynth.modules.dataloader.transformers.continuous.ClusterContinuousTransformer.apply","title":"apply(data, missingness_column=None)","text":"

Apply the transformer to the data via sklearn's BayesianGaussianMixture's fit and predict_proba methods. Name the new columns via the original column name.

If missingness_column is provided, use this to extract the non-missing data; the missing values are assigned to a new pseudo-cluster with mean 0 (i.e. all values in the normalised column are 0.0). We do this by taking the full index before subsetting to non-missing data, then reindexing.

Parameters:

Name Type Description Default data Series

The column of data to transform.

required missingness_column Optional[Series]

The column of data representing missingness, this is only used as part of the AugmentMissingnessStrategy.

None

Returns:

Type Description DataFrame

The transformed data (will be multiple columns if n_components > 1 at initialisation).

Source code in src/nhssynth/modules/dataloader/transformers/continuous.py
def apply(self, data: pd.Series, missingness_column: Optional[pd.Series] = None) -> pd.DataFrame:\n    \"\"\"\n    Apply the transformer to the data via sklearn's `BayesianGaussianMixture`'s `fit` and `predict_proba` methods.\n    Name the new columns via the original column name.\n\n    If `missingness_column` is provided, use this to extract the non-missing data; the missing values are assigned to a new pseudo-cluster with mean 0\n    (i.e. all values in the normalised column are 0.0). We do this by taking the full index before subsetting to non-missing data, then reindexing.\n\n    Args:\n        data: The column of data to transform.\n        missingness_column: The column of data representing missingness, this is only used as part of the `AugmentMissingnessStrategy`.\n\n    Returns:\n        The transformed data (will be multiple columns if `n_components` > 1 at initialisation).\n    \"\"\"\n    self.original_column_name = data.name\n    if missingness_column is not None:\n        self._missingness_column_name = missingness_column.name\n        full_index = data.index\n        data = data[missingness_column == 0]\n    index = data.index\n    data = np.array(data.values.reshape(-1, 1), dtype=data.dtype.name.lower())\n\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n        self._transformer.fit(data)\n\n    self.means = self._transformer.means_.reshape(-1)\n    self.stds = np.sqrt(self._transformer.covariances_).reshape(-1)\n\n    components = np.argmax(self._transformer.predict_proba(data), axis=1)\n    normalised_values = (data - self.means.reshape(1, -1)) / (self._std_multiplier * self.stds.reshape(1, -1))\n    normalised = normalised_values[np.arange(len(data)), components]\n    normalised = np.clip(normalised, -1.0, 1.0)\n    components = np.eye(self._n_components, dtype=int)[components]\n\n    transformed_data = pd.DataFrame(\n        np.hstack([normalised.reshape(-1, 1), components]),\n        index=index,\n        columns=[f\"{self.original_column_name}_normalised\"]\n        + [f\"{self.original_column_name}_c{i + 1}\" for i in range(self._n_components)],\n    )\n\n    # EXPERIMENTAL feature, removing components from the column matrix that have no data assigned to them\n    if self.remove_unused_components:\n        nunique = transformed_data.iloc[:, 1:].nunique(dropna=False)\n        unused_components = nunique[nunique == 1].index\n        unused_component_idx = [transformed_data.columns.get_loc(col_name) - 1 for col_name in unused_components]\n        self.means = np.delete(self.means, unused_component_idx)\n        self.stds = np.delete(self.stds, unused_component_idx)\n        transformed_data.drop(unused_components, axis=1, inplace=True)\n\n    if missingness_column is not None:\n        transformed_data = pd.concat([transformed_data.reindex(full_index).fillna(0.0), missingness_column], axis=1)\n\n    self.new_column_names = transformed_data.columns\n    return transformed_data.astype(\n        {col_name: int for col_name in transformed_data.columns if re.search(r\"_c\\d+\", col_name)}\n    )\n
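A sketch of the missingness path described above (the indicator column is constructed by hand here purely for illustration; in the pipeline it is produced by the AugmentMissingnessStrategy):

import numpy as np
import pandas as pd

from nhssynth.modules.dataloader.transformers.continuous import ClusterContinuousTransformer

score = pd.Series([1.2, np.nan, 3.4, 2.2, np.nan] * 20, name="score")
score_missing = score.isna().astype(int).rename("score_missing")

transformer = ClusterContinuousTransformer(n_components=2)
encoded = transformer.apply(score, missingness_column=score_missing)
# Rows flagged as missing get 0.0 in every generated column, and "score_missing" is appended
# so that revert() can reintroduce the NaNs
restored = transformer.revert(encoded.copy())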
"},{"location":"reference/modules/dataloader/transformers/continuous/#nhssynth.modules.dataloader.transformers.continuous.ClusterContinuousTransformer.revert","title":"revert(data)","text":"

Revert data to pre-transformer state via the means and stds of the BGM. Extract the relevant columns from the data via the new_column_names attribute. If missingness_column was provided to the apply method, drop the missing values from the data before reverting and use the full_index to reintroduce missing values when original_column_name is constructed.

Parameters:

Name Type Description Default data DataFrame

The full dataset including the column(s) to be reverted to their pre-transformer state.

required

Returns:

Type Description DataFrame

The dataset with a single continuous column that is analogous to the original column, with the same name, and without the generated columns from which it is derived.

Source code in src/nhssynth/modules/dataloader/transformers/continuous.py
def revert(self, data: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Revert data to pre-transformer state via the means and stds of the BGM. Extract the relevant columns from the data via the `new_column_names` attribute.\n    If `missingness_column` was provided to the `apply` method, drop the missing values from the data before reverting and use the `full_index` to\n    reintroduce missing values when `original_column_name` is constructed.\n\n    Args:\n        data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n    Returns:\n        The dataset with a single continuous column that is analogous to the original column, with the same name, and without the generated columns from which it is derived.\n    \"\"\"\n    working_data = data[self.new_column_names]\n    full_index = working_data.index\n    if self._missingness_column_name is not None:\n        working_data = working_data[working_data[self._missingness_column_name] == 0]\n        working_data = working_data.drop(self._missingness_column_name, axis=1)\n    index = working_data.index\n\n    components = np.argmax(working_data.filter(regex=r\".*_c\\d+\").values, axis=1)\n    working_data = working_data.filter(like=\"_normalised\").values.reshape(-1)\n    if self.clip_output:\n        working_data = np.clip(working_data, -1.0, 1.0)\n\n    mean_t = self.means[components]\n    std_t = self.stds[components]\n    data[self.original_column_name] = pd.Series(\n        working_data * self._std_multiplier * std_t + mean_t, index=index, name=self.original_column_name\n    ).reindex(full_index)\n    data.drop(self.new_column_names, axis=1, inplace=True)\n    return data\n
"},{"location":"reference/modules/dataloader/transformers/datetime/","title":"datetime","text":""},{"location":"reference/modules/dataloader/transformers/datetime/#nhssynth.modules.dataloader.transformers.datetime.DatetimeTransformer","title":"DatetimeTransformer","text":"

Bases: TransformerWrapper

A transformer to convert datetime features to numeric features before applying an underlying (wrapped) transformer. The datetime features are converted to nanoseconds since the epoch, and missing values are assigned to 0.0 under the AugmentMissingnessStrategy.

Parameters:

Name Type Description Default transformer ColumnTransformer

The ColumnTransformer to wrap.

required

After applying the transformer, the following attributes will be populated:

Attributes:

Name Type Description original_column_name

The name of the original column.

Source code in src/nhssynth/modules/dataloader/transformers/datetime.py
class DatetimeTransformer(TransformerWrapper):\n    \"\"\"\n    A transformer to convert datetime features to numeric features. Before applying an underlying (wrapped) transformer.\n    The datetime features are converted to nanoseconds since the epoch, and missing values are assigned to 0.0 under the `AugmentMissingnessStrategy`.\n\n    Args:\n        transformer: The [`ColumnTransformer`][nhssynth.modules.dataloader.transformers.base.ColumnTransformer] to wrap.\n\n    After applying the transformer, the following attributes will be populated:\n\n    Attributes:\n        original_column_name: The name of the original column.\n    \"\"\"\n\n    def __init__(self, transformer: ColumnTransformer) -> None:\n        super().__init__(transformer)\n\n    def apply(self, data: pd.Series, missingness_column: Optional[pd.Series] = None, **kwargs) -> pd.DataFrame:\n        \"\"\"\n        Firstly, the datetime data is floored to the nano-second level. Next, the floored data is converted to float nanoseconds since the epoch.\n        The float value of `pd.NaT` under the operation above is then replaced with `np.nan` to ensure missing values are represented correctly.\n        Finally, the wrapped transformer is applied to the data.\n\n        Args:\n            data: The column of data to transform.\n            missingness_column: The column of missingness indicators to augment the data with.\n\n        Returns:\n            The transformed data.\n        \"\"\"\n        self.original_column_name = data.name\n        floored_data = pd.Series(data.dt.floor(\"ns\").to_numpy().astype(float), name=data.name)\n        nan_corrected_data = floored_data.replace(pd.to_datetime(pd.NaT).to_numpy().astype(float), np.nan)\n        return super().apply(nan_corrected_data, missingness_column, **kwargs)\n\n    def revert(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:\n        \"\"\"\n        The wrapped transformer's `revert` method is applied to the data. The data is then converted back to datetime format.\n\n        Args:\n            data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n        Returns:\n            The reverted data.\n        \"\"\"\n        reverted_data = super().revert(data, **kwargs)\n        data[self.original_column_name] = pd.to_datetime(\n            reverted_data[self.original_column_name].astype(\"Int64\"), unit=\"ns\"\n        )\n        return data\n
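A minimal sketch of wrapping a continuous transformer (assumed import paths; the choice of ClusterContinuousTransformer as the wrapped transformer is illustrative, not prescribed by the source):

import pandas as pd

from nhssynth.modules.dataloader.transformers.continuous import ClusterContinuousTransformer
from nhssynth.modules.dataloader.transformers.datetime import DatetimeTransformer

admissions = pd.Series(pd.date_range("2021-01-01", periods=100, freq="D"), name="admission_date")

# The wrapper converts the datetimes to float nanoseconds since the epoch and then
# delegates to the wrapped transformer
transformer = DatetimeTransformer(ClusterContinuousTransformer(n_components=3))
encoded = transformer.apply(admissions)
# revert() applies the wrapped transformer's revert and converts the result back to datetimes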
"},{"location":"reference/modules/dataloader/transformers/datetime/#nhssynth.modules.dataloader.transformers.datetime.DatetimeTransformer.apply","title":"apply(data, missingness_column=None, **kwargs)","text":"

Firstly, the datetime data is floored to the nano-second level. Next, the floored data is converted to float nanoseconds since the epoch. The float value of pd.NaT under the operation above is then replaced with np.nan to ensure missing values are represented correctly. Finally, the wrapped transformer is applied to the data.

Parameters:

Name Type Description Default data Series

The column of data to transform.

required missingness_column Optional[Series]

The column of missingness indicators to augment the data with.

None

Returns:

Type Description DataFrame

The transformed data.

Source code in src/nhssynth/modules/dataloader/transformers/datetime.py
def apply(self, data: pd.Series, missingness_column: Optional[pd.Series] = None, **kwargs) -> pd.DataFrame:\n    \"\"\"\n    Firstly, the datetime data is floored to the nano-second level. Next, the floored data is converted to float nanoseconds since the epoch.\n    The float value of `pd.NaT` under the operation above is then replaced with `np.nan` to ensure missing values are represented correctly.\n    Finally, the wrapped transformer is applied to the data.\n\n    Args:\n        data: The column of data to transform.\n        missingness_column: The column of missingness indicators to augment the data with.\n\n    Returns:\n        The transformed data.\n    \"\"\"\n    self.original_column_name = data.name\n    floored_data = pd.Series(data.dt.floor(\"ns\").to_numpy().astype(float), name=data.name)\n    nan_corrected_data = floored_data.replace(pd.to_datetime(pd.NaT).to_numpy().astype(float), np.nan)\n    return super().apply(nan_corrected_data, missingness_column, **kwargs)\n
"},{"location":"reference/modules/dataloader/transformers/datetime/#nhssynth.modules.dataloader.transformers.datetime.DatetimeTransformer.revert","title":"revert(data, **kwargs)","text":"

The wrapped transformer's revert method is applied to the data. The data is then converted back to datetime format.

Parameters:

Name Type Description Default data DataFrame

The full dataset including the column(s) to be reverted to their pre-transformer state.

required

Returns:

Type Description DataFrame

The reverted data.

Source code in src/nhssynth/modules/dataloader/transformers/datetime.py
def revert(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:\n    \"\"\"\n    The wrapped transformer's `revert` method is applied to the data. The data is then converted back to datetime format.\n\n    Args:\n        data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n    Returns:\n        The reverted data.\n    \"\"\"\n    reverted_data = super().revert(data, **kwargs)\n    data[self.original_column_name] = pd.to_datetime(\n        reverted_data[self.original_column_name].astype(\"Int64\"), unit=\"ns\"\n    )\n    return data\n
"},{"location":"reference/modules/evaluation/","title":"evaluation","text":""},{"location":"reference/modules/evaluation/aequitas/","title":"aequitas","text":""},{"location":"reference/modules/evaluation/io/","title":"io","text":""},{"location":"reference/modules/evaluation/io/#nhssynth.modules.evaluation.io.check_input_paths","title":"check_input_paths(fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata, dir_experiment)","text":"

Sets up and validates the input paths for the files required by the evaluation module.

Parameters:

Name Type Description Default fn_dataset str

The base name of the dataset.

required fn_typed str

The name of the typed real dataset file.

required fn_synthetic_datasets str

The filename of the collection of synthetic datasets.

required fn_sdv_metadata str

The name of the SDV metadata file.

required dir_experiment Path

The path to the experiment directory.

required

Returns:

Type Description tuple[str, str]

The dataset name and the paths to the typed data, synthetic datasets and SDV metadata files.

Source code in src/nhssynth/modules/evaluation/io.py
def check_input_paths(\n    fn_dataset: str, fn_typed: str, fn_synthetic_datasets: str, fn_sdv_metadata: str, dir_experiment: Path\n) -> tuple[str, str]:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        fn_dataset: The base name of the dataset.\n        fn_typed: The name of the typed real dataset file.\n        fn_synthetic_datasets: The filename of the collection of synethtic datasets.\n        fn_sdv_metadata: The name of the SDV metadata file.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The paths to the data, metadata and metatransformer files.\n    \"\"\"\n    fn_dataset = Path(fn_dataset).stem\n    fn_typed, fn_synthetic_datasets, fn_sdv_metadata = io.consistent_endings(\n        [fn_typed, fn_synthetic_datasets, fn_sdv_metadata]\n    )\n    fn_typed, fn_synthetic_datasets, fn_sdv_metadata = io.potential_suffixes(\n        [fn_typed, fn_synthetic_datasets, fn_sdv_metadata], fn_dataset\n    )\n    io.warn_if_path_supplied([fn_typed, fn_synthetic_datasets, fn_sdv_metadata], dir_experiment)\n    io.check_exists([fn_typed, fn_synthetic_datasets, fn_sdv_metadata], dir_experiment)\n    return fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata\n
"},{"location":"reference/modules/evaluation/io/#nhssynth.modules.evaluation.io.load_required_data","title":"load_required_data(args, dir_experiment)","text":"

Loads the data from args or from disk when the dataloader has not been run previously.

Parameters:

Name Type Description Default args Namespace

The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.

required dir_experiment Path

The path to the experiment directory.

required

Returns:

Type Description tuple[str, DataFrame, DataFrame, dict[str, dict[str, Any]]]

The dataset name, the real data, the bundle of synthetic data from the modelling stage, and the SDV metadata.

Source code in src/nhssynth/modules/evaluation/io.py
def load_required_data(\n    args: argparse.Namespace, dir_experiment: Path\n) -> tuple[str, pd.DataFrame, pd.DataFrame, dict[str, dict[str, Any]]]:\n    \"\"\"\n    Loads the data from `args` or from disk when the dataloader has not be run previously.\n\n    Args:\n        args: The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The dataset name, the real data, the bundle of synthetic data from the modelling stage, and the SDV metadata.\n    \"\"\"\n    if all(x in args.module_handover for x in [\"dataset\", \"typed\", \"synthetic_datasets\", \"sdv_metadata\"]):\n        return (\n            args.module_handover[\"dataset\"],\n            args.module_handover[\"typed\"],\n            args.module_handover[\"synthetic_datasets\"],\n            args.module_handover[\"sdv_metadata\"],\n        )\n    else:\n        fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata = check_input_paths(\n            args.dataset, args.typed, args.synthetic_datasets, args.sdv_metadata, dir_experiment\n        )\n        with open(dir_experiment / fn_typed, \"rb\") as f:\n            real_data = pickle.load(f).contents\n        with open(dir_experiment / fn_sdv_metadata, \"rb\") as f:\n            sdv_metadata = pickle.load(f)\n        with open(dir_experiment / fn_synthetic_datasets, \"rb\") as f:\n            synthetic_datasets = pickle.load(f).contents\n\n        return fn_dataset, real_data, synthetic_datasets, sdv_metadata\n
"},{"location":"reference/modules/evaluation/io/#nhssynth.modules.evaluation.io.output_eval","title":"output_eval(evaluations, fn_dataset, fn_evaluations, dir_experiment)","text":"

Writes the collection of evaluations to the experiment directory as a pickled bundle.

Parameters:

Name Type Description Default evaluations DataFrame

The evaluations to output.

required fn_dataset Path

The base name of the dataset.

required fn_evaluations str

The filename of the collection of evaluations.

required dir_experiment Path

The path to the experiment output directory.

required

Returns:

Type Description None

Nothing is returned; the evaluations bundle is written to the experiment directory.

Source code in src/nhssynth/modules/evaluation/io.py
def output_eval(\n    evaluations: pd.DataFrame,\n    fn_dataset: Path,\n    fn_evaluations: str,\n    dir_experiment: Path,\n) -> None:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        evaluations: The evaluations to output.\n        fn_dataset: The base name of the dataset.\n        fn_evaluations: The filename of the collection of evaluations.\n        dir_experiment: The path to the experiment output directory.\n\n    Returns:\n        The path to output the model.\n    \"\"\"\n    fn_evaluations = io.consistent_ending(fn_evaluations)\n    fn_evaluations = io.potential_suffix(fn_evaluations, fn_dataset)\n    io.warn_if_path_supplied([fn_evaluations], dir_experiment)\n    with open(dir_experiment / fn_evaluations, \"wb\") as f:\n        pickle.dump(Evaluations(evaluations), f)\n
"},{"location":"reference/modules/evaluation/metrics/","title":"metrics","text":""},{"location":"reference/modules/evaluation/run/","title":"run","text":""},{"location":"reference/modules/evaluation/tasks/","title":"tasks","text":""},{"location":"reference/modules/evaluation/tasks/#nhssynth.modules.evaluation.tasks.Task","title":"Task","text":"

A task offers a light-touch way for users to specify any downstream task that they want to run on a dataset.

Parameters:

Name Type Description Default name str

The name of the task.

required run Callable

The function to run.

required supports_aequitas

Whether the task supports Aequitas evaluation.

False description str

The description of the task.

'' Source code in src/nhssynth/modules/evaluation/tasks.py
class Task:\n    \"\"\"\n    A task offers a light-touch way for users to specify any arbitrary downstream task that they want to run on a dataset.\n\n    Args:\n        name: The name of the task.\n        run: The function to run.\n        supports_aequitas: Whether the task supports Aequitas evaluation.\n        description: The description of the task.\n    \"\"\"\n\n    def __init__(self, name: str, run: Callable, supports_aequitas=False, description: str = \"\"):\n        self._name: str = name\n        self._run: Callable = run\n        self._supports_aequitas: bool = supports_aequitas\n        self._description: str = description\n\n    def __str__(self) -> str:\n        return f\"{self.name}: {self.description}\" if self.description else self.name\n\n    def __repr__(self) -> str:\n        return str([self.name, self.run, self.supports_aequitas, self.description])\n\n    def run(self, *args, **kwargs):\n        return self._run(*args, **kwargs)\n
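A sketch of defining a task (the dataset columns, model and metric are hypothetical; the only contract taken from the source is that the callable returns a prediction column and a dict of metric values, which is how EvalFrame consumes tasks):

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

from nhssynth.modules.evaluation.tasks import Task

def run_readmission(dataset: pd.DataFrame):
    # Hypothetical binary target column "readmitted"
    X, y = dataset.drop(columns=["readmitted"]), dataset["readmitted"]
    model = LogisticRegression(max_iter=1000).fit(X, y)
    preds = pd.Series(model.predict(X), index=dataset.index, name="readmitted_pred")
    return preds, {"readmission_auc": roc_auc_score(y, model.predict_proba(X)[:, 1])}

task = Task(
    "readmission",
    run_readmission,
    supports_aequitas=True,
    description="Fit a simple readmission classifier and report its AUC",
)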
"},{"location":"reference/modules/evaluation/tasks/#nhssynth.modules.evaluation.tasks.get_tasks","title":"get_tasks(fn_dataset, tasks_root)","text":"

Searches for and imports all tasks in the tasks directory for a given dataset. Uses importlib to extract the task from the file.

Parameters:

Name Type Description Default fn_dataset str

The name of the dataset.

required tasks_root str

The root directory for downstream tasks.

required

Returns:

Type Description list[Task]

A list of tasks.

Source code in src/nhssynth/modules/evaluation/tasks.py
def get_tasks(\n    fn_dataset: str,\n    tasks_root: str,\n) -> list[Task]:\n    \"\"\"\n    Searches for and imports all tasks in the tasks directory for a given dataset.\n    Uses `importlib` to extract the task from the file.\n\n    Args:\n        fn_dataset: The name of the dataset.\n        tasks_root: The root directory for downstream tasks.\n\n    Returns:\n        A list of tasks.\n    \"\"\"\n    tasks_dir = Path(tasks_root) / fn_dataset\n    assert (\n        tasks_dir.exists()\n    ), f\"Downstream tasks directory does not exist ({tasks_dir}), NB there should be a directory in TASKS_DIR with the same name as the dataset.\"\n    tasks = []\n    for task_path in tasks_dir.iterdir():\n        if task_path.name.startswith((\".\", \"__\")):\n            continue\n        assert task_path.suffix == \".py\", f\"Downstream task file must be a python file ({task_path.name})\"\n        spec = importlib.util.spec_from_file_location(\n            \"nhssynth_task_\" + task_path.name, os.getcwd() + \"/\" + str(task_path)\n        )\n        task_module = importlib.util.module_from_spec(spec)\n        spec.loader.exec_module(task_module)\n        tasks.append(task_module.task)\n    return tasks\n
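A sketch of the on-disk layout this discovery mechanism assumes (directory and file names are illustrative):

# <tasks_root>/<dataset name>/readmission.py must define a module-level `task` object
# (see Task above); files starting with "." or "__" are skipped, everything else must be .py
from nhssynth.modules.evaluation.tasks import get_tasks

tasks = get_tasks("my_dataset", "tasks")  # returns the `task` object from each discovered file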
"},{"location":"reference/modules/evaluation/utils/","title":"utils","text":""},{"location":"reference/modules/evaluation/utils/#nhssynth.modules.evaluation.utils.EvalFrame","title":"EvalFrame","text":"

Data structure for specifying and recording the evaluations of a set of synthetic datasets against a real dataset. All of the choices made by the user in the evaluation module are consolidated into this class.

After running evaluate on a set of synthetic datasets, the evaluations can be retrieved using get_evaluations. They are stored in a dict of dataframes with indices matching that of the supplied dataframe of synthetic datasets.

Parameters:

Name Type Description Default tasks list[Task]

A list of downstream tasks to run on the experiments.

required metrics list[str]

A list of metrics to calculate on the experiments.

required sdv_metadata dict[str, dict[str, str]]

The SDV metadata for the dataset.

required aequitas bool

Whether to run Aequitas on the results of supported downstream tasks.

False aequitas_attributes list[str]

The fairness-related attributes to use for Aequitas analysis.

[] key_numerical_fields list[str]

The numerical fields to use for SDV privacy metrics.

[] sensitive_numerical_fields list[str]

The numerical fields to use for SDV privacy metrics.

[] key_categorical_fields list[str]

The categorical fields to use for SDV privacy metrics.

[] sensitive_categorical_fields list[str]

The categorical fields to use for SDV privacy metrics.

[] Source code in src/nhssynth/modules/evaluation/utils.py
class EvalFrame:\n    \"\"\"\n    Data structure for specifying and recording the evaluations of a set of synthetic datasets against a real dataset.\n    All of the choices made by the user in the evaluation module are consolidated into this class.\n\n    After running `evaluate` on a set of synthetic datasets, the evaluations can be retrieved using `get_evaluations`.\n    They are stored in a dict of dataframes with indices matching that of the supplied dataframe of synthetic datasets.\n\n    Args:\n        tasks: A list of downstream tasks to run on the experiments.\n        metrics: A list of metrics to calculate on the experiments.\n        sdv_metadata: The SDV metadata for the dataset.\n        aequitas: Whether to run Aequitas on the results of supported downstream tasks.\n        aequitas_attributes: The fairness-related attributes to use for Aequitas analysis.\n        key_numerical_fields: The numerical fields to use for SDV privacy metrics.\n        sensitive_numerical_fields: The numerical fields to use for SDV privacy metrics.\n        key_categorical_fields: The categorical fields to use for SDV privacy metrics.\n        sensitive_categorical_fields: The categorical fields to use for SDV privacy metrics.\n    \"\"\"\n\n    def __init__(\n        self,\n        tasks: list[Task],\n        metrics: list[str],\n        sdv_metadata: dict[str, dict[str, str]],\n        aequitas: bool = False,\n        aequitas_attributes: list[str] = [],\n        key_numerical_fields: list[str] = [],\n        sensitive_numerical_fields: list[str] = [],\n        key_categorical_fields: list[str] = [],\n        sensitive_categorical_fields: list[str] = [],\n    ):\n        self._tasks = tasks\n        self._aequitas = aequitas\n        self._aequitas_attributes = aequitas_attributes\n\n        self._metrics = metrics\n        self._sdv_metadata = sdv_metadata\n\n        self._key_numerical_fields = key_numerical_fields\n        self._sensitive_numerical_fields = sensitive_numerical_fields\n        self._key_categorical_fields = key_categorical_fields\n        self._sensitive_categorical_fields = sensitive_categorical_fields\n        assert all([metric not in NUMERICAL_PRIVACY_METRICS for metric in self._metrics]) or (\n            self._key_numerical_fields and self._sensitive_numerical_fields\n        ), \"Numerical key and sensitive fields must be provided when an SDV privacy metric is used.\"\n        assert all([metric not in CATEGORICAL_PRIVACY_METRICS for metric in self._metrics]) or (\n            self._key_categorical_fields and self._sensitive_categorical_fields\n        ), \"Categorical key and sensitive fields must be provided when an SDV privacy metric is used.\"\n\n        self._metric_groups = self._build_metric_groups()\n\n    def _build_metric_groups(self) -> list[str]:\n        \"\"\"\n        Iterate through the concatenated list of metrics provided by the user and refer to the\n        [defined metric groups][nhssynth.common.constants] to identify which to evaluate.\n\n        Returns:\n            A list of metric groups to evaluate.\n        \"\"\"\n        metric_groups = set()\n        if self._tasks:\n            metric_groups.add(\"task\")\n        if self._aequitas:\n            metric_groups.add(\"aequitas\")\n        for metric in self._metrics:\n            if metric in TABLE_METRICS:\n                metric_groups.add(\"table\")\n            if metric in NUMERICAL_PRIVACY_METRICS or metric in CATEGORICAL_PRIVACY_METRICS:\n                
metric_groups.add(\"privacy\")\n            if metric in TABLE_METRICS and issubclass(TABLE_METRICS[metric], MultiSingleColumnMetric):\n                metric_groups.add(\"columnwise\")\n            if metric in TABLE_METRICS and issubclass(TABLE_METRICS[metric], MultiColumnPairsMetric):\n                metric_groups.add(\"pairwise\")\n        return list(metric_groups)\n\n    def evaluate(self, real_dataset: pd.DataFrame, synthetic_datasets: list[dict[str, Any]]) -> None:\n        \"\"\"\n        Evaluate a set of synthetic datasets against a real dataset.\n\n        Args:\n            real_dataset: The real dataset to evaluate against.\n            synthetic_datasets: The synthetic datasets to evaluate.\n        \"\"\"\n        assert not any(\"Real\" in i for i in synthetic_datasets.index), \"Real is a reserved dataset ID.\"\n        assert synthetic_datasets.index.is_unique, \"Dataset IDs must be unique.\"\n        self._evaluations = pd.DataFrame(index=synthetic_datasets.index, columns=self._metric_groups)\n        self._evaluations.loc[(\"Real\", None, None)] = self._step(real_dataset)\n        pbar = tqdm(synthetic_datasets.iterrows(), desc=\"Evaluating\", total=len(synthetic_datasets))\n        for i, dataset in pbar:\n            pbar.set_description(f\"Evaluating {i[0]}, repeat {i[1]}, config {i[2]}\")\n            self._evaluations.loc[i] = self._step(real_dataset, dataset.values[0])\n\n    def get_evaluations(self) -> dict[str, pd.DataFrame]:\n        \"\"\"\n        Unpack the `self._evaluations` dataframe, where each metric group is a column, into a dict of dataframes.\n\n        Returns:\n            A dict of dataframes, one for each metric group, containing the evaluations.\n        \"\"\"\n        assert hasattr(\n            self, \"_evaluations\"\n        ), \"You must first run `evaluate` on a `real_dataset` and set of `synthetic_datasets`.\"\n        return {\n            metric_group: pd.DataFrame(\n                self._evaluations[metric_group].values.tolist(), index=self._evaluations.index\n            ).dropna(how=\"all\")\n            for metric_group in self._metric_groups\n        }\n\n    def _task_step(self, data: pd.DataFrame) -> dict[str, dict]:\n        \"\"\"\n        Run the downstream tasks on the dataset. 
Optionally run Aequitas on the results of the tasks.\n\n        Args:\n            data: The dataset to run the tasks on.\n\n        Returns:\n            A dict of dicts, one for each metric group, to be populated with each groups metric values.\n        \"\"\"\n        metric_dict = {metric_group: {} for metric_group in self._metric_groups}\n        for task in tqdm(self._tasks, desc=\"Running downstream tasks\", leave=False):\n            task_pred_column, task_metric_values = task.run(data)\n            metric_dict[\"task\"].update(task_metric_values)\n            if self._aequitas and task.supports_aequitas:\n                metric_dict[\"aequitas\"].update(run_aequitas(data[self._aequitas_attributes].join(task_pred_column)))\n        return metric_dict\n\n    def _compute_metric(\n        self, metric_dict: dict, metric: str, real_data: pd.DataFrame, synthetic_data: pd.DataFrame\n    ) -> dict[str, dict]:\n        \"\"\"\n        Given a metric, determine the correct way to evaluate it via the lists defined in `nhssynth.common.constants`.\n\n        Args:\n            metric_dict: The dict of dicts to populate with metric values.\n            metric: The metric to evaluate.\n            real_data: The real dataset to evaluate against.\n            synthetic_data: The synthetic dataset to evaluate.\n\n        Returns:\n            The metric_dict updated with the value of the metric.\n        \"\"\"\n        with pd.option_context(\"mode.chained_assignment\", None), warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", message=\"ConvergenceWarning\")\n            if metric in TABLE_METRICS:\n                metric_dict[\"table\"][metric] = TABLE_METRICS[metric].compute(\n                    real_data, synthetic_data, self._sdv_metadata\n                )\n                if issubclass(TABLE_METRICS[metric], MultiSingleColumnMetric):\n                    metric_dict[\"columnwise\"][metric] = TABLE_METRICS[metric].compute_breakdown(\n                        real_data, synthetic_data, self._sdv_metadata\n                    )\n                elif issubclass(TABLE_METRICS[metric], MultiColumnPairsMetric):\n                    metric_dict[\"pairwise\"][metric] = TABLE_METRICS[metric].compute_breakdown(\n                        real_data, synthetic_data, self._sdv_metadata\n                    )\n            elif metric in NUMERICAL_PRIVACY_METRICS:\n                metric_dict[\"privacy\"][metric] = NUMERICAL_PRIVACY_METRICS[metric].compute(\n                    real_data.dropna(),\n                    synthetic_data.dropna(),\n                    self._sdv_metadata,\n                    self._key_numerical_fields,\n                    self._sensitive_numerical_fields,\n                )\n            elif metric in CATEGORICAL_PRIVACY_METRICS:\n                metric_dict[\"privacy\"][metric] = CATEGORICAL_PRIVACY_METRICS[metric].compute(\n                    real_data.dropna(),\n                    synthetic_data.dropna(),\n                    self._sdv_metadata,\n                    self._key_categorical_fields,\n                    self._sensitive_categorical_fields,\n                )\n        return metric_dict\n\n    def _step(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame = None) -> dict[str, dict]:\n        \"\"\"\n        Run the two functions above (or only the tasks when no synthetic data is provided).\n\n        Args:\n            real_data: The real dataset to evaluate against.\n            synthetic_data: The synthetic dataset to 
evaluate.\n\n        Returns:\n            A dict of dicts, one for each metric grou, to populate a row of `self._evaluations` corresponding to the `synthetic_data`.\n        \"\"\"\n        if synthetic_data is None:\n            metric_dict = self._task_step(real_data)\n        else:\n            metric_dict = self._task_step(synthetic_data)\n            for metric in tqdm(self._metrics, desc=\"Running metrics\", leave=False):\n                metric_dict = self._compute_metric(metric_dict, metric, real_data, synthetic_data)\n        return metric_dict\n
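A high-level sketch of the intended flow (every name below stands in for an output of another module and is an assumption, and the metric key is hypothetical; valid choices come from the module's METRIC_CHOICES mapping):

from nhssynth.modules.evaluation.utils import EvalFrame

eval_frame = EvalFrame(
    tasks=tasks,                # e.g. the output of get_tasks(...)
    metrics=["KSComplement"],   # hypothetical metric key
    sdv_metadata=sdv_metadata,  # produced by the dataloader module
)
# `synthetic_datasets` is the model module's bundle, indexed by (architecture, repeat, config)
eval_frame.evaluate(real_data, synthetic_datasets)
evaluations = eval_frame.get_evaluations()  # dict of dataframes keyed by metric group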
"},{"location":"reference/modules/evaluation/utils/#nhssynth.modules.evaluation.utils.EvalFrame.evaluate","title":"evaluate(real_dataset, synthetic_datasets)","text":"

Evaluate a set of synthetic datasets against a real dataset.

Parameters:

Name Type Description Default real_dataset DataFrame

The real dataset to evaluate against.

required synthetic_datasets list[dict[str, Any]]

The synthetic datasets to evaluate.

required Source code in src/nhssynth/modules/evaluation/utils.py
def evaluate(self, real_dataset: pd.DataFrame, synthetic_datasets: list[dict[str, Any]]) -> None:\n    \"\"\"\n    Evaluate a set of synthetic datasets against a real dataset.\n\n    Args:\n        real_dataset: The real dataset to evaluate against.\n        synthetic_datasets: The synthetic datasets to evaluate.\n    \"\"\"\n    assert not any(\"Real\" in i for i in synthetic_datasets.index), \"Real is a reserved dataset ID.\"\n    assert synthetic_datasets.index.is_unique, \"Dataset IDs must be unique.\"\n    self._evaluations = pd.DataFrame(index=synthetic_datasets.index, columns=self._metric_groups)\n    self._evaluations.loc[(\"Real\", None, None)] = self._step(real_dataset)\n    pbar = tqdm(synthetic_datasets.iterrows(), desc=\"Evaluating\", total=len(synthetic_datasets))\n    for i, dataset in pbar:\n        pbar.set_description(f\"Evaluating {i[0]}, repeat {i[1]}, config {i[2]}\")\n        self._evaluations.loc[i] = self._step(real_dataset, dataset.values[0])\n
"},{"location":"reference/modules/evaluation/utils/#nhssynth.modules.evaluation.utils.EvalFrame.get_evaluations","title":"get_evaluations()","text":"

Unpack the self._evaluations dataframe, where each metric group is a column, into a dict of dataframes.

Returns:

Type Description dict[str, DataFrame]

A dict of dataframes, one for each metric group, containing the evaluations.

Source code in src/nhssynth/modules/evaluation/utils.py
def get_evaluations(self) -> dict[str, pd.DataFrame]:\n    \"\"\"\n    Unpack the `self._evaluations` dataframe, where each metric group is a column, into a dict of dataframes.\n\n    Returns:\n        A dict of dataframes, one for each metric group, containing the evaluations.\n    \"\"\"\n    assert hasattr(\n        self, \"_evaluations\"\n    ), \"You must first run `evaluate` on a `real_dataset` and set of `synthetic_datasets`.\"\n    return {\n        metric_group: pd.DataFrame(\n            self._evaluations[metric_group].values.tolist(), index=self._evaluations.index\n        ).dropna(how=\"all\")\n        for metric_group in self._metric_groups\n    }\n
"},{"location":"reference/modules/evaluation/utils/#nhssynth.modules.evaluation.utils.validate_metric_args","title":"validate_metric_args(args, fn_dataset, columns)","text":"

Validate the arguments for downstream tasks and Aequitas.

Parameters:

Name Type Description Default args Namespace

The argument namespace to validate.

required fn_dataset str

The name of the dataset.

required columns Index

The columns in the dataset.

required

Returns:

Type Description tuple[list[Task], Namespace]

The validated arguments, the list of tasks and the list of metrics.

Source code in src/nhssynth/modules/evaluation/utils.py
def validate_metric_args(\n    args: argparse.Namespace, fn_dataset: str, columns: pd.Index\n) -> tuple[list[Task], argparse.Namespace]:\n    \"\"\"\n    Validate the arguments for downstream tasks and Aequitas.\n\n    Args:\n        args: The argument namespace to validate.\n        fn_dataset: The name of the dataset.\n        columns: The columns in the dataset.\n\n    Returns:\n        The validated arguments, the list of tasks and the list of metrics.\n    \"\"\"\n    if args.downstream_tasks:\n        tasks = get_tasks(fn_dataset, args.tasks_dir)\n        if not tasks:\n            warnings.warn(\"No valid downstream tasks found.\")\n    else:\n        tasks = []\n    if args.aequitas:\n        if not args.downstream_tasks or not any([task.supports_aequitas for task in tasks]):\n            warnings.warn(\n                \"Aequitas can only work in context of downstream tasks involving binary classification problems.\"\n            )\n        if not args.aequitas_attributes:\n            warnings.warn(\"No attributes specified for Aequitas analysis, defaulting to all columns in the dataset.\")\n            args.aequitas_attributes = columns.tolist()\n        assert all(\n            [attr in columns for attr in args.aequitas_attributes]\n        ), \"Invalid attribute(s) specified for Aequitas analysis.\"\n    metrics = {}\n    for metric_group in METRIC_CHOICES:\n        selected_metrics = getattr(args, \"_\".join(metric_group.split()).lower() + \"_metrics\") or []\n        metrics.update({metric_name: METRIC_CHOICES[metric_group][metric_name] for metric_name in selected_metrics})\n    return args, tasks, metrics\n
"},{"location":"reference/modules/model/","title":"model","text":""},{"location":"reference/modules/model/io/","title":"io","text":""},{"location":"reference/modules/model/io/#nhssynth.modules.model.io.check_input_paths","title":"check_input_paths(fn_dataset, fn_transformed, fn_metatransformer, dir_experiment)","text":"

Sets up and validates the input paths for the files required by the model module.

Parameters:

Name Type Description Default fn_dataset str

The base name of the dataset.

required fn_transformed str

The name of the transformed data file.

required fn_metatransformer str

The name of the metatransformer file.

required dir_experiment Path

The path to the experiment directory.

required

Returns:

Type Description tuple[str, str]

The paths to the data, metadata and metatransformer files.

Source code in src/nhssynth/modules/model/io.py
def check_input_paths(\n    fn_dataset: str, fn_transformed: str, fn_metatransformer: str, dir_experiment: Path\n) -> tuple[str, str]:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        fn_dataset: The base name of the dataset.\n        fn_transformed: The name of the transformed data file.\n        fn_metatransformer: The name of the metatransformer file.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The paths to the data, metadata and metatransformer files.\n    \"\"\"\n    fn_dataset = Path(fn_dataset).stem\n    fn_transformed, fn_metatransformer = io.consistent_endings([fn_transformed, fn_metatransformer])\n    fn_transformed, fn_metatransformer = io.potential_suffixes([fn_transformed, fn_metatransformer], fn_dataset)\n    io.warn_if_path_supplied([fn_transformed, fn_metatransformer], dir_experiment)\n    io.check_exists([fn_transformed, fn_metatransformer], dir_experiment)\n    return fn_dataset, fn_transformed, fn_metatransformer\n
"},{"location":"reference/modules/model/io/#nhssynth.modules.model.io.load_required_data","title":"load_required_data(args, dir_experiment)","text":"

Loads the data from args or from disk when the dataloader has not been run previously.

Parameters:

Name Type Description Default args Namespace

The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.

required dir_experiment Path

The path to the experiment directory.

required

Returns:

Type Description tuple[str, DataFrame, dict[str, int], MetaTransformer]

The data, metadata and metatransformer.

Source code in src/nhssynth/modules/model/io.py
def load_required_data(\n    args: argparse.Namespace, dir_experiment: Path\n) -> tuple[str, pd.DataFrame, dict[str, int], MetaTransformer]:\n    \"\"\"\n    Loads the data from `args` or from disk when the dataloader has not be run previously.\n\n    Args:\n        args: The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The data, metadata and metatransformer.\n    \"\"\"\n    if all(x in args.module_handover for x in [\"dataset\", \"transformed\", \"metatransformer\"]):\n        return (\n            args.module_handover[\"dataset\"],\n            args.module_handover[\"transformed\"],\n            args.module_handover[\"metatransformer\"],\n        )\n    else:\n        fn_dataset, fn_transformed, fn_metatransformer = check_input_paths(\n            args.dataset, args.transformed, args.metatransformer, dir_experiment\n        )\n\n        with open(dir_experiment / fn_transformed, \"rb\") as f:\n            data = pickle.load(f)\n        with open(dir_experiment / fn_metatransformer, \"rb\") as f:\n            mt = pickle.load(f)\n\n        return fn_dataset, data, mt\n
"},{"location":"reference/modules/model/run/","title":"run","text":""},{"location":"reference/modules/model/utils/","title":"utils","text":""},{"location":"reference/modules/model/utils/#nhssynth.modules.model.utils.configs_from_arg_combinations","title":"configs_from_arg_combinations(args, arg_list)","text":"

Generates a list of configurations from a list of arguments. Each configuration is one element of the Cartesian product of the argument values provided and identified in arg_list.

Parameters:

Name Type Description Default args Namespace

The arguments.

required arg_list list[str]

The list of arguments to generate configurations from.

required

Returns:

Type Description list[dict[str, Any]]

A list of configurations.

Source code in src/nhssynth/modules/model/utils.py
def configs_from_arg_combinations(args: argparse.Namespace, arg_list: list[str]) -> list[dict[str, Any]]:\n    \"\"\"\n    Generates a list of configurations from a list of arguments. Each configuration is one of a cartesian product of\n    the arguments provided and identified in `arg_list`.\n\n    Args:\n        args: The arguments.\n        arg_list: The list of arguments to generate configurations from.\n\n    Returns:\n        A list of configurations.\n    \"\"\"\n    wrapped_args = {arg: wrap_arg(getattr(args, arg)) for arg in arg_list}\n    combinations = list(itertools.product(*wrapped_args.values()))\n    return [{k: v for k, v in zip(wrapped_args.keys(), values) if v is not None} for values in combinations]\n
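A worked example of the Cartesian-product behaviour (import path assumed from the source file above):

import argparse

from nhssynth.modules.model.utils import configs_from_arg_combinations

args = argparse.Namespace(num_epochs=[50, 100], patience=5)
configs_from_arg_combinations(args, ["num_epochs", "patience"])
# -> [{"num_epochs": 50, "patience": 5}, {"num_epochs": 100, "patience": 5}]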
"},{"location":"reference/modules/model/utils/#nhssynth.modules.model.utils.get_experiments","title":"get_experiments(args)","text":"

Generates a dataframe of experiments from the arguments provided.

Parameters:

Name Type Description Default args Namespace

The arguments.

required

Returns:

Type Description DataFrame

A dataframe of experiments indexed by architecture, repeat and config ID.

Source code in src/nhssynth/modules/model/utils.py
def get_experiments(args: argparse.Namespace) -> pd.DataFrame:\n    \"\"\"\n    Generates a dataframe of experiments from the arguments provided.\n\n    Args:\n        args: The arguments.\n\n    Returns:\n        A dataframe of experiments indexed by architecture, repeat and config ID.\n    \"\"\"\n    experiments = pd.DataFrame(\n        columns=[\"architecture\", \"repeat\", \"config\", \"model_config\", \"seed\", \"train_config\", \"num_configs\"]\n    )\n    train_configs = configs_from_arg_combinations(args, [\"num_epochs\", \"patience\"])\n    for arch_name, repeat in itertools.product(*[wrap_arg(args.architecture), list(range(args.repeats))]):\n        arch = MODELS[arch_name]\n        model_configs = configs_from_arg_combinations(args, arch.get_args() + [\"batch_size\", \"use_gpu\"])\n        for i, (train_config, model_config) in enumerate(itertools.product(train_configs, model_configs)):\n            experiments.loc[len(experiments.index)] = {\n                \"architecture\": arch_name,\n                \"repeat\": repeat + 1,\n                \"config\": i + 1,\n                \"model_config\": model_config,\n                \"num_configs\": len(model_configs) * len(train_configs),\n                \"seed\": args.seed + repeat if args.seed else None,\n                \"train_config\": train_config,\n            }\n    return experiments.set_index([\"architecture\", \"repeat\", \"config\"], drop=True)\n
"},{"location":"reference/modules/model/utils/#nhssynth.modules.model.utils.wrap_arg","title":"wrap_arg(arg)","text":"

Wraps a single argument in a list if it is not already a list or tuple.

Parameters:

    arg (Any): The argument to wrap. [required]

Returns:

    Union[list, tuple]: The wrapped argument.

Source code in src/nhssynth/modules/model/utils.py
def wrap_arg(arg: Any) -> Union[list, tuple]:\n    \"\"\"\n    Wraps a single argument in a list if it is not already a list or tuple.\n\n    Args:\n        arg: The argument to wrap.\n\n    Returns:\n        The wrapped argument.\n    \"\"\"\n    if not isinstance(arg, list) and not isinstance(arg, tuple):\n        return [arg]\n    return arg\n
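A couple of illustrative calls (not part of the package's test suite) showing the behaviour:

from nhssynth.modules.model.utils import wrap_arg

print(wrap_arg(0.001))          # [0.001]        - scalars are wrapped in a list
print(wrap_arg([0.001, 0.01]))  # [0.001, 0.01]  - lists pass through unchanged
print(wrap_arg((32, 64)))       # (32, 64)       - tuples also pass through unchanged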
"},{"location":"reference/modules/model/common/","title":"common","text":""},{"location":"reference/modules/model/common/dp/","title":"dp","text":""},{"location":"reference/modules/model/common/dp/#nhssynth.modules.model.common.dp.DPMixin","title":"DPMixin","text":"

Bases: ABC

Mixin class to make a Model differentially private

Parameters:

    target_epsilon (float): The target epsilon for the model during training [default: 3.0]
    target_delta (Optional[float]): The target delta for the model during training [default: None]
    max_grad_norm (float): The maximum norm for the gradients, they are trimmed to this norm if they are larger [default: 5.0]
    secure_mode (bool): Whether to use the 'secure mode' of PyTorch's DP-SGD implementation via the csprng package [default: False]

Attributes:

    target_epsilon (float): The target epsilon for the model during training
    target_delta (float): The target delta for the model during training
    max_grad_norm (float): The maximum norm for the gradients, they are trimmed to this norm if they are larger
    secure_mode (bool): Whether to use the 'secure mode' of PyTorch's DP-SGD implementation via the csprng package

Raises:

    TypeError: If the inheritor is not a Model

Source code in src/nhssynth/modules/model/common/dp.py
class DPMixin(ABC):\n    \"\"\"\n    Mixin class to make a [`Model`][nhssynth.modules.model.common.model.Model] differentially private\n\n    Args:\n        target_epsilon: The target epsilon for the model during training\n        target_delta: The target delta for the model during training\n        max_grad_norm: The maximum norm for the gradients, they are trimmed to this norm if they are larger\n        secure_mode: Whether to use the 'secure mode' of PyTorch's DP-SGD implementation via the `csprng` package\n\n    Attributes:\n        target_epsilon: The target epsilon for the model during training\n        target_delta: The target delta for the model during training\n        max_grad_norm: The maximum norm for the gradients, they are trimmed to this norm if they are larger\n        secure_mode: Whether to use the 'secure mode' of PyTorch's DP-SGD implementation via the `csprng` package\n\n    Raises:\n        TypeError: If the inheritor is not a `Model`\n    \"\"\"\n\n    def __init__(\n        self,\n        *args,\n        target_epsilon: float = 3.0,\n        target_delta: Optional[float] = None,\n        max_grad_norm: float = 5.0,\n        secure_mode: bool = False,\n        **kwargs,\n    ):\n        if not isinstance(self, Model):\n            raise TypeError(\"DPMixin can only be used with Model classes\")\n        super(DPMixin, self).__init__(*args, **kwargs)\n        self.target_epsilon: float = target_epsilon\n        self.target_delta: float = target_delta or 1 / self.nrows\n        self.max_grad_norm: float = max_grad_norm\n        self.secure_mode: bool = secure_mode\n\n    def make_private(self, num_epochs: int, module: Optional[nn.Module] = None) -> GradSampleModule:\n        \"\"\"\n        Make the passed module (or the full model if a module is not passed), and its associated optimizer and data loader private.\n\n        Args:\n            num_epochs: The number of epochs to train for, used to calculate the privacy budget.\n            module: The module to make private.\n\n        Returns:\n            The privatised module.\n        \"\"\"\n        module = module or self\n        self.privacy_engine = PrivacyEngine(secure_mode=self.secure_mode)\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", message=\"invalid value encountered in log\")\n            warnings.filterwarnings(\"ignore\", message=\"Optimal order is the largest alpha\")\n            module, module.optim, self.data_loader = self.privacy_engine.make_private_with_epsilon(\n                module=module,\n                optimizer=module.optim,\n                data_loader=self.data_loader,\n                epochs=num_epochs,\n                target_epsilon=self.target_epsilon,\n                target_delta=self.target_delta,\n                max_grad_norm=self.max_grad_norm,\n            )\n        print(\n            f\"Using sigma={module.optim.noise_multiplier} and C={self.max_grad_norm} to target (\u03b5, \u03b4) = ({self.target_epsilon}, {self.target_delta})-differential privacy.\".format()\n        )\n        self.get_epsilon = self.privacy_engine.accountant.get_epsilon\n        return module\n\n    def _generate_metric_str(self, key) -> str:\n        \"\"\"Generates a string to display the current value of the metric `key`.\"\"\"\n        if key == \"Privacy\":\n            with warnings.catch_warnings():\n                warnings.filterwarnings(\"ignore\", message=\"invalid value encountered in log\")\n                warnings.filterwarnings(\"ignore\", 
message=\"Optimal order is the largest alpha\")\n                val = self.get_epsilon(self.target_delta)\n            self.metrics[key] = np.append(self.metrics[key], val)\n            return f\"{(key + ' \u03b5 Spent:').ljust(self.max_length)}  {val:.4f}\"\n        else:\n            return super()._generate_metric_str(key)\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        return [\"target_epsilon\", \"target_delta\", \"max_grad_norm\", \"secure_mode\"]\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return [\"Privacy\"]\n\n    def _start_training(self, num_epochs, patience, displayed_metrics):\n        self.make_private(num_epochs)\n        super()._start_training(num_epochs, patience, displayed_metrics)\n
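Because DPMixin calls super().__init__ and relies on attributes set up by the model (e.g. nrows and data_loader), it must appear before the concrete Model subclass in the bases, exactly as DPVAE does below. A generic, self-contained sketch of that cooperative-mixin pattern (the class names here are invented and unrelated to the package):

class Base:
    def __init__(self, *args, **kwargs):
        self.nrows = 100  # stand-in for state set up by the model

class Mixin:
    def __init__(self, *args, extra: float = 1.0, **kwargs):
        super().__init__(*args, **kwargs)  # runs Base.__init__ first via the MRO
        self.extra = extra / self.nrows    # can now rely on Base's attributes

class Combined(Mixin, Base):  # mixin first, model second
    pass

print(Combined(extra=2.0).extra)  # 0.02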
"},{"location":"reference/modules/model/common/dp/#nhssynth.modules.model.common.dp.DPMixin.make_private","title":"make_private(num_epochs, module=None)","text":"

Make the passed module (or the full model if a module is not passed), and its associated optimizer and data loader private.

Parameters:

    num_epochs (int): The number of epochs to train for, used to calculate the privacy budget. [required]
    module (Optional[Module]): The module to make private. [default: None]

Returns:

    GradSampleModule: The privatised module.

Source code in src/nhssynth/modules/model/common/dp.py
def make_private(self, num_epochs: int, module: Optional[nn.Module] = None) -> GradSampleModule:\n    \"\"\"\n    Make the passed module (or the full model if a module is not passed), and its associated optimizer and data loader private.\n\n    Args:\n        num_epochs: The number of epochs to train for, used to calculate the privacy budget.\n        module: The module to make private.\n\n    Returns:\n        The privatised module.\n    \"\"\"\n    module = module or self\n    self.privacy_engine = PrivacyEngine(secure_mode=self.secure_mode)\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"ignore\", message=\"invalid value encountered in log\")\n        warnings.filterwarnings(\"ignore\", message=\"Optimal order is the largest alpha\")\n        module, module.optim, self.data_loader = self.privacy_engine.make_private_with_epsilon(\n            module=module,\n            optimizer=module.optim,\n            data_loader=self.data_loader,\n            epochs=num_epochs,\n            target_epsilon=self.target_epsilon,\n            target_delta=self.target_delta,\n            max_grad_norm=self.max_grad_norm,\n        )\n    print(\n        f\"Using sigma={module.optim.noise_multiplier} and C={self.max_grad_norm} to target (\u03b5, \u03b4) = ({self.target_epsilon}, {self.target_delta})-differential privacy.\".format()\n    )\n    self.get_epsilon = self.privacy_engine.accountant.get_epsilon\n    return module\n
"},{"location":"reference/modules/model/common/mlp/","title":"mlp","text":""},{"location":"reference/modules/model/common/mlp/#nhssynth.modules.model.common.mlp.MLP","title":"MLP","text":"

Bases: Module

Fully connected or residual neural nets for classification and regression.

"},{"location":"reference/modules/model/common/mlp/#nhssynth.modules.model.common.mlp.MLP--parameters","title":"Parameters","text":"

task_type: str
    classification or regression
n_units_in: int
    Number of features
n_units_out: int
    Number of outputs
n_layers_hidden: int
    Number of hidden layers
n_units_hidden: int
    Number of hidden units in each layer
nonlin: string, default 'elu'
    Nonlinearity to use in NN. Can be 'elu', 'relu', 'selu', 'tanh' or 'leaky_relu'.
lr: float
    Learning rate for optimizer.
weight_decay: float
    l2 (ridge) penalty for the weights.
n_iter: int
    Maximum number of iterations.
batch_size: int
    Batch size
n_iter_print: int
    Number of iterations after which to print updates and check the validation loss.
random_state: int
    random_state used
patience: int
    Number of iterations to wait before early stopping after decrease in validation loss
n_iter_min: int
    Minimum number of iterations to go through before starting early stopping
dropout: float
    Dropout value. If 0, the dropout is not used.
clipping_value: int, default 1
    Gradients clipping value
batch_norm: bool
    Enable/disable batch norm
early_stopping: bool
    Enable/disable early stopping
residual: bool
    Add residuals.
loss: Callable
    Optional custom loss function. If None, the loss is CrossEntropy for classification tasks, or RMSE for regression.

Source code in src/nhssynth/modules/model/common/mlp.py
class MLP(nn.Module):\n    \"\"\"\n    Fully connected or residual neural nets for classification and regression.\n\n    Parameters\n    ----------\n    task_type: str\n        classification or regression\n    n_units_int: int\n        Number of features\n    n_units_out: int\n        Number of outputs\n    n_layers_hidden: int\n        Number of hidden layers\n    n_units_hidden: int\n        Number of hidden units in each layer\n    nonlin: string, default 'elu'\n        Nonlinearity to use in NN. Can be 'elu', 'relu', 'selu', 'tanh' or 'leaky_relu'.\n    lr: float\n        learning rate for optimizer.\n    weight_decay: float\n        l2 (ridge) penalty for the weights.\n    n_iter: int\n        Maximum number of iterations.\n    batch_size: int\n        Batch size\n    n_iter_print: int\n        Number of iterations after which to print updates and check the validation loss.\n    random_state: int\n        random_state used\n    patience: int\n        Number of iterations to wait before early stopping after decrease in validation loss\n    n_iter_min: int\n        Minimum number of iterations to go through before starting early stopping\n    dropout: float\n        Dropout value. If 0, the dropout is not used.\n    clipping_value: int, default 1\n        Gradients clipping value\n    batch_norm: bool\n        Enable/disable batch norm\n    early_stopping: bool\n        Enable/disable early stopping\n    residual: bool\n        Add residuals.\n    loss: Callable\n        Optional Custom loss function. If None, the loss is CrossEntropy for classification tasks, or RMSE for regression.\n    \"\"\"\n\n    def __init__(\n        self,\n        n_units_in: int,\n        n_units_out: int,\n        n_layers_hidden: int = 1,\n        n_units_hidden: int = 100,\n        activation: str = \"relu\",\n        activation_out: Optional[list[tuple[str, int]]] = None,\n        lr: float = 1e-3,\n        weight_decay: float = 1e-3,\n        opt_betas: tuple = (0.9, 0.999),\n        n_iter: int = 1000,\n        batch_size: int = 500,\n        n_iter_print: int = 100,\n        patience: int = 10,\n        n_iter_min: int = 100,\n        dropout: float = 0.1,\n        clipping_value: int = 1,\n        batch_norm: bool = False,\n        early_stopping: bool = True,\n        residual: bool = False,\n        loss: Optional[Callable] = None,\n    ) -> None:\n        super(MLP, self).__init__()\n        activation = ACTIVATION_FUNCTIONS[activation] if activation in ACTIVATION_FUNCTIONS else None\n\n        if n_units_in < 0:\n            raise ValueError(\"n_units_in must be >= 0\")\n        if n_units_out < 0:\n            raise ValueError(\"n_units_out must be >= 0\")\n\n        if residual:\n            block = ResidualLayer\n        else:\n            block = LinearLayer\n\n        # network\n        layers = []\n\n        if n_layers_hidden > 0:\n            layers.append(\n                block(\n                    n_units_in,\n                    n_units_hidden,\n                    batch_norm=batch_norm,\n                    activation=activation,\n                )\n            )\n            n_units_hidden += int(residual) * n_units_in\n\n            # add required number of layers\n            for i in range(n_layers_hidden - 1):\n                layers.append(\n                    block(\n                        n_units_hidden,\n                        n_units_hidden,\n                        batch_norm=batch_norm,\n                        activation=activation,\n                        
dropout=dropout,\n                    )\n                )\n                n_units_hidden += int(residual) * n_units_hidden\n\n            # add final layers\n            layers.append(nn.Linear(n_units_hidden, n_units_out))\n        else:\n            layers = [nn.Linear(n_units_in, n_units_out)]\n\n        if activation_out is not None:\n            total_nonlin_len = 0\n            activations = []\n            for nonlin, nonlin_len in activation_out:\n                total_nonlin_len += nonlin_len\n                activations.append((ACTIVATION_FUNCTIONS[nonlin](), nonlin_len))\n\n            if total_nonlin_len != n_units_out:\n                raise RuntimeError(\n                    f\"Shape mismatch for the output layer. Expected length {n_units_out}, but got {activation_out} with length {total_nonlin_len}\"\n                )\n            layers.append(MultiActivationHead(activations))\n\n        self.model = nn.Sequential(*layers)\n\n        # optimizer\n        self.lr = lr\n        self.weight_decay = weight_decay\n        self.opt_betas = opt_betas\n        self.optimizer = torch.optim.Adam(\n            self.parameters(),\n            lr=self.lr,\n            weight_decay=self.weight_decay,\n            betas=self.opt_betas,\n        )\n\n        # training\n        self.n_iter = n_iter\n        self.n_iter_print = n_iter_print\n        self.n_iter_min = n_iter_min\n        self.batch_size = batch_size\n        self.patience = patience\n        self.clipping_value = clipping_value\n        self.early_stopping = early_stopping\n        if loss is not None:\n            self.loss = loss\n        else:\n            self.loss = nn.MSELoss()\n\n    def fit(self, X: np.ndarray, y: np.ndarray) -> \"MLP\":\n        Xt = self._check_tensor(X)\n        yt = self._check_tensor(y)\n\n        self._train(Xt, yt)\n\n        return self\n\n    def predict_proba(self, X: np.ndarray) -> np.ndarray:\n        if self.task_type != \"classification\":\n            raise ValueError(f\"Invalid task type for predict_proba {self.task_type}\")\n\n        with torch.no_grad():\n            Xt = self._check_tensor(X)\n\n            yt = self.forward(Xt)\n\n            return yt.cpu().numpy().squeeze()\n\n    def predict(self, X: np.ndarray) -> np.ndarray:\n        with torch.no_grad():\n            Xt = self._check_tensor(X)\n\n            yt = self.forward(Xt)\n\n            if self.task_type == \"classification\":\n                return np.argmax(yt.cpu().numpy().squeeze(), -1).squeeze()\n            else:\n                return yt.cpu().numpy().squeeze()\n\n    def score(self, X: np.ndarray, y: np.ndarray) -> float:\n        y_pred = self.predict(X)\n        if self.task_type == \"classification\":\n            return np.mean(y_pred == y)\n        else:\n            return np.mean(np.inner(y - y_pred, y - y_pred) / 2.0)\n\n    def forward(self, X: torch.Tensor) -> torch.Tensor:\n        return self.model(X.float())\n\n    def _train_epoch(self, loader: DataLoader) -> float:\n        train_loss = []\n\n        for batch_ndx, sample in enumerate(loader):\n            self.optimizer.zero_grad()\n\n            X_next, y_next = sample\n            if len(X_next) < 2:\n                continue\n\n            preds = self.forward(X_next).squeeze()\n\n            batch_loss = self.loss(preds, y_next)\n\n            batch_loss.backward()\n\n            if self.clipping_value > 0:\n                torch.nn.utils.clip_grad_norm_(self.parameters(), self.clipping_value)\n\n            self.optimizer.step()\n\n 
           train_loss.append(batch_loss.detach())\n\n        return torch.mean(torch.Tensor(train_loss))\n\n    def _train(self, X: torch.Tensor, y: torch.Tensor) -> \"MLP\":\n        X = self._check_tensor(X).float()\n        y = self._check_tensor(y).squeeze().float()\n        if self.task_type == \"classification\":\n            y = y.long()\n\n        # Load Dataset\n        dataset = TensorDataset(X, y)\n\n        train_size = int(0.8 * len(dataset))\n        test_size = len(dataset) - train_size\n        train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])\n        loader = DataLoader(train_dataset, batch_size=self.batch_size, pin_memory=False)\n\n        # Setup the network and optimizer\n        val_loss_best = 1e12\n        patience = 0\n\n        # do training\n        for i in range(self.n_iter):\n            self._train_epoch(loader)\n\n            if self.early_stopping or i % self.n_iter_print == 0:\n                with torch.no_grad():\n                    X_val, y_val = test_dataset.dataset.tensors\n\n                    preds = self.forward(X_val).squeeze()\n                    val_loss = self.loss(preds, y_val)\n\n                    if self.early_stopping:\n                        if val_loss_best > val_loss:\n                            val_loss_best = val_loss\n                            patience = 0\n                        else:\n                            patience += 1\n\n                        if patience > self.patience and i > self.n_iter_min:\n                            break\n\n        return self\n\n    def _check_tensor(self, X: torch.Tensor) -> torch.Tensor:\n        if isinstance(X, torch.Tensor):\n            return X\n        else:\n            return torch.from_numpy(np.asarray(X))\n\n    def __len__(self) -> int:\n        return len(self.model)\n
"},{"location":"reference/modules/model/common/mlp/#nhssynth.modules.model.common.mlp.MultiActivationHead","title":"MultiActivationHead","text":"

Bases: Module

Final layer with multiple activations. Useful for tabular data.

Source code in src/nhssynth/modules/model/common/mlp.py
class MultiActivationHead(nn.Module):\n    \"\"\"Final layer with multiple activations. Useful for tabular data.\"\"\"\n\n    def __init__(\n        self,\n        activations: list[tuple[nn.Module, int]],\n    ) -> None:\n        super(MultiActivationHead, self).__init__()\n        self.activations = []\n        self.activation_lengths = []\n\n        for activation, length in activations:\n            self.activations.append(activation)\n            self.activation_lengths.append(length)\n\n    def forward(self, X: torch.Tensor) -> torch.Tensor:\n        if X.shape[-1] != np.sum(self.activation_lengths):\n            raise RuntimeError(\n                f\"Shape mismatch for the activations: expected {np.sum(self.activation_lengths)}. Got shape {X.shape}.\"\n            )\n\n        split = 0\n        out = torch.zeros(X.shape)\n\n        for activation, step in zip(self.activations, self.activation_lengths):\n            out[..., split : split + step] = activation(X[..., split : split + step])\n            split += step\n\n        return out\n
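For example, a head that applies a softmax over a three-unit one-hot block and leaves two continuous outputs untouched could be built as follows (a hedged sketch; the column split is invented):

import torch
import torch.nn as nn

from nhssynth.modules.model.common.mlp import MultiActivationHead

# Softmax over the first 3 outputs, identity over the remaining 2
head = MultiActivationHead([(nn.Softmax(dim=-1), 3), (nn.Identity(), 2)])

x = torch.randn(4, 5)
out = head(x)       # activations applied slice-by-slice
print(out.shape)    # torch.Size([4, 5])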
"},{"location":"reference/modules/model/common/mlp/#nhssynth.modules.model.common.mlp.SkipConnection","title":"SkipConnection(cls)","text":"

Wraps a model to add a skip connection from the input to the output.

Example:

>>> ResidualBlock = SkipConnection(MLP)
>>> res_block = ResidualBlock(n_units_in=10, n_units_out=3, n_units_hidden=64)
>>> res_block(torch.ones(10, 10)).shape
(10, 13)

Source code in src/nhssynth/modules/model/common/mlp.py
def SkipConnection(cls: Type[nn.Module]) -> Type[nn.Module]:\n    \"\"\"Wraps a model to add a skip connection from the input to the output.\n\n    Example:\n    >>> ResidualBlock = SkipConnection(MLP)\n    >>> res_block = ResidualBlock(n_units_in=10, n_units_out=3, n_units_hidden=64)\n    >>> res_block(torch.ones(10, 10)).shape\n    (10, 13)\n    \"\"\"\n\n    class Wrapper(cls):\n        pass\n\n    Wrapper._forward = cls.forward\n    Wrapper.forward = _forward_skip_connection\n    Wrapper.__name__ = f\"SkipConnection({cls.__name__})\"\n    Wrapper.__qualname__ = f\"SkipConnection({cls.__qualname__})\"\n    Wrapper.__doc__ = f\"\"\"(With skipped connection) {cls.__doc__}\"\"\"\n    return Wrapper\n
"},{"location":"reference/modules/model/common/model/","title":"model","text":""},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model","title":"Model","text":"

Bases: Module, ABC

Abstract base class for all NHSSynth models

Parameters:

    data (DataFrame): The data to train on [required]
    metatransformer (MetaTransformer): A MetaTransformer to use for converting the generated data to match the original data [required]
    batch_size (int): The batch size to use during training [default: 32]
    use_gpu (bool): Flag to determine whether to use the GPU (if available) [default: False]

Attributes:

    nrows: The number of rows in the data
    ncols: The number of columns in the data
    columns (Index): The names of the columns in the data
    metatransformer: The MetaTransformer (potentially) associated with the model
    multi_column_indices (list[list[int]]): A list of lists of column indices, where each sublist contains the indices for a one-hot encoded column
    single_column_indices (list[int]): Indices of all non-onehot columns
    data_loader (DataLoader): A PyTorch DataLoader for the data
    private: Whether the model is private, i.e. whether the DPMixin class has been inherited
    device (torch.device): The device to use for training (CPU or GPU)

Raises:

    TypeError: If the Model class is directly instantiated (i.e. not inherited)
    AssertionError: If the number of columns in the data does not match the number of indices in multi_column_indices and single_column_indices
    UserWarning: If use_gpu is True but no GPU is available

Source code in src/nhssynth/modules/model/common/model.py
class Model(nn.Module, ABC):\n    \"\"\"\n    Abstract base class for all NHSSynth models\n\n    Args:\n        data: The data to train on\n        metatransformer: A `MetaTransformer` to use for converting the generated data to match the original data\n        batch_size: The batch size to use during training\n        use_gpu: Flag to determine whether to use the GPU (if available)\n\n    Attributes:\n        nrows: The number of rows in the `data`\n        ncols: The number of columns in the `data`\n        columns: The names of the columns in the `data`\n        metatransformer: The `MetaTransformer` (potentially) associated with the model\n        multi_column_indices: A list of lists of column indices, where each sublist containts the indices for a one-hot encoded column\n        single_column_indices: Indices of all non-onehot columns\n        data_loader: A PyTorch DataLoader for the `data`\n        private: Whether the model is private, i.e. whether the `DPMixin` class has been inherited\n        device: The device to use for training (CPU or GPU)\n\n    Raises:\n        TypeError: If the `Model` class is directly instantiated (i.e. not inherited)\n        AssertionError: If the number of columns in the `data` does not match the number of indices in `multi_column_indices` and `single_column_indices`\n        UserWarning: If `use_gpu` is True but no GPU is available\n    \"\"\"\n\n    def __init__(\n        self,\n        data: pd.DataFrame,\n        metatransformer: MetaTransformer,\n        cond: Optional[Union[pd.DataFrame, pd.Series, np.ndarray]] = None,\n        batch_size: int = 32,\n        use_gpu: bool = False,\n    ) -> None:\n        if type(self) is Model:\n            raise TypeError(\"Cannot directly instantiate the `Model` class\")\n        super().__init__()\n\n        self.nrows, self.ncols = data.shape\n        self.columns: pd.Index = data.columns\n\n        self.batch_size = batch_size\n\n        self.metatransformer = metatransformer\n        self.multi_column_indices: list[list[int]] = metatransformer.multi_column_indices\n        self.single_column_indices: list[int] = metatransformer.single_column_indices\n        assert len(self.single_column_indices) + sum([len(x) for x in self.multi_column_indices]) == self.ncols\n\n        tensor_data = torch.Tensor(data.to_numpy())\n        self.cond_encoder: Optional[OneHotEncoder] = None\n        if cond is not None:\n            cond = np.asarray(cond)\n            if len(cond.shape) == 1:\n                cond = cond.reshape(-1, 1)\n            self.cond_encoder = OneHotEncoder(handle_unknown=\"ignore\").fit(cond)\n            cond = self.cond_encoder.transform(cond).toarray()\n            self.n_units_conditional = cond.shape[-1]\n            dataset = TensorDataset(tensor_data, cond)\n        else:\n            self.n_units_conditional = 0\n            dataset = TensorDataset(tensor_data)\n\n        self.data_loader: DataLoader = DataLoader(\n            dataset,\n            pin_memory=True,\n            batch_size=self.batch_size,\n        )\n        self.setup_device(use_gpu)\n\n    def setup_device(self, use_gpu: bool) -> None:\n        \"\"\"Sets up the device to use for training (CPU or GPU) depending on `use_gpu` and device availability.\"\"\"\n        if use_gpu:\n            if torch.cuda.is_available():\n                self.device: torch.device = torch.device(\"cuda:0\")\n            else:\n                warnings.warn(\"`use_gpu` was provided but no GPU is available, using CPU\")\n        self.device: 
torch.device = torch.device(\"cpu\")\n\n    def save(self, filename: str) -> None:\n        \"\"\"Saves the model to `filename`.\"\"\"\n        torch.save(self.state_dict(), filename)\n\n    def load(self, path: str) -> None:\n        \"\"\"Loads the model from `path`.\"\"\"\n        self.load_state_dict(torch.load(path))\n\n    @classmethod\n    @abstractmethod\n    def get_args() -> list[str]:\n        \"\"\"Returns the list of arguments to look for in an `argparse.Namespace`, these must map to the arguments of the inheritor.\"\"\"\n        raise NotImplementedError\n\n    @classmethod\n    @abstractmethod\n    def get_metrics() -> list[str]:\n        \"\"\"Returns the list of metrics to track during training.\"\"\"\n        raise NotImplementedError\n\n    def _start_training(self, num_epochs: int, patience: int, displayed_metrics: list[str]) -> None:\n        \"\"\"\n        Initialises the training process.\n\n        Args:\n            num_epochs: The number of epochs to train for\n            patience: The number of epochs to wait before stopping training early if the loss does not improve\n            displayed_metrics: The metrics to display during training, this should be set to an empty list if running `train` in a notebook or the output may be messy\n\n        Attributes:\n            metrics: A dictionary of lists of tracked metrics, where each list contains the values for each batch\n            stats_bars: A dictionary of tqdm status bars for each tracked metric\n            max_length: The maximum length of the tracked metric names, used for formatting the tqdm status bars\n            start_time: The time at which training started\n            update_time: The time at which the tqdm status bars were last updated\n        \"\"\"\n        self.num_epochs = num_epochs\n        self.patience = patience\n        self.metrics = {metric: np.empty(0, dtype=float) for metric in self.get_metrics()}\n        displayed_metrics = displayed_metrics or self.get_metrics()\n        self.stats_bars = {\n            metric: tqdm(total=0, desc=\"\", position=i, bar_format=\"{desc}\", leave=True)\n            for i, metric in enumerate(displayed_metrics)\n        }\n        self.max_length = max([len(add_spaces_before_caps(s)) + 5 for s in displayed_metrics] + [20])\n        self.start_time = self.update_time = time.time()\n\n    def _generate_metric_str(self, key) -> str:\n        \"\"\"Generates a string to display the current value of the metric `key`.\"\"\"\n        return f\"{(add_spaces_before_caps(key) + ':').ljust(self.max_length)}  {np.mean(self.metrics[key][-len(self.data_loader) :]):.4f}\"\n\n    def _record_metrics(self, losses):\n        \"\"\"Records the metrics for the current batch to file and updates the tqdm status bars.\"\"\"\n        for key in self.metrics.keys():\n            if key in losses:\n                if losses[key]:\n                    self.metrics[key] = np.append(\n                        self.metrics[key], losses[key].item() if isinstance(losses[key], torch.Tensor) else losses[key]\n                    )\n        if time.time() - self.update_time > 0.5:\n            for key, stats_bar in self.stats_bars.items():\n                stats_bar.set_description_str(self._generate_metric_str(key))\n                self.update_time = time.time()\n\n    def _check_patience(self, epoch: int, metric: float) -> bool:\n        \"\"\"Maintains `_min_metric` and `_stop_counter` to determine whether to stop training early according to `patience`.\"\"\"\n        if epoch == 
0:\n            self._stop_counter = 0\n            self._min_metric = metric\n            self._patience_delta = self._min_metric / 1e4\n        if metric < (self._min_metric - self._patience_delta):\n            self._min_metric = metric\n            self._stop_counter = 0  # Set counter to zero\n        else:  # elbo has not improved\n            self._stop_counter += 1\n        return self._stop_counter == self.patience\n\n    def _finish_training(self, num_epochs: int) -> None:\n        \"\"\"Closes each of the tqdm status bars and prints the time taken to do `num_epochs`.\"\"\"\n        for stats_bar in self.stats_bars.values():\n            stats_bar.close()\n        tqdm.write(f\"Completed {num_epochs} epochs in {time.time() - self.start_time:.2f} seconds.\\033[0m\")\n
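The two abstract classmethods define the whole contract a new architecture has to meet before it can be driven by the module's argument handling; a hypothetical (non-package) subclass would look roughly like this:

from nhssynth.modules.model.common.model import Model

class MyModel(Model):  # hypothetical architecture, not part of NHSSynth
    @classmethod
    def get_args(cls) -> list[str]:
        # names looked up on the argparse.Namespace and passed to __init__
        return ["my_hidden_dim", "my_learning_rate"]

    @classmethod
    def get_metrics(cls) -> list[str]:
        # metrics tracked (and displayed) during training
        return ["Loss"]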
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.get_args","title":"get_args() abstractmethod classmethod","text":"

Returns the list of arguments to look for in an argparse.Namespace, these must map to the arguments of the inheritor.

Source code in src/nhssynth/modules/model/common/model.py
@classmethod\n@abstractmethod\ndef get_args() -> list[str]:\n    \"\"\"Returns the list of arguments to look for in an `argparse.Namespace`, these must map to the arguments of the inheritor.\"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.get_metrics","title":"get_metrics() abstractmethod classmethod","text":"

Returns the list of metrics to track during training.

Source code in src/nhssynth/modules/model/common/model.py
@classmethod\n@abstractmethod\ndef get_metrics() -> list[str]:\n    \"\"\"Returns the list of metrics to track during training.\"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.load","title":"load(path)","text":"

Loads the model from path.

Source code in src/nhssynth/modules/model/common/model.py
def load(self, path: str) -> None:\n    \"\"\"Loads the model from `path`.\"\"\"\n    self.load_state_dict(torch.load(path))\n
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.save","title":"save(filename)","text":"

Saves the model to filename.

Source code in src/nhssynth/modules/model/common/model.py
def save(self, filename: str) -> None:\n    \"\"\"Saves the model to `filename`.\"\"\"\n    torch.save(self.state_dict(), filename)\n
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.setup_device","title":"setup_device(use_gpu)","text":"

Sets up the device to use for training (CPU or GPU) depending on use_gpu and device availability.

Source code in src/nhssynth/modules/model/common/model.py
def setup_device(self, use_gpu: bool) -> None:\n    \"\"\"Sets up the device to use for training (CPU or GPU) depending on `use_gpu` and device availability.\"\"\"\n    if use_gpu:\n        if torch.cuda.is_available():\n            self.device: torch.device = torch.device(\"cuda:0\")\n        else:\n            warnings.warn(\"`use_gpu` was provided but no GPU is available, using CPU\")\n    self.device: torch.device = torch.device(\"cpu\")\n
"},{"location":"reference/modules/model/models/","title":"models","text":""},{"location":"reference/modules/model/models/dpvae/","title":"dpvae","text":""},{"location":"reference/modules/model/models/dpvae/#nhssynth.modules.model.models.dpvae.DPVAE","title":"DPVAE","text":"

Bases: DPMixin, VAE

A differentially private VAE. Accepts VAE arguments as well as DPMixin arguments.

Source code in src/nhssynth/modules/model/models/dpvae.py
class DPVAE(DPMixin, VAE):\n    \"\"\"\n    A differentially private VAE. Accepts [`VAE`][nhssynth.modules.model.models.vae.VAE] arguments\n    as well as [`DPMixin`][nhssynth.modules.model.common.dp.DPMixin] arguments.\n    \"\"\"\n\n    def __init__(\n        self,\n        *args,\n        target_epsilon: float = 3.0,\n        target_delta: Optional[float] = None,\n        max_grad_norm: float = 5.0,\n        secure_mode: bool = False,\n        shared_optimizer: bool = False,\n        **kwargs,\n    ) -> None:\n        super(DPVAE, self).__init__(\n            *args,\n            target_epsilon=target_epsilon,\n            target_delta=target_delta,\n            max_grad_norm=max_grad_norm,\n            secure_mode=secure_mode,\n            # TODO fix shared_optimizer workflow for DP models\n            shared_optimizer=False,\n            **kwargs,\n        )\n\n    def make_private(self, num_epochs: int) -> GradSampleModule:\n        \"\"\"\n        Make the [`Decoder`][nhssynth.modules.model.models.vae.Decoder] differentially private\n        unless `shared_optimizer` is True, in which case the whole VAE will be privatised.\n\n        Args:\n            num_epochs: The number of epochs to train for\n        \"\"\"\n        if self.shared_optimizer:\n            super().make_private(num_epochs)\n        else:\n            self.decoder = super().make_private(num_epochs, self.decoder)\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        return VAE.get_args() + DPMixin.get_args()\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return VAE.get_metrics() + DPMixin.get_metrics()\n
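Since the class simply concatenates its parents' argument and metric lists, the set of arguments it responds to is the union of the VAE and DPMixin ones, e.g.:

from nhssynth.modules.model.models.dpvae import DPVAE

# VAE.get_args() + DPMixin.get_args(), in that order
print(DPVAE.get_args())
# ['encoder_latent_dim', ..., 'shared_optimizer',
#  'target_epsilon', 'target_delta', 'max_grad_norm', 'secure_mode']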
"},{"location":"reference/modules/model/models/dpvae/#nhssynth.modules.model.models.dpvae.DPVAE.make_private","title":"make_private(num_epochs)","text":"

Make the Decoder differentially private unless shared_optimizer is True, in which case the whole VAE will be privatised.

Parameters:

    num_epochs (int): The number of epochs to train for [required]

Source code in src/nhssynth/modules/model/models/dpvae.py
def make_private(self, num_epochs: int) -> GradSampleModule:\n    \"\"\"\n    Make the [`Decoder`][nhssynth.modules.model.models.vae.Decoder] differentially private\n    unless `shared_optimizer` is True, in which case the whole VAE will be privatised.\n\n    Args:\n        num_epochs: The number of epochs to train for\n    \"\"\"\n    if self.shared_optimizer:\n        super().make_private(num_epochs)\n    else:\n        self.decoder = super().make_private(num_epochs, self.decoder)\n
"},{"location":"reference/modules/model/models/gan/","title":"gan","text":""},{"location":"reference/modules/model/models/gan/#nhssynth.modules.model.models.gan.GAN","title":"GAN","text":"

Bases: Model

Basic GAN implementation.

Parameters:

    n_units_conditional (int): Number of conditional units [default: 0]
    generator_n_layers_hidden (int): Number of hidden layers in the generator [default: 2]
    generator_n_units_hidden (int): Number of hidden units in each layer of the Generator [default: 250]
    generator_activation (str): Nonlinearity to use in the generator. Can be 'elu', 'relu', 'selu' or 'leaky_relu'. [default: 'leaky_relu']
    generator_n_iter (int): Maximum number of iterations in the Generator. [required]
    generator_batch_norm (bool): Enable/disable batch norm for the generator [default: False]
    generator_dropout (float): Dropout value. If 0, the dropout is not used. [default: 0]
    generator_residual (bool): Use residuals for the generator [default: True]
    generator_activation_out (Optional[List[Tuple[str, int]]]): List of activations. Useful with the TabularEncoder [required]
    generator_lr (float): Generator learning rate, used by the Adam optimizer [default: 2e-4]
    generator_weight_decay (float): Generator weight decay, used by the Adam optimizer [required]
    generator_opt_betas (tuple): Generator initial decay rates, used by the Adam optimizer [default: (0.9, 0.999)]
    generator_extra_penalty_cbks (List[Callable]): Additional loss callbacks for the generator. Used by the TabularGAN for the conditional loss [required]
    discriminator_n_layers_hidden (int): Number of hidden layers in the discriminator [default: 3]
    discriminator_n_units_hidden (int): Number of hidden units in each layer of the discriminator [default: 300]
    discriminator_activation (str): Nonlinearity to use in the discriminator. Can be 'elu', 'relu', 'selu' or 'leaky_relu'. [default: 'leaky_relu']
    discriminator_batch_norm (bool): Enable/disable batch norm for the discriminator [default: False]
    discriminator_dropout (float): Dropout value for the discriminator. If 0, the dropout is not used. [default: 0.1]
    discriminator_lr (float): Discriminator learning rate, used by the Adam optimizer [default: 2e-4]
    discriminator_weight_decay (float): Discriminator weight decay, used by the Adam optimizer [required]
    discriminator_opt_betas (tuple): Initial weight decays for the Adam optimizer [default: (0.9, 0.999)]
    clipping_value (int): Gradients clipping value. Zero disables the feature [default: 0]
    lambda_gradient_penalty (float): Weight for the gradient penalty [default: 10]

Source code in src/nhssynth/modules/model/models/gan.py
class GAN(Model):\n    \"\"\"\n    Basic GAN implementation.\n\n    Args:\n        n_units_conditional: int\n            Number of conditional units\n        generator_n_layers_hidden: int\n            Number of hidden layers in the generator\n        generator_n_units_hidden: int\n            Number of hidden units in each layer of the Generator\n        generator_activation: string, default 'elu'\n            Nonlinearity to use in the generator. Can be 'elu', 'relu', 'selu' or 'leaky_relu'.\n        generator_n_iter: int\n            Maximum number of iterations in the Generator.\n        generator_batch_norm: bool\n            Enable/disable batch norm for the generator\n        generator_dropout: float\n            Dropout value. If 0, the dropout is not used.\n        generator_residual: bool\n            Use residuals for the generator\n        generator_activation_out: Optional[List[Tuple[str, int]]]\n            List of activations. Useful with the TabularEncoder\n        generator_lr: float = 2e-4\n            Generator learning rate, used by the Adam optimizer\n        generator_weight_decay: float = 1e-3\n            Generator weight decay, used by the Adam optimizer\n        generator_opt_betas: tuple = (0.9, 0.999)\n            Generator initial decay rates, used by the Adam Optimizer\n        generator_extra_penalty_cbks: List[Callable]\n            Additional loss callabacks for the generator. Used by the TabularGAN for the conditional loss\n        discriminator_n_layers_hidden: int\n            Number of hidden layers in the discriminator\n        discriminator_n_units_hidden: int\n            Number of hidden units in each layer of the discriminator\n        discriminator_activation: string, default 'relu'\n            Nonlinearity to use in the discriminator. Can be 'elu', 'relu', 'selu' or 'leaky_relu'.\n        discriminator_batch_norm: bool\n            Enable/disable batch norm for the discriminator\n        discriminator_dropout: float\n            Dropout value for the discriminator. If 0, the dropout is not used.\n        discriminator_lr: float\n            Discriminator learning rate, used by the Adam optimizer\n        discriminator_weight_decay: float\n            Discriminator weight decay, used by the Adam optimizer\n        discriminator_opt_betas: tuple\n            Initial weight decays for the Adam optimizer\n        clipping_value: int, default 0\n            Gradients clipping value. 
Zero disables the feature\n        lambda_gradient_penalty: float = 10\n            Weight for the gradient penalty\n    \"\"\"\n\n    def __init__(\n        self,\n        *args,\n        n_units_conditional: int = 0,\n        generator_n_layers_hidden: int = 2,\n        generator_n_units_hidden: int = 250,\n        generator_activation: str = \"leaky_relu\",\n        generator_batch_norm: bool = False,\n        generator_dropout: float = 0,\n        generator_lr: float = 2e-4,\n        generator_residual: bool = True,\n        generator_opt_betas: tuple = (0.9, 0.999),\n        discriminator_n_layers_hidden: int = 3,\n        discriminator_n_units_hidden: int = 300,\n        discriminator_activation: str = \"leaky_relu\",\n        discriminator_batch_norm: bool = False,\n        discriminator_dropout: float = 0.1,\n        discriminator_lr: float = 2e-4,\n        discriminator_opt_betas: tuple = (0.9, 0.999),\n        clipping_value: int = 0,\n        lambda_gradient_penalty: float = 10,\n        **kwargs,\n    ) -> None:\n        super(GAN, self).__init__(*args, **kwargs)\n\n        self.generator_n_units_hidden = generator_n_units_hidden\n        self.n_units_conditional = n_units_conditional\n\n        self.generator = MLP(\n            n_units_in=generator_n_units_hidden + n_units_conditional,\n            n_units_out=self.ncols,\n            n_layers_hidden=generator_n_layers_hidden,\n            n_units_hidden=generator_n_units_hidden,\n            activation=generator_activation,\n            # nonlin_out=generator_activation_out,\n            batch_norm=generator_batch_norm,\n            dropout=generator_dropout,\n            lr=generator_lr,\n            residual=generator_residual,\n            opt_betas=generator_opt_betas,\n        ).to(self.device)\n\n        self.discriminator = MLP(\n            n_units_in=self.ncols + n_units_conditional,\n            n_units_out=1,\n            n_layers_hidden=discriminator_n_layers_hidden,\n            n_units_hidden=discriminator_n_units_hidden,\n            activation=discriminator_activation,\n            activation_out=[(\"none\", 1)],\n            batch_norm=discriminator_batch_norm,\n            dropout=discriminator_dropout,\n            lr=discriminator_lr,\n            opt_betas=discriminator_opt_betas,\n        ).to(self.device)\n\n        self.clipping_value = clipping_value\n        self.lambda_gradient_penalty = lambda_gradient_penalty\n\n        def gen_fake_labels(X: torch.Tensor) -> torch.Tensor:\n            return torch.zeros((len(X),), device=self.device)\n\n        def gen_true_labels(X: torch.Tensor) -> torch.Tensor:\n            return torch.ones((len(X),), device=self.device)\n\n        self.fake_labels_generator = gen_fake_labels\n        self.true_labels_generator = gen_true_labels\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        return [\n            \"n_units_conditional\",\n            \"generator_n_layers_hidden\",\n            \"generator_n_units_hidden\",\n            \"generator_activation\",\n            \"generator_batch_norm\",\n            \"generator_dropout\",\n            \"generator_lr\",\n            \"generator_residual\",\n            \"generator_opt_betas\",\n            \"discriminator_n_layers_hidden\",\n            \"discriminator_n_units_hidden\",\n            \"discriminator_activation\",\n            \"discriminator_batch_norm\",\n            \"discriminator_dropout\",\n            \"discriminator_lr\",\n            \"discriminator_opt_betas\",\n            
\"clipping_value\",\n            \"lambda_gradient_penalty\",\n        ]\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return [\"GLoss\", \"DLoss\"]\n\n    def generate(self, N: int, cond: Optional[np.ndarray] = None) -> np.ndarray:\n        N = N or self.nrows\n        self.generator.eval()\n\n        condt: Optional[torch.Tensor] = None\n        if cond is not None:\n            condt = self._check_tensor(cond)\n        with torch.no_grad():\n            return self.metatransformer.inverse_apply(\n                pd.DataFrame(self(N, condt).detach().cpu().numpy(), columns=self.columns)\n            )\n\n    def forward(\n        self,\n        N: int,\n        cond: Optional[torch.Tensor] = None,\n    ) -> torch.Tensor:\n        if cond is None and self.n_units_conditional > 0:\n            # sample from the original conditional\n            if self._original_cond is None:\n                raise ValueError(\"Invalid original conditional. Provide a valid value.\")\n            cond_idxs = torch.randint(len(self._original_cond), (N,))\n            cond = self._original_cond[cond_idxs]\n\n        if cond is not None and len(cond.shape) == 1:\n            cond = cond.reshape(-1, 1)\n\n        if cond is not None and len(cond) != N:\n            raise ValueError(\"cond length must match N\")\n\n        fixed_noise = torch.randn(N, self.generator_n_units_hidden, device=self.device)\n        fixed_noise = self._append_optional_cond(fixed_noise, cond)\n\n        return self.generator(fixed_noise)\n\n    def _train_epoch_generator(\n        self,\n        X: torch.Tensor,\n        cond: Optional[torch.Tensor],\n    ) -> float:\n        # Update the G network\n        self.generator.train()\n        self.generator.optimizer.zero_grad()\n\n        real_X_raw = X.to(self.device)\n        real_X = self._append_optional_cond(real_X_raw, cond)\n        batch_size = len(real_X)\n\n        noise = torch.randn(batch_size, self.generator_n_units_hidden, device=self.device)\n        noise = self._append_optional_cond(noise, cond)\n\n        fake_raw = self.generator(noise)\n        fake = self._append_optional_cond(fake_raw, cond)\n\n        output = self.discriminator(fake).squeeze().float()\n        # Calculate G's loss based on this output\n        errG = -torch.mean(output)\n        if hasattr(self, \"generator_extra_penalty_cbks\"):\n            for extra_loss in self.generator_extra_penalty_cbks:\n                errG += extra_loss(\n                    real_X_raw,\n                    fake_raw,\n                    cond=cond,\n                )\n\n        # Calculate gradients for G\n        errG.backward()\n\n        # Update G\n        if self.clipping_value > 0:\n            torch.nn.utils.clip_grad_norm_(self.generator.parameters(), self.clipping_value)\n        self.generator.optimizer.step()\n\n        if torch.isnan(errG):\n            raise RuntimeError(\"NaNs detected in the generator loss\")\n\n        # Return loss\n        return errG.item()\n\n    def _train_epoch_discriminator(\n        self,\n        X: torch.Tensor,\n        cond: Optional[torch.Tensor],\n    ) -> float:\n        # Update the D network\n        self.discriminator.train()\n\n        errors = []\n\n        batch_size = min(self.batch_size, len(X))\n\n        # Train with all-real batch\n        real_X = X.to(self.device)\n        real_X = self._append_optional_cond(real_X, cond)\n\n        real_labels = self.true_labels_generator(X).to(self.device).squeeze()\n        real_output = 
self.discriminator(real_X).squeeze().float()\n\n        # Train with all-fake batch\n        noise = torch.randn(batch_size, self.generator_n_units_hidden, device=self.device)\n        noise = self._append_optional_cond(noise, cond)\n\n        fake_raw = self.generator(noise)\n        fake = self._append_optional_cond(fake_raw, cond)\n\n        fake_labels = self.fake_labels_generator(fake_raw).to(self.device).squeeze().float()\n        fake_output = self.discriminator(fake.detach()).squeeze()\n\n        # Compute errors. Some fake inputs might be marked as real for privacy guarantees.\n\n        real_real_output = real_output[(real_labels * real_output) != 0]\n        real_fake_output = fake_output[(fake_labels * fake_output) != 0]\n        errD_real = torch.mean(torch.concat((real_real_output, real_fake_output)))\n\n        fake_real_output = real_output[((1 - real_labels) * real_output) != 0]\n        fake_fake_output = fake_output[((1 - fake_labels) * fake_output) != 0]\n        errD_fake = torch.mean(torch.concat((fake_real_output, fake_fake_output)))\n\n        penalty = self._loss_gradient_penalty(\n            real_samples=real_X,\n            fake_samples=fake,\n            batch_size=batch_size,\n        )\n        errD = -errD_real + errD_fake\n\n        self.discriminator.optimizer.zero_grad()\n        if isinstance(self, DPMixin):\n            # Adversarial loss\n            # 1. split fwd-bkwd on fake and real images into two explicit blocks.\n            # 2. no need to compute per_sample_gardients on fake data, disable hooks.\n            # 3. re-enable hooks to obtain per_sample_gardients for real data.\n            # fake fwd-bkwd\n            self.discriminator.disable_hooks()\n            penalty.backward(retain_graph=True)\n            errD_fake.backward(retain_graph=True)\n\n            self.discriminator.enable_hooks()\n            errD_real.backward()  # HACK: calling bkwd without zero_grad() accumulates param gradients\n        else:\n            penalty.backward(retain_graph=True)\n            errD.backward()\n\n        # Update D\n        if self.clipping_value > 0:\n            torch.nn.utils.clip_grad_norm_(self.discriminator.parameters(), self.clipping_value)\n        self.discriminator.optimizer.step()\n\n        errors.append(errD.item())\n\n        if np.isnan(np.mean(errors)):\n            raise RuntimeError(\"NaNs detected in the discriminator loss\")\n\n        return np.mean(errors)\n\n    def _train_epoch(self) -> Tuple[float, float]:\n        for data in tqdm(self.data_loader, desc=\"Batches\", position=len(self.stats_bars) + 1, leave=False):\n            cond: Optional[torch.Tensor] = None\n            if self.n_units_conditional > 0:\n                X, cond = data\n            else:\n                X = data[0]\n\n            losses = {\n                \"DLoss\": self._train_epoch_discriminator(X, cond),\n                \"GLoss\": self._train_epoch_generator(X, cond),\n            }\n            self._record_metrics(losses)\n\n        return np.mean(self.metrics[\"GLoss\"][-len(self.data_loader) :]), np.mean(\n            self.metrics[\"DLoss\"][-len(self.data_loader) :]\n        )\n\n    def train(\n        self,\n        num_epochs: int = 100,\n        patience: int = 5,\n        displayed_metrics: list[str] = [\"GLoss\", \"DLoss\"],\n    ) -> tuple[int, dict[str, np.ndarray]]:\n        self._start_training(num_epochs, patience, displayed_metrics)\n\n        for epoch in tqdm(range(num_epochs), desc=\"Epochs\", position=len(self.stats_bars), 
leave=False):\n            losses = self._train_epoch()\n            if self._check_patience(epoch, losses[0]) and self._check_patience(epoch, losses[1]):\n                num_epochs = epoch + 1\n                break\n\n        self._finish_training(num_epochs)\n        return (num_epochs, self.metrics)\n\n    def _check_tensor(self, X: torch.Tensor) -> torch.Tensor:\n        if isinstance(X, torch.Tensor):\n            return X.to(self.device)\n        else:\n            return torch.from_numpy(np.asarray(X)).to(self.device)\n\n    def _loss_gradient_penalty(\n        self,\n        real_samples: torch.tensor,\n        fake_samples: torch.Tensor,\n        batch_size: int,\n    ) -> torch.Tensor:\n        \"\"\"Calculates the gradient penalty loss for WGAN GP\"\"\"\n        # Random weight term for interpolation between real and fake samples\n        alpha = torch.rand([batch_size, 1]).to(self.device)\n        # Get random interpolation between real and fake samples\n        interpolated = (alpha * real_samples + ((1 - alpha) * fake_samples)).requires_grad_(True)\n        d_interpolated = self.discriminator(interpolated).squeeze()\n        labels = torch.ones((len(interpolated),), device=self.device)\n\n        # Get gradient w.r.t. interpolates\n        gradients = torch.autograd.grad(\n            outputs=d_interpolated,\n            inputs=interpolated,\n            grad_outputs=labels,\n            create_graph=True,\n            retain_graph=True,\n            only_inputs=True,\n            allow_unused=True,\n        )[0]\n        gradients = gradients.view(gradients.size(0), -1)\n        gradient_penalty = ((gradients.norm(2, dim=-1) - 1) ** 2).mean()\n        return self.lambda_gradient_penalty * gradient_penalty\n\n    def _append_optional_cond(self, X: torch.Tensor, cond: Optional[torch.Tensor]) -> torch.Tensor:\n        if cond is None:\n            return X\n\n        return torch.cat([X, cond], dim=1)\n
"},{"location":"reference/modules/model/models/vae/","title":"vae","text":""},{"location":"reference/modules/model/models/vae/#nhssynth.modules.model.models.vae.Decoder","title":"Decoder","text":"

Bases: Module

Decoder, takes in z and outputs reconstruction

Source code in src/nhssynth/modules/model/models/vae.py
class Decoder(nn.Module):\n    \"\"\"Decoder, takes in z and outputs reconstruction\"\"\"\n\n    def __init__(\n        self,\n        output_dim: int,\n        latent_dim: int,\n        hidden_dim: int,\n        activation: str,\n        learning_rate: float,\n        shared_optimizer: bool,\n    ) -> None:\n        super().__init__()\n        activation = ACTIVATION_FUNCTIONS[activation]\n        self.net = nn.Sequential(\n            nn.Linear(latent_dim, hidden_dim),\n            activation(),\n            nn.Linear(hidden_dim, hidden_dim),\n            activation(),\n            nn.Linear(hidden_dim, output_dim),\n        )\n        if not shared_optimizer:\n            self.optim = torch.optim.Adam(self.parameters(), lr=learning_rate)\n\n    def forward(self, z):\n        return self.net(z)\n
"},{"location":"reference/modules/model/models/vae/#nhssynth.modules.model.models.vae.Encoder","title":"Encoder","text":"

Bases: Module

Encoder, takes in x and outputs mu_z, sigma_z (diagonal Gaussian variational posterior assumed)

Source code in src/nhssynth/modules/model/models/vae.py
class Encoder(nn.Module):\n    \"\"\"Encoder, takes in x and outputs mu_z, sigma_z (diagonal Gaussian variational posterior assumed)\"\"\"\n\n    def __init__(\n        self,\n        input_dim: int,\n        latent_dim: int,\n        hidden_dim: int,\n        activation: str,\n        learning_rate: float,\n        shared_optimizer: bool,\n    ) -> None:\n        super().__init__()\n        activation = ACTIVATION_FUNCTIONS[activation]\n        self.latent_dim = latent_dim\n        self.net = nn.Sequential(\n            nn.Linear(input_dim, hidden_dim),\n            activation(),\n            nn.Linear(hidden_dim, hidden_dim),\n            activation(),\n            nn.Linear(hidden_dim, 2 * latent_dim),\n        )\n        if not shared_optimizer:\n            self.optim = torch.optim.Adam(self.parameters(), lr=learning_rate)\n\n    def forward(self, x):\n        outs = self.net(x)\n        mu_z = outs[:, : self.latent_dim]\n        logsigma_z = outs[:, self.latent_dim :]\n        return mu_z, logsigma_z\n
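The encoder's two outputs parameterise a diagonal Gaussian over the latent space; a hedged sketch of the standard reparameterisation step they are typically combined with (illustrative only, not the package's own training loop):

import torch

# Stand-ins for a batch of Encoder outputs (batch size 4, latent_dim 8)
mu_z = torch.zeros(4, 8)
logsigma_z = torch.zeros(4, 8)

# z = mu + sigma * eps keeps the sample differentiable w.r.t. mu_z and logsigma_z
eps = torch.randn_like(mu_z)
z = mu_z + torch.exp(logsigma_z) * eps

print(z.shape)  # torch.Size([4, 8]) - ready to be passed to the Decoder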
"},{"location":"reference/modules/model/models/vae/#nhssynth.modules.model.models.vae.VAE","title":"VAE","text":"

Bases: Model

A Variational Autoencoder (VAE) model. Accepts Model arguments as well as the following:

Parameters:

    encoder_latent_dim (int): The dimensionality of the latent space. [default: 256]
    encoder_hidden_dim (int): The dimensionality of the hidden layers in the encoder. [default: 256]
    encoder_activation (str): The activation function to use in the encoder. [default: 'leaky_relu']
    encoder_learning_rate (float): The learning rate for the encoder. [default: 0.001]
    decoder_latent_dim (int): The dimensionality of the latent space in the decoder. [default: 256]
    decoder_hidden_dim (int): The dimensionality of the hidden layers in the decoder. [default: 32]
    decoder_activation (str): The activation function to use in the decoder. [default: 'leaky_relu']
    decoder_learning_rate (float): The learning rate for the decoder. [default: 0.001]
    shared_optimizer (bool): Whether to use a shared optimizer for the encoder and decoder. [default: True]

Source code in src/nhssynth/modules/model/models/vae.py
class VAE(Model):\n    \"\"\"\n    A Variational Autoencoder (VAE) model. Accepts [`Model`][nhssynth.modules.model.common.model.Model] arguments as well as the following:\n\n    Args:\n        encoder_latent_dim: The dimensionality of the latent space.\n        encoder_hidden_dim: The dimensionality of the hidden layers in the encoder.\n        encoder_activation: The activation function to use in the encoder.\n        encoder_learning_rate: The learning rate for the encoder.\n        decoder_latent_dim: The dimensionality of the hidden layers in the decoder.\n        decoder_hidden_dim: The dimensionality of the hidden layers in the decoder.\n        decoder_activation: The activation function to use in the decoder.\n        decoder_learning_rate: The learning rate for the decoder.\n        shared_optimizer: Whether to use a shared optimizer for the encoder and decoder.\n    \"\"\"\n\n    def __init__(\n        self,\n        *args,\n        encoder_latent_dim: int = 256,\n        encoder_hidden_dim: int = 256,\n        encoder_activation: str = \"leaky_relu\",\n        encoder_learning_rate: float = 1e-3,\n        decoder_latent_dim: int = 256,\n        decoder_hidden_dim: int = 32,\n        decoder_activation: str = \"leaky_relu\",\n        decoder_learning_rate: float = 1e-3,\n        shared_optimizer: bool = True,\n        **kwargs,\n    ) -> None:\n        super(VAE, self).__init__(*args, **kwargs)\n\n        self.shared_optimizer = shared_optimizer\n        self.encoder = Encoder(\n            input_dim=self.ncols,\n            latent_dim=encoder_latent_dim,\n            hidden_dim=encoder_hidden_dim,\n            activation=encoder_activation,\n            learning_rate=encoder_learning_rate,\n            shared_optimizer=self.shared_optimizer,\n        ).to(self.device)\n        self.decoder = Decoder(\n            output_dim=self.ncols,\n            latent_dim=decoder_latent_dim,\n            hidden_dim=decoder_hidden_dim,\n            activation=decoder_activation,\n            learning_rate=decoder_learning_rate,\n            shared_optimizer=self.shared_optimizer,\n        ).to(self.device)\n        self.noiser = Noiser(\n            len(self.single_column_indices),\n        ).to(self.device)\n        if self.shared_optimizer:\n            assert (\n                encoder_learning_rate == decoder_learning_rate\n            ), \"If `shared_optimizer` is True, `encoder_learning_rate` must equal `decoder_learning_rate`\"\n            self.optim = torch.optim.Adam(\n                list(self.encoder.parameters()) + list(self.decoder.parameters()),\n                lr=encoder_learning_rate,\n            )\n            self.zero_grad = self.optim.zero_grad\n            self.step = self.optim.step\n        else:\n            self.zero_grad = lambda: (self.encoder.optim.zero_grad(), self.decoder.optim.zero_grad())\n            self.step = lambda: (self.encoder.optim.step(), self.decoder.optim.step())\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        return [\n            \"encoder_latent_dim\",\n            \"encoder_hidden_dim\",\n            \"encoder_activation\",\n            \"encoder_learning_rate\",\n            \"decoder_latent_dim\",\n            \"decoder_hidden_dim\",\n            \"decoder_activation\",\n            \"decoder_learning_rate\",\n            \"shared_optimizer\",\n        ]\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return [\n            \"ELBO\",\n            \"KLD\",\n            \"ReconstructionLoss\",\n           
 \"CategoricalLoss\",\n            \"NumericalLoss\",\n        ]\n\n    def reconstruct(self, X):\n        mu_z, logsigma_z = self.encoder(X)\n        x_recon = self.decoder(mu_z)\n        return x_recon\n\n    def generate(self, N: Optional[int] = None) -> pd.DataFrame:\n        N = N or self.nrows\n        z_samples = torch.randn_like(torch.ones((N, self.encoder.latent_dim)), device=self.device)\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", message=\"Using a non-full backward hook\")\n            x_gen = self.decoder(z_samples)\n        x_gen_ = torch.ones_like(x_gen, device=self.device)\n\n        if self.multi_column_indices != [[]]:\n            for cat_idxs in self.multi_column_indices:\n                x_gen_[:, cat_idxs] = torch.distributions.one_hot_categorical.OneHotCategorical(\n                    logits=x_gen[:, cat_idxs]\n                ).sample()\n\n        x_gen_[:, self.single_column_indices] = x_gen[:, self.single_column_indices] + torch.exp(\n            self.noiser(x_gen[:, self.single_column_indices])\n        ) * torch.randn_like(x_gen[:, self.single_column_indices])\n        if torch.cuda.is_available():\n            x_gen_ = x_gen_.cpu()\n        return self.metatransformer.inverse_apply(pd.DataFrame(x_gen_.detach(), columns=self.columns))\n\n    def loss(self, X):\n        mu_z, logsigma_z = self.encoder(X)\n\n        p = Normal(torch.zeros_like(mu_z), torch.ones_like(mu_z))\n        q = Normal(mu_z, torch.exp(logsigma_z))\n\n        kld = torch.sum(torch.distributions.kl_divergence(q, p))\n\n        s = torch.randn_like(mu_z)\n        z_samples = mu_z + s * torch.exp(logsigma_z)\n\n        x_recon = self.decoder(z_samples)\n\n        categoric_loglik = 0\n\n        if self.multi_column_indices != [[]]:\n            for cat_idxs in self.multi_column_indices:\n                categoric_loglik += -torch.nn.functional.cross_entropy(\n                    x_recon[:, cat_idxs],\n                    torch.max(X[:, cat_idxs], 1)[1],\n                ).sum()\n\n        gauss_loglik = 0\n        if self.single_column_indices:\n            gauss_loglik = (\n                Normal(\n                    loc=x_recon[:, self.single_column_indices],\n                    scale=torch.exp(self.noiser(x_recon[:, self.single_column_indices])),\n                )\n                .log_prob(X[:, self.single_column_indices])\n                .sum()\n            )\n\n        reconstruction_loss = -(categoric_loglik + gauss_loglik)\n\n        elbo = kld + reconstruction_loss\n\n        return {\n            \"ELBO\": elbo / X.size()[0],\n            \"ReconstructionLoss\": reconstruction_loss / X.size()[0],\n            \"KLD\": kld / X.size()[0],\n            \"CategoricalLoss\": categoric_loglik / X.size()[0],\n            \"NumericalLoss\": gauss_loglik / X.size()[0],\n        }\n\n    def train(\n        self,\n        num_epochs: int = 100,\n        patience: int = 5,\n        displayed_metrics: list[str] = [\"ELBO\"],\n    ) -> tuple[int, dict[str, list[float]]]:\n        \"\"\"\n        Train the model.\n\n        Args:\n            num_epochs: Number of epochs to train for.\n            patience: Number of epochs to wait for improvement before early stopping.\n            displayed_metrics: List of metrics to display during training.\n\n        Returns:\n            The number of epochs trained for and a dictionary of the tracked metrics.\n        \"\"\"\n        self._start_training(num_epochs, patience, displayed_metrics)\n\n        
self.encoder.train()\n        self.decoder.train()\n        self.noiser.train()\n\n        for epoch in tqdm(range(num_epochs), desc=\"Epochs\", position=len(self.stats_bars), leave=False):\n            for (Y_subset,) in tqdm(self.data_loader, desc=\"Batches\", position=len(self.stats_bars) + 1, leave=False):\n                self.zero_grad()\n                with warnings.catch_warnings():\n                    warnings.filterwarnings(\"ignore\", message=\"Using a non-full backward hook\")\n                    losses = self.loss(Y_subset.to(self.device))\n                losses[\"ELBO\"].backward()\n                self.step()\n                self._record_metrics(losses)\n\n            elbo = np.mean(self.metrics[\"ELBO\"][-len(self.data_loader) :])\n            if self._check_patience(epoch, elbo):\n                num_epochs = epoch + 1\n                break\n\n        self._finish_training(num_epochs)\n        return (num_epochs, self.metrics)\n
"},{"location":"reference/modules/model/models/vae/#nhssynth.modules.model.models.vae.VAE.train","title":"train(num_epochs=100, patience=5, displayed_metrics=['ELBO'])","text":"

Train the model.

Parameters:

  • num_epochs (int): Number of epochs to train for. Default: 100
  • patience (int): Number of epochs to wait for improvement before early stopping. Default: 5
  • displayed_metrics (list[str]): List of metrics to display during training. Default: ['ELBO']

Returns:

  • tuple[int, dict[str, list[float]]]: The number of epochs trained for and a dictionary of the tracked metrics.

Source code in src/nhssynth/modules/model/models/vae.py
def train(\n    self,\n    num_epochs: int = 100,\n    patience: int = 5,\n    displayed_metrics: list[str] = [\"ELBO\"],\n) -> tuple[int, dict[str, list[float]]]:\n    \"\"\"\n    Train the model.\n\n    Args:\n        num_epochs: Number of epochs to train for.\n        patience: Number of epochs to wait for improvement before early stopping.\n        displayed_metrics: List of metrics to display during training.\n\n    Returns:\n        The number of epochs trained for and a dictionary of the tracked metrics.\n    \"\"\"\n    self._start_training(num_epochs, patience, displayed_metrics)\n\n    self.encoder.train()\n    self.decoder.train()\n    self.noiser.train()\n\n    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", position=len(self.stats_bars), leave=False):\n        for (Y_subset,) in tqdm(self.data_loader, desc=\"Batches\", position=len(self.stats_bars) + 1, leave=False):\n            self.zero_grad()\n            with warnings.catch_warnings():\n                warnings.filterwarnings(\"ignore\", message=\"Using a non-full backward hook\")\n                losses = self.loss(Y_subset.to(self.device))\n            losses[\"ELBO\"].backward()\n            self.step()\n            self._record_metrics(losses)\n\n        elbo = np.mean(self.metrics[\"ELBO\"][-len(self.data_loader) :])\n        if self._check_patience(epoch, elbo):\n            num_epochs = epoch + 1\n            break\n\n    self._finish_training(num_epochs)\n    return (num_epochs, self.metrics)\n
"},{"location":"reference/modules/plotting/","title":"plotting","text":""},{"location":"reference/modules/plotting/io/","title":"io","text":""},{"location":"reference/modules/plotting/io/#nhssynth.modules.plotting.io.check_input_paths","title":"check_input_paths(fn_dataset, fn_typed, fn_evaluations, dir_experiment)","text":"

Sets up the input and output paths for the model files.

Parameters:

  • fn_dataset (str): The base name of the dataset. Required.
  • fn_typed (str): The name of the typed data file. Required.
  • fn_evaluations (str): The name of the file containing the evaluation bundle. Required.
  • dir_experiment (Path): The path to the experiment directory. Required.

Returns:

  • tuple[str, str]: The paths to the data, metadata and metatransformer files.

Source code in src/nhssynth/modules/plotting/io.py
def check_input_paths(fn_dataset: str, fn_typed: str, fn_evaluations: str, dir_experiment: Path) -> tuple[str, str]:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        fn_dataset: The base name of the dataset.\n        fn_typed: The name of the typed data file.\n        fn_evaluations: The name of the file containing the evaluation bundle.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The paths to the data, metadata and metatransformer files.\n    \"\"\"\n    fn_dataset, fn_typed, fn_evaluations = io.consistent_endings([fn_dataset, fn_typed, fn_evaluations])\n    fn_typed, fn_evaluations = io.potential_suffixes([fn_typed, fn_evaluations], fn_dataset)\n    io.warn_if_path_supplied([fn_dataset, fn_typed, fn_evaluations], dir_experiment)\n    io.check_exists([fn_typed], dir_experiment)\n    return fn_dataset, fn_typed, fn_evaluations\n
"},{"location":"reference/modules/plotting/io/#nhssynth.modules.plotting.io.load_required_data","title":"load_required_data(args, dir_experiment)","text":"

Loads the data from args or from disk when the dataloader has not been run previously.

Parameters:

  • args (Namespace): The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module. Required.
  • dir_experiment (Path): The path to the experiment directory. Required.

Returns:

  • tuple[str, DataFrame, DataFrame, dict[str, dict[str, Any]]]: The data, metadata and metatransformer.

Source code in src/nhssynth/modules/plotting/io.py
def load_required_data(\n    args: argparse.Namespace, dir_experiment: Path\n) -> tuple[str, pd.DataFrame, pd.DataFrame, dict[str, dict[str, Any]]]:\n    \"\"\"\n    Loads the data from `args` or from disk when the dataloader has not be run previously.\n\n    Args:\n        args: The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The data, metadata and metatransformer.\n    \"\"\"\n    if all(x in args.module_handover for x in [\"dataset\", \"typed\", \"evaluations\"]):\n        return (\n            args.module_handover[\"dataset\"],\n            args.module_handover[\"typed\"],\n            args.module_handover[\"evaluations\"],\n        )\n    else:\n        fn_dataset, fn_typed, fn_evaluations = check_input_paths(\n            args.dataset, args.typed, args.evaluations, dir_experiment\n        )\n\n        with open(dir_experiment / fn_typed, \"rb\") as f:\n            real_data = pickle.load(f)\n        with open(dir_experiment / fn_evaluations, \"rb\") as f:\n            evaluations = pickle.load(f)\n\n        return fn_dataset, real_data, evaluations\n
"},{"location":"reference/modules/plotting/plots/","title":"plots","text":""},{"location":"reference/modules/plotting/plots/#nhssynth.modules.plotting.plots.factorize_all_categoricals","title":"factorize_all_categoricals(df)","text":"

Factorize all categorical columns in a dataframe.

Source code in src/nhssynth/modules/plotting/plots.py
def factorize_all_categoricals(\n    df: pd.DataFrame,\n) -> pd.DataFrame:\n    \"\"\"Factorize all categorical columns in a dataframe.\"\"\"\n    for col in df.columns:\n        if df[col].dtype == \"object\":\n            df[col] = pd.factorize(df[col])[0]\n        elif df[col].dtype == \"datetime64[ns]\":\n            df[col] = pd.to_numeric(df[col])\n        min_val = df[col].min()\n        max_val = df[col].max()\n        df[col] = (df[col] - min_val) / (max_val - min_val)\n\n    return df\n
"},{"location":"reference/modules/plotting/run/","title":"run","text":""},{"location":"reference/modules/structure/","title":"structure","text":""},{"location":"reference/modules/structure/run/","title":"run","text":""}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"NHS Synth","text":"

This is a package for generating useful synthetic data, audited and assessed along the dimensions of utility, privacy and fairness. Currently, the main focus of the package in its beta stage is to experiment with different model architectures to find which are the most promising for real-world usage.

See the User Guide to get started with running an experiment with the package.

See the Development Guide and Code Reference to get started with contributing to the package.

"},{"location":"development_guide/","title":"Development guide","text":"

This document aims to provide a comprehensive set of instructions for continuing development of this package. Good knowledge of Python development is assumed. Some ways of working are subjective and a matter of preference; as such, we try to be as minimal as possible in prescribing particular ways of working.

"},{"location":"development_guide/#development-environment-setup","title":"Development environment setup","text":""},{"location":"development_guide/#python","title":"Python","text":"

The package currently supports Python 3.9, 3.10 and 3.11. We recommend installing all of these versions; at minimum the latest supported version of Python should be used. Many people use pyenv for managing multiple Python versions. On macOS, Homebrew is a good, less invasive option for this (provided you then use a virtual environment manager too). For virtual environment management, we recommend Python's in-built venv functionality, but conda or some similar system would suffice (note that in the section below it may not be necessary to use any specific virtual environment management at all, depending on the setup of Poetry).

"},{"location":"development_guide/#poetry","title":"Poetry","text":"

We use Poetry to manage dependencies and the actual packaging and publishing of NHSSynth to PyPI. Poetry is a more robust alternative to a requirements.txt file, allowing for grouped dependencies and advanced build options. Rather than freezing a specific pip state, Poetry only specifies the top-level dependencies and then handles the resolution and installation of the latest compatible versions of the full dependency tree per these top-level dependencies. See the pyproject.toml in the GitHub repository and Poetry's documentation for further context.

Once Poetry is installed (in your preferred way per the instructions on their website), you can choose one of two options:

  1. Allow Poetry to control virtual environments in its own way, such that when you install and develop the package, Poetry will automatically create a virtual environment for you.

  2. Change poetry's configuration to manage your own virtual environments:

    poetry config virtualenvs.create false\npoetry config virtualenvs.in-project false\n

    In this setup, a virtual environment can be instantiated and activated in whichever way you prefer. For example, using venv:

    python3.11 -m venv nhssynth-3.11\nsource nhssynth-3.11/bin/activate\n
"},{"location":"development_guide/#package-installation","title":"Package installation","text":"

At this point, the project dependencies can be installed via poetry install --with dev (add optional flags: --with aux to work with the auxiliary notebooks, --with docs to work with the documentation). This will install the package in editable mode, meaning that changes to the source code will be reflected in the installed package without needing to reinstall it. Note that if you are using your own virtual environment, you will need to activate it before running this command.
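
For example, to install all of the optional groups at once (the comma-separated form is standard Poetry syntax; the group names are those listed above):

poetry install --with dev,aux,docs\n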

You can then interact with the package in one of two ways:

  1. Via the CLI module, which is accessed using the nhssynth command, e.g.

    poetry run nhssynth ...\n

    Note that you can omit the poetry run part and just type nhssynth if you followed the optional steps above to manage and activate your own virtual environment, or if you have executed poetry shell beforehand.

  2. Through directly importing parts of the package to use in an existing project (from nhssynth.modules... import ...).

"},{"location":"development_guide/#secure-mode","title":"Secure mode","text":"

Note that in order to train a generator in secure mode (see the documentation for details) the PyTorch extension package csprng must be installed separately. Currently this package's dependencies are not compatible with recent versions of PyTorch (the authors plan to rectify this - watch this space), so you will need to install it manually; you can do this in your environment by running:

git clone git@github.com:pytorch/csprng.git\ncd csprng\ngit branch release \"v0.2.2-rc1\"\ngit checkout release\npython setup.py install\n
"},{"location":"development_guide/#coding-practices","title":"Coding practices","text":""},{"location":"development_guide/#style","title":"Style","text":"

We use black for code formatting. This is a fairly opinionated formatter, but it is widely used and has a good reputation. We also use ruff to manage imports and lint the code. Both of these tools are run automatically via pre-commit hooks. Ensure you have installed the package with the dev group of dependencies and then run the following command to install the hooks:

pre-commit install\n

Note that you may need to pre-pend this command with poetry run if you are not using your own virtual environment.

This will ensure that your code conforms to the two formatters' / linters' requirements each time you commit to a branch. black and ruff are also run as part of the CI workflow discussed below, such that even without these hooks, the code will be checked and raise an error on GitHub if it is not formatted consistently.

Configuration for both packages can be found in the pyproject.toml; this configuration should be picked up automatically by the pre-commit hooks, by your IDE, and when running the tools manually on the command line. The main configuration is as follows:

[tool.black]\nline-length = 120\n\n[tool.ruff]\ninclude = [\"*.py\", \"*.pyi\", \"**/pyproject.toml\", \"*.ipynb\"]\nselect = [\"E4\", \"E7\", \"E9\", \"F\", \"C90\", \"I\"]\n\n[tool.ruff.per-file-ignores]\n\"src/nhssynth/common/constants.py\" = [\"F403\", \"F405\"]\n\n[tool.ruff.isort]\nknown-first-party = [\"nhssynth\"]\n

This ensures that absolute imports from NHSSynth are sorted separately from the rest of the imports in a file.

There are a number of other hooks used as part of this repository's pre-commit configuration, including one that automatically mirrors the Poetry versions of these packages in the dev dependency group, per the list of supported packages and .poetry-sync-db.json. Roughly, these other hooks ensure correct formatting of .yaml and .toml files, check for large files being added to a commit, strip notebook output from the files, and fix whitespace and end-of-file issues. These are mostly consistent with the NHSX analytics project template's hooks.

"},{"location":"development_guide/#documentation","title":"Documentation","text":"

There should be Google-style docstrings on all non-trivial functions and classes. Ideally a docstring should take the form:

def func(arg1: type1, arg2: type2) -> returntype:\n    \"\"\"\n    One-line summary of the function.\n    AND / OR\n    Longer description of the function, including any caveats or assumptions where appropriate.\n\n    Args:\n        arg1: Description of arg1.\n        arg2: Description of arg2.\n\n    Returns:\n        Description of the return value.\n    \"\"\"\n    ...\n

These docstrings are then compiled into a full API documentation tree as part of a larger MkDocs documentation site hosted via GitHub (the one you are reading right now!). This process is derived from this tutorial.

The MkDocs page is built using the mkdocs-material theme. The documentation is built and hosted automatically via GitHub Pages.

The other parts of this site comprise markdown documents in the docs folder. Adding new pages is handled in the mkdocs.yml file as in any other Material MkDocs site. See their documentation if more complex changes to the site are required.

"},{"location":"development_guide/#testing","title":"Testing","text":"

We use tox to manage the execution of tests for the package against multiple versions of Python, and to ensure that they are being run in a clean environment. To run the tests, simply execute tox in the root directory of the repository. This will run the tests against all supported versions of Python. To run the tests against a specific version of Python, use tox -e py311 (or py310 or py39).

"},{"location":"development_guide/#configuration","title":"Configuration","text":"

See the tox.ini file for more information on the testing configuration. We follow the Poetry documentation on tox support to ensure that for each version of Python, tox will create an sdist package of the project and use pip to install it in a fresh environment. Thus, dependencies are resolved by pip in the first place and then afterwards updated to the locked dependencies in poetry.lock by running poetry install ... in this fresh environment. The tests are then run using pytest, which is configured in the pyproject.toml file. This configuration is fairly minimal: simply specifying the testing directory as the tests folder and filtering some known warnings.

[tool.pytest.ini_options]\ntestpaths = \"tests\"\nfilterwarnings = [\"ignore::DeprecationWarning:pkg_resources\"]\n

We can also use coverage to check the test coverage of the package. This is configured in the pyproject.toml file as follows:

[tool.coverage.run]\nsource = [\"src/nhssynth/cli\", \"src/nhssynth/common\", \"src/nhssynth/modules\"]\nomit = [\n    \"src/nhssynth/common/debugging.py\",\n]\n

We omit debugging.py as it is a wrapper for reading full trace-backs of warnings and not to be imported directly.

"},{"location":"development_guide/#adding-tests","title":"Adding Tests","text":"

We use the pytest framework for testing. The testing directory structure mirrors that of src. The usual testing practices apply.

"},{"location":"development_guide/#releases","title":"Releases","text":""},{"location":"development_guide/#version-management","title":"Version management","text":"

The package's version should be updated following the semantic versioning framework. The package is currently in a pre-release state, such that major version 1.0.0 should only be tagged once the package is functionally complete and stable.

To update the package's metadata, we can use Poetry's version command:

poetry version <version>\n

We can then commit and push the change to the version file:

git add pyproject.toml\ngit commit -m \"Bump version to <version>\"\ngit push\n

We should then tag the release using GitHub's CLI (or manually via git if you prefer):

gh release create <version> --generate-notes\n

This will create a new release on GitHub, and will automatically generate a changelog based on the commit messages and PR's closed since the last release. This changelog can then be edited to add more detail if necessary.

"},{"location":"development_guide/#building-and-publishing-to-pypi","title":"Building and publishing to PyPI","text":"

Poetry offers not only dependency management, but also a simple way to build and distribute the package.

After tagging a release per the section above, we can build the package using Poetry's build command:

poetry build\n

This will create a dist folder containing the built package. To publish this to PyPI, we can use the publish command:

poetry publish\n

This will prompt for PyPI credentials, and then publish the package. Note that this will only work if you have been added as a Maintainer of the package on PyPI.

It might be preferable at some point in the future to set up Trusted Publisher Management via OpenID Connect (OIDC) to allow for automated publishing of the package via a GitHub workflow. See the \"Publishing\" tab of NHSSynth's project management panel on PyPI to set this up.

"},{"location":"development_guide/#github","title":"GitHub","text":""},{"location":"development_guide/#continuous-integration","title":"Continuous integration","text":"

We use GitHub Actions for continuous integration. The different workflows comprising this can be found in the .github/workflows folder. In general, the CI workflow is triggered on every push to the main or a feature branch - as appropriate - and runs tests against all supported versions of Python. It also runs black and ruff to check that the code is formatted correctly, and builds the documentation site.

There are also scripts to update the dynamic badges in the README. These work via a gist associated with the repository. It is not easy to transfer ownership of this process, so if they break please feel free to contact me.

"},{"location":"development_guide/#branching","title":"Branching","text":"

We encourage the use of the Gitflow branching model for development. This means that the main branch is always in a stable state, and that all development work is done on feature branches. These feature branches are then merged into main via pull requests. The main branch is protected, such that pull requests must be reviewed and approved before they can be merged.

At minimum, the main branch's protection should be maintained, and roughly one branch per issue should be used. Ensure that all of the CI checks pass before merging.

"},{"location":"development_guide/#security-and-vulnerability-management","title":"Security and vulnerability management","text":"

The GitHub repository for the package has Dependabot, code scanning, and other security features enabled. These should be monitored continuously and any issues resolved as soon as possible. When issues of this type require a specific version of a dependency to be specified (and it is one that is not already amongst the dependency groups of the package), the version should be referenced as part of the security group of dependencies (i.e. with poetry add <package> --group security) and a new release created (see above).

"},{"location":"downstream_tasks/","title":"Defining a downstream task","text":"

It is likely that a synthetic dataset may be associated with specific modelling efforts or metrics that are not included in the general suite of evaluation tools supported more explicitly by this package. Additionally, analyses on model outputs for bias and fairness provided via Aequitas require some basis of predictions on which to perform the analysis. For these reasons, we provide a simple interface for defining a custom downstream task.

All downstream tasks are to be located in a folder named tasks in the working directory of the project, with subfolders for each dataset, i.e. the tasks associated with the support dataset should be located in the tasks/support directory.

The interface is then quite simple:

  • There should be a function called run that takes a single argument: dataset (additional arguments could be provided with some further configuration if there is a need for this)
  • The run function should fit a model and / or calculate some metric(s) on the dataset.
  • It should then return predicted probabilities for the outcome variable(s) in the dataset and a dictionary of metrics.
  • The file should contain a top-level variable containing an instantiation of the nhssynth Task class.

See the example below of a logistic regression model fit on the support dataset with the event variable as the outcome and rocauc as the metric of interest:

import pandas as pd\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.metrics import roc_auc_score\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\n\nfrom nhssynth.modules.evaluation.tasks import Task\n\n\ndef run(dataset: pd.DataFrame) -> tuple[pd.DataFrame, dict]:\n    # Split the dataset into features and target\n    target = \"event\"\n\n    data = dataset.dropna()\n    X, y = data.drop([\"dob\", \"x3\", target], axis=1), data[target]\n    X_train, X_test, y_train, y_test = train_test_split(\n        StandardScaler().fit_transform(X), y, test_size=0.33, random_state=42\n    )\n\n    lr = LogisticRegression()\n    lr.fit(X_train, y_train)\n\n    # Get the predicted probabilities and predictions\n    probs = pd.DataFrame(lr.predict_proba(X_test)[:, 1], columns=[f\"lr_{target}_prob\"])\n\n    rocauc = roc_auc_score(y_test, probs)\n\n    return probs, {\"rocauc_lr\": rocauc}\n\n\ntask = Task(\"Logistic Regression on 'event'\", run, supports_aequitas=True)\n

Note the following points about the example above:

  1. The Task class has been imported from nhssynth.modules.evaluation.tasks
  2. The run function should accept one argument and return a tuple
  3. The second element of this tuple should be a dictionary labelling each metric of interest (this name will be used in the dashboard as identification so ensure it is unique to the experiment)
  4. The task should be instantiated with a name, the run function and a boolean indicating whether the task supports Aequitas analysis. If the task does not support Aequitas analysis, the first element of the tuple will not be used and None can be returned instead.

The rest of this file can contain any arbitrary code that runs within these constraints; this could be a simple model as above, or a more complex pipeline of transformations and models to match a pre-existing workflow.

"},{"location":"getting_started/","title":"Getting Started","text":""},{"location":"getting_started/#running-an-experiment","title":"Running an experiment","text":"

This package offers two easy ways to run reproducible and highly-configurable experiments. The following sections describe how to use each of these two methods.

"},{"location":"getting_started/#via-the-cli","title":"Via the CLI","text":"

The CLI is the easiest way to quickly run an experiment. It is designed to be as simple as possible, whilst still offering a high degree of configurability. An example command to run a full pipeline experiment is:

nhssynth pipeline \\\n    --experiment-name test \\\n    --dataset support \\\n    --seed 123 \\\n    --architecture DPVAE PATEGAN DECAF \\\n    --repeats 3 \\\n    --downstream-tasks \\\n    --column-similarity-metrics CorrelationSimilarity ContingencySimilarity \\\n    --column-shape-metrics KSComplement TVComplement \\\n    --boundary-metrics BoundaryAdherence \\\n    --synthesis-metrics NewRowSynthesis \\\n    --divergence-metrics ContinuousKLDivergence DiscreteKLDivergence\n

This will run a full pipeline experiment on the support dataset in the data directory. The outputs of the experiment will be recorded in a folder named test (corresponding to the experiment name) in the experiments directory.

In total, three different model architectures will be trained three times each with their default configurations. The resulting generated synthetic datasets will be evaluated via the downstream tasks in tasks/support alongside the metrics specified in the command. A dashboard will then be built automatically to exhibit the results.

The components of the run are persisted to the experiment's folder. Suppose you have already run this experiment and want to add some new evaluations. You do not have to re-run the entire experiment; you can simply run:

nhssynth evaluation -e test -d support -s 123 --coverage-metrics RangeCoverage CategoryCoverage\nnhssynth dashboard -e test -d support\n

This will regenerate the dashboard with a different set of metrics corresponding to the arguments passed to evaluation. Note that the --experiment-name and --dataset arguments are required for all commands, as they are used to identify the experiment and ensure reproducibility.

"},{"location":"getting_started/#via-a-configuration-file","title":"Via a configuration file","text":"

A yaml configuration file placed in the config folder can be used to configure a comparable run to the one above:

seed: 123\nexperiment_name: test\nrun_type: pipeline\nmodel:\n  architecture:\n    - DPVAE\n    - DPGAN\n    - DECAF\n  max_grad_norm: 5.0\n  secure_mode: false\n  repeats: 4\nevaluation:\n  downstream_tasks: true\n  column_shape_metrics:\n  - KSComplement\n  - TVComplement\n  column_similarity_metrics:\n  - CorrelationSimilarity\n  - ContingencySimilarity\n  boundary_metrics:\n  - BoundaryAdherence\n  synthesis_metrics:\n  - NewRowSynthesis\n  divergence_metrics:\n  - ContinuousKLDivergence\n  - DiscreteKLDivergence\n

Once saved as run_pipeline.yaml in the config directory, the package can be run under the configuration laid out in the file via:

nhssynth config -c run_pipeline\n

Note that if you run via the CLI, you can add the --save-config flag to your command to save the configuration file in the experiments/test (or whatever the --experiment-name has been set to) directory. This allows for easy reproduction of an experiment at a later date or on someone else's computer through sharing the configuration file with them.
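
For example, a sketch of such a run re-using the arguments from the pipeline command above:

nhssynth pipeline --experiment-name test --dataset support --seed 123 --architecture DPVAE --save-config\n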

"},{"location":"getting_started/#setting-up-a-datasets-metadata","title":"Setting up a dataset's metadata","text":"

For each dataset you wish to work with, it is advisable to set up a corresponding metadata file. The package will infer this when information is missing (and you can then tweak it). The reason we suggest specifying metadata in this way is that Pandas / Python are in general bad at interpreting CSV files, particularly the specifics of datatypes, date objects and so on.

To do this, we must create a metadata yaml file in the dataset's directory. For example, for the support dataset, this file is located at data/support_metadata.yaml. By default, the package will look for a file with the same name as the dataset in the dataset's directory, but with _metadata appended to the end. This is configurable like most other filenaming conventions via the CLI.
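
For illustration, a typical layout for the support dataset might look like the following (the .csv extension is an assumption; use whatever format your dataset is actually stored in):

data/\n    support.csv\n    support_metadata.yaml\n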

The metadata file is split into two sections: columns and constraints. The former specifies the nature of each column in the dataset, whilst the latter specifies any constraints that should be enforced on the dataset.

"},{"location":"getting_started/#column-metadata","title":"Column metadata","text":"

Again, we refer to the support dataset's metadata file as an example:

columns:\n  dob:\n    dtype:\n      name: datetime64\n      floor: S\n  x1:\n    categorical: true\n    dtype: int64\n  x2:\n    categorical: true\n    dtype: int64\n  x3:\n    categorical: true\n  x4:\n    categorical: true\n    dtype: int64\n  x5:\n    categorical: true\n    dtype: int64\n  x6:\n    categorical: true\n    dtype: int64\n  x7:\n    dtype: int64\n  x8:\n    dtype: float64\n    missingness:\n      impute: mean\n  x9:\n    dtype: int64\n  x10:\n    dtype:\n      name: float64\n      rounding_scheme: 0.1\n  x11:\n    dtype: int64\n  x12:\n    dtype: float64\n  x13:\n    dtype: float64\n  x14:\n    dtype: float64\n  duration:\n    dtype: int64\n  event:\n    categorical: true\n    dtype: int64\n

For each column in the dataset, we specify the following:

  • Its dtype; this can be any numpy data type or a datetime type.
  • In the case of a datetime type, we also specify the floor (i.e. the smallest unit of time that we care about). In general this should be set to match the smallest unit of time in the dataset.
  • In the case of a float type, we can also specify a rounding_scheme to round the values to a certain number of decimal places, again this should be set according to the rounding applied to the column in the real data, or if you want to round the values for some other reason.
  • Whether it is categorical or not. If a column is not categorical, you don't need to specify this. A column is inferred as categorical if it has less than 10 unique values or is a string type.
  • If the column has missing values, we can specify how to deal with them by specifying a missingness strategy. In the case of the x8 column, we impute the missing values with the column's mean. If you don't specify this, the CLI or configuration file's specified global missingness strategy will be applied instead (this defaults to the augment strategy, which models the missingness as a separate level in the case of categorical features, or as a separate cluster in the case of continuous features).
"},{"location":"getting_started/#constraints","title":"Constraints","text":"

The second part of the metadata file specifies any constraints that should be enforced on the dataset. These can be a relative constraint between two columns, or a fixed one via a constant on a single column. For example, the support dataset's constraints are as follows (note that these are arbitrarily defined and do not necessarily reflect the real data):

constraints:\n  - \"x10 in (0,100)\"\n  - \"x12 in (0,100)\"\n  - \"x13 in (0,100)\"\n  - \"x10 <= x12\"\n  - \"x12 < x13\"\n  - \"x10 < x13\"\n  - \"x8 > x10\"\n  - \"x8 > x12\"\n  - \"x8 > x13\"\n  - \"x11 > 100\"\n  - \"x12 > 10\"\n

The function of these constraints is fairly self-explanatory: The package ensures the constraints are feasible and minimises them before applying transformations to ensure that they will be satisfied in the synthetic data as well. When a column does not meet a feasible constraint in the real data, we assume that this is intentional and use the violation as a feature upon which to generate synthetic data that also violates the constraint.

There is a further constraint fixcombo that only applies to categorical columns. It specifies that only existing combinations of two or more categorical columns should be generated, i.e. the columns can be collapsed into a single composite feature. For example, if we have a column for pregnancy and another for sex, we may only want to allow three categories: 'male:not-pregnant', 'female:pregnant' and 'female:not-pregnant'. This is specified as follows:

constraints:\n  - \"pregnancy fixcombo sex\"\n

In conclusion then, we support the following constraint types:

  • fixcombo for categorical columns
  • < and > for non-categorical columns
  • >= and <= for non-categorical columns
  • in for non-categorical columns, which is effectively two of the above constraints combined. I.e. x in [a, b) is equivalent to x >= a and x < b. This is purely a UX feature and is treated as two separate constraints internally.
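
For instance, the constraint \"x10 in (0,100)\" from the example above is handled internally as the equivalent pair of strict constraints:

constraints:\n  - \"x10 > 0\"\n  - \"x10 < 100\"\n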

Once this metadata is setup, you are ready to run your experiment.

"},{"location":"getting_started/#evaluation","title":"Evaluation","text":"

Once models have been trained and synthetic datasets generated, we leverage evaluations from SDMetrics, Aequitas, and the NHS' internal SynAdvSuite (at the current time you must request access to this repository to use the privacy-related attacks it implements), and also offer a facility for the custom specification of downstream tasks. These evaluations are then aggregated into a dashboard for ease of comparison and analysis.

See the relevant documentation for each of these packages for more information on the metrics they offer.

"},{"location":"model_card/","title":"Model Card: Variational AutoEncoder with Differential Privacy","text":""},{"location":"model_card/#model-details","title":"Model Details","text":"

The implementation of the Variational AutoEncoder (VAE) with Differential Privacy within this repository is based on work done by Dominic Danks during an NHSX Analytics Unit PhD internship (last commit to the original SynthVAE repository: commit 88a4bdf). This model card describes an updated and extended version of the model, by Harrison Wilde. Further information about the previous version created by Dom and its model implementation can be found in Section 5.4 of the associated report.

"},{"location":"model_card/#model-use","title":"Model Use","text":""},{"location":"model_card/#intended-use","title":"Intended Use","text":"

This model is intended for use in experimenting with differential privacy and VAEs.

"},{"location":"model_card/#training-data","title":"Training Data","text":"

Experiments in this repository are run against the Study to Understand Prognoses and Preferences for Outcomes and Risks of Treatments (SUPPORT) dataset accessed via the pycox Python library. We also performed further analysis on a single table that we extracted from MIMIC-III.

"},{"location":"model_card/#performance-and-limitations","title":"Performance and Limitations","text":"

A from-scratch VAE implementation was compared against various models available within the SDV framework using a variety of quality and privacy metrics on the SUPPORT dataset. The VAE was found to be competitive with all of these models across the various metrics. Differential Privacy (DP) was introduced via DP-SGD and the performance of the VAE for different levels of privacy was evaluated. It was found that as the level of Differential Privacy introduced by DP-SGD was increased, it became easier to distinguish between synthetic and real data.

Proper evaluation of quality and privacy of synthetic data is challenging. In this work, we utilised metrics from the SDV library due to their natural integration with the rest of the codebase. A valuable extension of this work would be to apply a variety of external metrics, including more advanced adversarial attacks to more thoroughly evaluate the privacy of the considered methods, including as the level of DP is varied. It would also be of interest to apply DP-SGD and/or PATE to all of the considered methods and evaluate whether the performance drop as a function of implemented privacy is similar or different across the models.

Currently the SynthVAE model only works for data which is 'clean', i.e. data that has no missingness or NaNs within its input. It can handle continuous, categorical and datetime variables. Special types such as nominal data cannot be handled properly; however, the model may still run. Column names have to be specified in the code for the variable group they belong to.

Hyperparameter tuning of the model can result in errors if certain parameter values are selected. Most commonly, changing the learning rate in our example results in errors during training. An extensive test to evaluate plausible ranges has not yet been performed. If you get errors during tuning, consider your hyperparameter values and adjust accordingly.

"},{"location":"model_card/#acknowledgements","title":"Acknowledgements","text":"

This documentation is inspired by Model Cards for Model Reporting (Mitchell et al.) and Lessons from Archives (Jo & Gebru).

"},{"location":"models/","title":"Adding new models","text":"

The model module contains all of the architectures implemented as part of this package. We offer GAN and VAE based architectures with a number of adjustments to achieve privacy and other augmented functionalities. The module handles the training and generation of synthetic data using these architectures, per a user's choice of model(s) and configuration.

It is likely that as the literature matures, more effective architectures will present themselves as promising for application to the type of tabular data NHSSynth is designed for. Below we discuss how to add new models to the package.

"},{"location":"models/#model-design","title":"Model design","text":"

The models in this package are built entirely in PyTorch and use Opacus for differential privacy.

We have built the VAE and (Tabular)GAN implementations in this package to serve as the foundations for a number of other architectures. As such, we try to maintain a somewhat modular design to building up more complex differentially private (or otherwise augmented) architectures. Each model inherits from either the GAN or VAE class (in files of the same name) which in turn inherit from a generic Model class found in the common folder. This folder contains components of models which are not to be instantiated themselves, e.g. a mixin class for differential privacy, the MLP underlying the GAN and so on.

The Model class from which all of the models derive handles all of the general attributes. Roughly, these are the specifics of the dataset the instance of the model is relative to, the device that training is to be carried out upon, and other training parameters such as the total number of epochs to execute.

We define these things at the model level, as when using differential privacy or other privacy accountant methods, we must know ahead of time the data and length of training exposure in order to calculate the levels of noise required to reach a certain privacy guarantee and so on.

"},{"location":"models/#implementing-a-new-model","title":"Implementing a new model","text":"

In order to add a new architecture then, it is important to first investigate the modular parts already implemented to ensure that what you want to build is not already possible through the composition of these existing parts. Then you must ensure that your architecture either inherits from the GAN or VAE, or Model if you wish to implement a different type of generative model.

In all of these cases, the interface expects for the implementation to have the following methods:

  • get_args: a class method that lists the architecture specific arguments that the model requires. This is used to facilitate default arguments in the python API whilst still allowing for arguments in the CLI to be propagated and recorded automatically in the experiment output. This should be a list of variable names equal to the concatenation of all of the non-Model parent classes (e.g. DPVAE has DP and VAE args) plus any architecture specific arguments in the __init__ method of the model in question.
  • get_metrics: another class method that behaves similarly to the above, should return a list of valid metrics to track during training for this model
  • train: a method handling the training loop for the model. This should take num_epochs, patience and displayed_metrics as arguments and return a tuple containing the number of epochs that were executed plus a bundle of training metrics (the values over time returned by get_metrics on the class). In the execution of this method, the utility methods defined in Model should be called in order, _start_training at the beginning, then _record_metrics at each training step of the data loader, and finally _finish_training to clean up progress bars and so on. displayed_metrics determines which metrics are actively displayed during training.
  • generate: a method to call on the trained model which generates N samples of data, and calls the model's associated MetaTransformer to return a valid pandas DataFrame of synthetic data ready to output. A minimal sketch of this interface is given below.
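
The following is a minimal, hypothetical sketch of this interface for a new GAN-based extension; the import path, the parent class' get_args, and the _training_step / _sample helpers are assumptions standing in for real implementation details, not the package's actual API:

from typing import Optional\n\nimport numpy as np\nimport pandas as pd\n\nfrom nhssynth.modules.model.models.gan import GAN  # assumed import path\n\n\nclass MyGAN(GAN):\n    \"\"\"A hypothetical GAN extension with one architecture-specific argument.\"\"\"\n\n    def __init__(self, *args, my_weight: float = 0.1, **kwargs) -> None:\n        super().__init__(*args, **kwargs)\n        self.my_weight = my_weight\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        # Concatenate the parent class' args with the architecture-specific ones\n        return GAN.get_args() + [\"my_weight\"]\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return [\"GLoss\", \"DLoss\"]\n\n    def train(\n        self,\n        num_epochs: int = 100,\n        patience: int = 5,\n        displayed_metrics: list[str] = [\"GLoss\"],\n    ) -> tuple[int, dict[str, list[float]]]:\n        self._start_training(num_epochs, patience, displayed_metrics)\n        for epoch in range(num_epochs):\n            for (X,) in self.data_loader:\n                losses = self._training_step(X)  # hypothetical architecture-specific step\n                self._record_metrics(losses)\n            if self._check_patience(epoch, np.mean(self.metrics[\"GLoss\"][-len(self.data_loader) :])):\n                num_epochs = epoch + 1\n                break\n        self._finish_training(num_epochs)\n        return (num_epochs, self.metrics)\n\n    def generate(self, N: Optional[int] = None) -> pd.DataFrame:\n        X_gen = self._sample(N or self.nrows)  # hypothetical architecture-specific sampling\n        return self.metatransformer.inverse_apply(pd.DataFrame(X_gen, columns=self.columns))\n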
"},{"location":"models/#adding-a-new-model-to-the-cli","title":"Adding a new model to the CLI","text":"

Once you have implemented your new model, you must add it to the CLI. To do this, we must first export the model's class into the MODELS constant in the __init__ file in the models subfolder. We can then add a new function and option in module_arguments.py to list the arguments and their types unique to this type of architecture.

Note that you should not duplicate arguments that are already defined in the Model class or in foundational model architectures such as the GAN if you are implementing an extension to them. If you have set up get_args correctly, all of this will be propagated automatically.
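
As a rough, hypothetical sketch (the exact shape of the MODELS constant and the signature of the argument-adding function are assumptions, mirroring the add_mymodule_args pattern shown in the modules guide):

# src/nhssynth/modules/model/models/__init__.py\nfrom nhssynth.modules.model.models.mygan import MyGAN\n\nMODELS = {\n    # ... existing architectures ...\n    \"MyGAN\": MyGAN,\n}\n\n# src/nhssynth/cli/module_arguments.py\ndef add_mygan_args(parser: argparse.ArgumentParser, group_title: str, overrides=False):\n    group = parser.add_argument_group(title=group_title)\n    # only the MyGAN-specific arguments; GAN/Model arguments are propagated via get_args\n    group.add_argument(\"--my-weight\", type=float, default=0.1)\n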

"},{"location":"modules/","title":"Adding new modules","text":"

The package is designed such that each module can be used as part of a pipeline (via the CLI or a configuration file) or independently (via importing them into an existing codebase).

In the future it may be desirable to add or adjust the modules of the package; this guide offers a high-level overview of how to do so.

"},{"location":"modules/#importing-a-module-from-this-package","title":"Importing a module from this package","text":"

After installing the package, you can simply do:

from nhssynth.modules import <module>\n
and you will be able to use it in your code!

"},{"location":"modules/#creating-a-new-module-and-folding-it-into-the-cli","title":"Creating a new module and folding it into the CLI","text":"

The following instructions specify how to extend this package with a new module:

  1. Create a folder for your module within the package, i.e. src/nhssynth/modules/mymodule
  2. Include within it a main executor function that accepts arguments from the CLI, i.e.

    def myexecutor(args):\n    ...\n

    In mymodule/executor.py and export it by adding from .executor import myexecutor to mymodule/__init__.py. Check the existing modules for examples of what a typical executor function looks like.

  3. In the cli folder, add a corresponding function to module_arguments.py and populate with arguments you want to expose in the CLI:

    def add_mymodule_args(parser: argparse.ArgumentParser, group_title: str, overrides=False):\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(...)\n    group.add_argument(...)\n    ...\n
  4. Next, in module_setup.py make the following adjustments to the MODULE_MAP code:

    MODULE_MAP = {\n    ...\n    \"mymodule\": ModuleConfig(\n        func=m.mymodule.myexecutor,\n        add_args=ma.add_mymodule_args,\n        description=\"...\",\n        help=\"...\",\n        common_parsers=[...]\n    ),\n    ...\n}\n

    Where common_parsers is a subset of COMMON_PARSERS defined in common_arguments.py. Note that the \"seed\" and \"core\" parsers are added automatically, so you don't need to specify them. These parsers can be used to add arguments to your module that are common to multiple modules, e.g. the dataloader and evaluation modules both use --typed to specify the path of the typed input dataset.
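
For example, assuming the parser keys mirror the corresponding argument names (an illustrative assumption), a module that consumes the typed dataset and produces synthetic output might declare:

common_parsers=[\"typed\", \"synthetic\"]\n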

  5. You can (optionally) also edit the following block if you want your module to be included in a full pipeline run:

    PIPELINE = [..., mymodule, ...]  # NOTE this determines the order of a pipeline run\n
  6. Congrats, your module is implemented within the CLI; its documentation etc. will now be built automatically and it can be referenced in configuration files!

"},{"location":"secure_mode/","title":"Opacus' secure mode","text":"

Part of the process for achieving a differential privacy guarantee under Opacus involves generating noise according to a Gaussian distribution with mean 0 in Opacus' _generate_noise() function.

Enabling secure_mode when using the NHSSynth package ensures that the generated noise is also secure against floating point representation attacks, such as the ones in https://arxiv.org/abs/2107.10138 and https://arxiv.org/abs/2112.05307.

This attack first appeared in https://arxiv.org/abs/2112.05307; the fix via the csprng package is based on https://arxiv.org/abs/2107.10138 and involves calling the Gaussian noise function $2n$ times, where $n=2$ (see section 5.1 in https://arxiv.org/abs/2107.10138).

The reason for choosing $n=2$ is that $n$ can be any number greater than $1$. The bigger $n$ is, though, the more computation needs to be done to generate the Gaussian samples. The choice of $n=2$ is justified via the knowledge that the attack has a complexity of $2^{p(2n-1)}$. In PyTorch, $p=53$, so with $n=2$ the complexity is $2^{p(2n-1)} = 2^{53 \times 3} = 2^{159}$, which is deemed sufficiently hard for an attacker to break.

"},{"location":"reference/SUMMARY/","title":"SUMMARY","text":"
  • cli
    • common_arguments
    • config
    • model_arguments
    • module_arguments
    • module_setup
    • run
  • common
    • common
    • constants
    • debugging
    • dicts
    • io
    • strings
  • modules
    • dashboard
      • Upload
      • io
      • pages
        • 1_Tables
        • 2_Plots
        • 3_Experiment_Configurations
      • run
      • utils
    • dataloader
      • constraints
      • io
      • metadata
      • metatransformer
      • missingness
      • run
      • transformers
        • base
        • categorical
        • continuous
        • datetime
    • evaluation
      • aequitas
      • io
      • metrics
      • run
      • tasks
      • utils
    • model
      • common
        • dp
        • mlp
        • model
      • io
      • models
        • dpvae
        • gan
        • vae
      • run
      • utils
    • plotting
      • io
      • plots
      • run
    • structure
      • run
"},{"location":"reference/cli/","title":"cli","text":""},{"location":"reference/cli/common_arguments/","title":"common_arguments","text":"

Functions to define the CLI's \"common\" arguments, i.e. those that can be applied to either:

  • All module argument lists, e.g. --dataset, --seed, etc.
  • A subset of module(s) argument lists, e.g. --synthetic, --typed, etc.

"},{"location":"reference/cli/common_arguments/#nhssynth.cli.common_arguments.get_core_parser","title":"get_core_parser(overrides=False)","text":"

Create the core common parser group applied to all modules (and the pipeline and config options). Note that we leverage common titling of the argument group to ensure arguments appear together even if declared separately.

Parameters:

Name Type Description Default overrides

whether the arguments declared within are required or not.

False

Returns:

Type Description ArgumentParser

The parser with the group containing the core arguments attached.

Source code in src/nhssynth/cli/common_arguments.py
def get_core_parser(overrides=False) -> argparse.ArgumentParser:\n    \"\"\"\n    Create the core common parser group applied to all modules (and the `pipeline` and `config` options).\n    Note that we leverage common titling of the argument group to ensure arguments appear together even if declared separately.\n\n    Args:\n        overrides: whether the arguments declared within are required or not.\n\n    Returns:\n        The parser with the group containing the core arguments attached.\n    \"\"\"\n    \"\"\"\"\"\"\n    core = argparse.ArgumentParser(add_help=False)\n    core_grp = core.add_argument_group(title=\"options\")\n    core_grp.add_argument(\n        \"-d\",\n        \"--dataset\",\n        required=(not overrides),\n        type=str,\n        help=\"the name of the dataset to experiment with, should be present in `<DATA_DIR>`\",\n    )\n    core_grp.add_argument(\n        \"-e\",\n        \"--experiment-name\",\n        type=str,\n        default=TIME,\n        help=\"name the experiment run to affect logging, config, and default-behaviour i/o\",\n    )\n    core_grp.add_argument(\n        \"--save-config\",\n        action=\"store_true\",\n        help=\"save the config provided via the cli, this is a recommended option for reproducibility\",\n    )\n    return core\n
"},{"location":"reference/cli/common_arguments/#nhssynth.cli.common_arguments.get_seed_parser","title":"get_seed_parser(overrides=False)","text":"

Create the common parser group for the seed. NB This is separate to the rest of the core arguments as it does not apply to the dashboard module.

Parameters:

Name Type Description Default overrides

whether the arguments declared within are required or not.

False

Returns:

Type Description ArgumentParser

The parser with the group containing the seed argument attached.

Source code in src/nhssynth/cli/common_arguments.py
def get_seed_parser(overrides=False) -> argparse.ArgumentParser:\n    \"\"\"\n    Create the common parser group for the seed.\n    NB This is separate to the rest of the core arguments as it does not apply to the dashboard module.\n\n    Args:\n        overrides: whether the arguments declared within are required or not.\n\n    Returns:\n        The parser with the group containing the seed argument attached.\n    \"\"\"\n    parser = argparse.ArgumentParser(add_help=False)\n    parser_grp = parser.add_argument_group(title=\"options\")\n    parser_grp.add_argument(\n        \"-s\",\n        \"--seed\",\n        type=int,\n        help=\"specify a seed for reproducibility, this is a recommended option for reproducibility\",\n    )\n    return parser\n
"},{"location":"reference/cli/common_arguments/#nhssynth.cli.common_arguments.suffix_parser_generator","title":"suffix_parser_generator(name, help, required=False)","text":"

Generator function for creating parsers following a common template. These parsers are all suffixes to the --dataset / -d / DATASET argument, see COMMON_TITLE.

Parameters:

Name Type Description Default name str

the name / label of the argument to add to the CLI options.

required help str

the help message when the CLI is run with --help / -h.

required required bool

whether the argument must be provided or not.

False Source code in src/nhssynth/cli/common_arguments.py
def suffix_parser_generator(name: str, help: str, required: bool = False) -> argparse.ArgumentParser:\n    \"\"\"Generator function for creating parsers following a common template.\n    These parsers are all suffixes to the --dataset / -d / DATASET argument, see `COMMON_TITLE`.\n\n    Args:\n        name: the name / label of the argument to add to the CLI options.\n        help: the help message when the CLI is run with --help / -h.\n        required: whether the argument must be provided or not.\n    \"\"\"\n\n    def get_parser(overrides: bool = False) -> argparse.ArgumentParser:\n        parser = argparse.ArgumentParser(add_help=False)\n        parser_grp = parser.add_argument_group(title=COMMON_TITLE)\n        parser_grp.add_argument(\n            f\"--{name.replace('_', '-')}\",\n            required=required and not overrides,\n            type=str,\n            default=f\"_{name}\",\n            help=help,\n        )\n        return parser\n\n    return get_parser\n
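
A hedged usage sketch (the help text is illustrative, and \"typed\" is assumed to be a valid suffix parser name):

get_typed_parser = suffix_parser_generator(\"typed\", \"filename of the typed version of the dataset\")\nparser = get_typed_parser()  # adds --typed (type=str) with default \"_typed\"\n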
"},{"location":"reference/cli/config/","title":"config","text":"

Read, write and process config files, including handling of module-specific / common config overrides.

"},{"location":"reference/cli/config/#nhssynth.cli.config.assemble_config","title":"assemble_config(args, all_subparsers)","text":"

Assemble and arrange a nested-via-module configuration dictionary from parsed command-line arguments to be output as a YAML record.

Parameters:

Name Type Description Default args Namespace

A namespace object containing all parsed command-line arguments.

required all_subparsers dict[str, ArgumentParser]

A dictionary mapping module names to subparser objects.

required

Returns:

Type Description dict[str, Any]

A dictionary containing configuration information extracted from args in a module-wise nested format that is YAML-friendly.

Raises:

Type Description ValueError

If a module specified in args.modules_to_run is not in all_subparsers.

Source code in src/nhssynth/cli/config.py
def assemble_config(\n    args: argparse.Namespace,\n    all_subparsers: dict[str, argparse.ArgumentParser],\n) -> dict[str, Any]:\n    \"\"\"\n    Assemble and arrange a nested-via-module configuration dictionary from parsed command-line arguments to be output as a YAML record.\n\n    Args:\n        args: A namespace object containing all parsed command-line arguments.\n        all_subparsers: A dictionary mapping module names to subparser objects.\n\n    Returns:\n        A dictionary containing configuration information extracted from `args` in a module-wise nested format that is YAML-friendly.\n\n    Raises:\n        ValueError: If a module specified in `args.modules_to_run` is not in `all_subparsers`.\n    \"\"\"\n    args_dict = vars(args)\n\n    # Filter out the keys that are not relevant to the config file\n    args_dict = filter_dict(\n        args_dict, {\"func\", \"experiment_name\", \"save_config\", \"save_config_path\", \"module_handover\"}\n    )\n    for k in args_dict.copy().keys():\n        # Remove empty metric lists from the config\n        if \"_metrics\" in k and not args_dict[k]:\n            args_dict.pop(k)\n\n    modules_to_run = args_dict.pop(\"modules_to_run\")\n    if len(modules_to_run) == 1:\n        run_type = modules_to_run[0]\n    elif modules_to_run == PIPELINE:\n        run_type = \"pipeline\"\n    else:\n        raise ValueError(f\"Invalid value for `modules_to_run`: {modules_to_run}\")\n\n    # Generate a dictionary containing each module's name from the run, with all of its possible corresponding config args\n    module_args = {\n        module_name: [action.dest for action in all_subparsers[module_name]._actions if action.dest != \"help\"]\n        for module_name in modules_to_run\n    }\n\n    # Use the flat namespace to populate a nested (by module) dictionary of config args and values\n    out_dict = {}\n    for module_name in modules_to_run:\n        for k in args_dict.copy().keys():\n            # We want to keep dataset, experiment_name, seed and save_config at the top-level as they are core args\n            if k in module_args[module_name] and k not in {\n                \"version\",\n                \"dataset\",\n                \"experiment_name\",\n                \"seed\",\n                \"save_config\",\n            }:\n                if module_name not in out_dict:\n                    out_dict[module_name] = {}\n                v = args_dict.pop(k)\n                if v is not None:\n                    out_dict[module_name][k] = v\n\n    # Assemble the final dictionary in YAML-compliant form\n    return {**({\"run_type\": run_type} if run_type else {}), **args_dict, **out_dict}\n
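
As a simplified, illustrative sketch (the values are made up and a real run would include every argument of the module), a run of only the model module might be assembled into something like:

{\n    \"run_type\": \"model\",\n    \"dataset\": \"support\",\n    \"seed\": 123,\n    \"model\": {\n        \"architecture\": [\"VAE\"],\n        \"num_epochs\": 50,\n    },\n}\n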
"},{"location":"reference/cli/config/#nhssynth.cli.config.get_default_and_required_args","title":"get_default_and_required_args(top_parser, module_parsers)","text":"

Get the default and required arguments for the top-level parser and the current run's corresponding list of module parsers.

Parameters:

Name Type Description Default top_parser ArgumentParser

The top-level parser (contains common arguments).

required module_parsers dict[str, ArgumentParser]

The dict of module-level parsers mapped to their names.

required

Returns:

Type Description tuple[dict[str, Any], list[str]]

A tuple containing two elements: a dictionary containing all arguments and their default values, and a list of key-value pairs mapping each required argument to its associated module.

Source code in src/nhssynth/cli/config.py
def get_default_and_required_args(\n    top_parser: argparse.ArgumentParser,\n    module_parsers: dict[str, argparse.ArgumentParser],\n) -> tuple[dict[str, Any], list[str]]:\n    \"\"\"\n    Get the default and required arguments for the top-level parser and the current run's corresponding list of module parsers.\n\n    Args:\n        top_parser: The top-level parser (contains common arguments).\n        module_parsers: The dict of module-level parsers mapped to their names.\n\n    Returns:\n        A tuple containing two elements:\n            - A dictionary containing all arguments and their default values.\n            - A list of key-value-pairs of the required arguments and their associated module.\n    \"\"\"\n    all_actions = {\"top-level\": top_parser._actions} | {m: p._actions for m, p in module_parsers.items()}\n    defaults = {}\n    required_args = []\n    for module, actions in all_actions.items():\n        for action in actions:\n            if action.dest not in [\"help\", \"==SUPPRESS==\"]:\n                defaults[action.dest] = action.default\n                if action.required:\n                    required_args.append({\"arg\": action.dest, \"module\": module})\n    return defaults, required_args\n
"},{"location":"reference/cli/config/#nhssynth.cli.config.get_modules_to_run","title":"get_modules_to_run(executor)","text":"

Get the list of modules to run from the passed executor function.

Parameters:

Name Type Description Default executor Callable

The executor function to run.

required

Returns:

Type Description list[str]

A list of module names to run.

Source code in src/nhssynth/cli/config.py
def get_modules_to_run(executor: Callable) -> list[str]:\n    \"\"\"\n    Get the list of modules to run from the passed executor function.\n\n    Args:\n        executor: The executor function to run.\n\n    Returns:\n        A list of module names to run.\n    \"\"\"\n    if executor == run_pipeline:\n        return PIPELINE\n    else:\n        return [get_key_by_value({mn: mc.func for mn, mc in MODULE_MAP.items()}, executor)]\n
"},{"location":"reference/cli/config/#nhssynth.cli.config.read_config","title":"read_config(args, parser, all_subparsers)","text":"

Hierarchically assembles a config argparse.Namespace object for the inferred modules to run and execute, given a file.

  1. Load the YAML file containing the config to read from
  2. Check a valid run_type is specified or infer it and determine the list of modules_to_run
  3. Establish the appropriate default configuration set of arguments from the parser and all_subparsers for the determined modules_to_run
  4. Overwrite these with the specified (sub)set of config in the YAML file
  5. Overwrite again with passed command-line args (these are considered 'overrides')
  6. Run the appropriate module(s) or pipeline with the resulting configuration Namespace object

Parameters:

Name Type Description Default args Namespace

Namespace object containing arguments from the command line

required parser ArgumentParser

top-level ArgumentParser object containing common arguments

required all_subparsers dict[str, ArgumentParser]

dictionary of ArgumentParser objects, one for each module

required

Returns:

Type Description Namespace

A Namespace object containing the assembled configuration settings

Raises:

Type Description AssertionError

if any required arguments are missing from the configuration file / overrides

Source code in src/nhssynth/cli/config.py
def read_config(\n    args: argparse.Namespace,\n    parser: argparse.ArgumentParser,\n    all_subparsers: dict[str, argparse.ArgumentParser],\n) -> argparse.Namespace:\n    \"\"\"\n    Hierarchically assembles a config `argparse.Namespace` object for the inferred modules to run and execute, given a file.\n\n    1. Load the YAML file containing the config to read from\n    2. Check a valid `run_type` is specified or infer it and determine the list of `modules_to_run`\n    3. Establish the appropriate default configuration set of arguments from the `parser` and `all_subparsers` for the determined `modules_to_run`\n    4. Overwrite these with the specified (sub)set of config in the YAML file\n    5. Overwrite again with passed command-line `args` (these are considered 'overrides')\n    6. Run the appropriate module(s) or pipeline with the resulting configuration `Namespace` object\n\n    Args:\n        args: Namespace object containing arguments from the command line\n        parser: top-level `ArgumentParser` object containing common arguments\n        all_subparsers: dictionary of `ArgumentParser` objects, one for each module\n\n    Returns:\n        A Namespace object containing the assembled configuration settings\n\n    Raises:\n        AssertionError: if any required arguments are missing from the configuration file / overrides\n    \"\"\"\n    # Open the passed yaml file and load into a dictionary\n    with open(f\"config/{args.input_config}.yaml\") as stream:\n        config_dict = yaml.safe_load(stream)\n\n    valid_run_types = [x for x in all_subparsers.keys() if x != \"config\"]\n\n    version = config_dict.pop(\"version\", None)\n    if version and version != ver(\"nhssynth\"):\n        warnings.warn(\n            f\"This config file's specified version ({version}) does not match the currently installed version of nhssynth ({ver('nhssynth')}), results may differ.\"\n        )\n    elif not version:\n        version = ver(\"nhssynth\")\n\n    run_type = config_dict.pop(\"run_type\", None)\n\n    if run_type == \"pipeline\":\n        modules_to_run = PIPELINE\n    else:\n        modules_to_run = [x for x in config_dict.keys() | {run_type} if x in valid_run_types]\n        if not args.custom_pipeline:\n            modules_to_run = sorted(modules_to_run, key=lambda x: PIPELINE.index(x))\n\n    if not modules_to_run:\n        warnings.warn(\n            f\"Missing or invalid `run_type` and / or module specification hierarchy in `config/{args.input_config}.yaml`, defaulting to a full run of the pipeline\"\n        )\n        modules_to_run = PIPELINE\n\n    # Get all possible default arguments by scraping the top level `parser` and the appropriate sub-parser for the `run_type`\n    args_dict, required_args = get_default_and_required_args(\n        parser, filter_dict(all_subparsers, modules_to_run, include=True)\n    )\n\n    # Find the non-default arguments amongst passed `args` by seeing which of them are different to the entries of `args_dict`\n    non_default_passed_args_dict = {\n        k: v\n        for k, v in vars(args).items()\n        if k in [\"input_config\", \"custom_pipeline\"] or (k in args_dict and k != \"func\" and v != args_dict[k])\n    }\n\n    # Overwrite the default arguments with the ones from the yaml file\n    args_dict.update(flatten_dict(config_dict))\n\n    # Overwrite the result of the above with any non-default CLI args\n    args_dict.update(non_default_passed_args_dict)\n\n    # Create a new Namespace using the assembled dictionary\n    new_args = argparse.Namespace(**args_dict)\n    assert getattr(\n        new_args, \"dataset\"\n    ), \"No dataset specified in the passed config file, provide one with the `--dataset` argument or add it to the config file\"\n    assert all(\n        getattr(new_args, req_arg[\"arg\"]) for req_arg in required_args\n    ), f\"Required arguments are missing from the passed config file: {[ra['module'] + ':' + ra['arg'] for ra in required_args if not getattr(new_args, ra['arg'])]}\"\n\n    # Run the appropriate execution function(s)\n    if not new_args.seed:\n        warnings.warn(\"No seed has been specified, meaning the results of this run may not be reproducible.\")\n    new_args.version = version\n    new_args.modules_to_run = modules_to_run\n    new_args.module_handover = {}\n    for module in new_args.modules_to_run:\n        MODULE_MAP[module](new_args)\n\n    return new_args\n
"},{"location":"reference/cli/config/#nhssynth.cli.config.write_config","title":"write_config(args, all_subparsers)","text":"

Assembles a configuration dictionary from the run config and writes it to a YAML file at the location specified by args.save_config_path.

Parameters:

Name Type Description Default args Namespace

A namespace containing the run's configuration.

required all_subparsers dict[str, ArgumentParser]

A dictionary containing all subparsers for the config args.

required Source code in src/nhssynth/cli/config.py
def write_config(\n    args: argparse.Namespace,\n    all_subparsers: dict[str, argparse.ArgumentParser],\n) -> None:\n    \"\"\"\n    Assembles a configuration dictionary from the run config and writes it to a YAML file at the location specified by `args.save_config_path`.\n\n    Args:\n        args: A namespace containing the run's configuration.\n        all_subparsers: A dictionary containing all subparsers for the config args.\n    \"\"\"\n    experiment_name = args.experiment_name\n    args_dict = assemble_config(args, all_subparsers)\n    with open(f\"experiments/{experiment_name}/config_{experiment_name}.yaml\", \"w\") as yaml_file:\n        yaml.dump(args_dict, yaml_file, default_flow_style=False, sort_keys=False)\n
"},{"location":"reference/cli/model_arguments/","title":"model_arguments","text":"

Define arguments for each of the model classes.

"},{"location":"reference/cli/model_arguments/#nhssynth.cli.model_arguments.add_gan_args","title":"add_gan_args(group, overrides=False)","text":"

Adds arguments to an existing group for the GAN model.

Source code in src/nhssynth/cli/model_arguments.py
def add_gan_args(group: argparse._ArgumentGroup, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing group for the GAN model.\"\"\"\n    group.add_argument(\n        \"--n-units-conditional\",\n        type=int,\n        help=\"the number of units in the conditional layer\",\n    )\n    group.add_argument(\n        \"--generator-n-layers-hidden\",\n        type=int,\n        help=\"the number of hidden layers in the generator\",\n    )\n    group.add_argument(\n        \"--generator-n-units-hidden\",\n        type=int,\n        help=\"the number of units in each hidden layer of the generator\",\n    )\n    group.add_argument(\n        \"--generator-activation\",\n        type=str,\n        choices=list(ACTIVATION_FUNCTIONS.keys()),\n        help=\"the activation function of the generator\",\n    )\n    group.add_argument(\n        \"--generator-batch-norm\",\n        action=\"store_true\",\n        help=\"whether to use batch norm in the generator\",\n    )\n    group.add_argument(\n        \"--generator-dropout\",\n        type=float,\n        help=\"the dropout rate in the generator\",\n    )\n    group.add_argument(\n        \"--generator-lr\",\n        type=float,\n        help=\"the learning rate for the generator\",\n    )\n    group.add_argument(\n        \"--generator-residual\",\n        action=\"store_true\",\n        help=\"whether to use residual connections in the generator\",\n    )\n    group.add_argument(\n        \"--generator-opt-betas\",\n        type=float,\n        nargs=2,\n        help=\"the beta values for the generator optimizer\",\n    )\n    group.add_argument(\n        \"--discriminator-n-layers-hidden\",\n        type=int,\n        help=\"the number of hidden layers in the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-n-units-hidden\",\n        type=int,\n        help=\"the number of units in each hidden layer of the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-activation\",\n        type=str,\n        choices=list(ACTIVATION_FUNCTIONS.keys()),\n        help=\"the activation function of the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-batch-norm\",\n        action=\"store_true\",\n        help=\"whether to use batch norm in the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-dropout\",\n        type=float,\n        help=\"the dropout rate in the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-lr\",\n        type=float,\n        help=\"the learning rate for the discriminator\",\n    )\n    group.add_argument(\n        \"--discriminator-opt-betas\",\n        type=float,\n        nargs=2,\n        help=\"the beta values for the discriminator optimizer\",\n    )\n    group.add_argument(\n        \"--clipping-value\",\n        type=float,\n        help=\"the clipping value for the discriminator\",\n    )\n    group.add_argument(\n        \"--lambda-gradient-penalty\",\n        type=float,\n        help=\"the gradient penalty coefficient\",\n    )\n
"},{"location":"reference/cli/model_arguments/#nhssynth.cli.model_arguments.add_model_specific_args","title":"add_model_specific_args(group, name, overrides=False)","text":"

Adds arguments to an existing group according to name.

Source code in src/nhssynth/cli/model_arguments.py
def add_model_specific_args(group: argparse._ArgumentGroup, name: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing group according to `name`.\"\"\"\n    if name == \"VAE\":\n        add_vae_args(group, overrides)\n    elif name == \"GAN\":\n        add_gan_args(group, overrides)\n    elif name == \"TabularGAN\":\n        add_tabular_gan_args(group, overrides)\n
"},{"location":"reference/cli/model_arguments/#nhssynth.cli.model_arguments.add_vae_args","title":"add_vae_args(group, overrides=False)","text":"

Adds arguments to an existing group for the VAE model.

Source code in src/nhssynth/cli/model_arguments.py
def add_vae_args(group: argparse._ArgumentGroup, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing group for the VAE model.\"\"\"\n    group.add_argument(\n        \"--encoder-latent-dim\",\n        type=int,\n        nargs=\"+\",\n        help=\"the latent dimension of the encoder\",\n    )\n    group.add_argument(\n        \"--encoder-hidden-dim\",\n        type=int,\n        nargs=\"+\",\n        help=\"the hidden dimension of the encoder\",\n    )\n    group.add_argument(\n        \"--encoder-activation\",\n        type=str,\n        nargs=\"+\",\n        choices=list(ACTIVATION_FUNCTIONS.keys()),\n        help=\"the activation function of the encoder\",\n    )\n    group.add_argument(\n        \"--encoder-learning-rate\",\n        type=float,\n        nargs=\"+\",\n        help=\"the learning rate for the encoder\",\n    )\n    group.add_argument(\n        \"--decoder-latent-dim\",\n        type=int,\n        nargs=\"+\",\n        help=\"the latent dimension of the decoder\",\n    )\n    group.add_argument(\n        \"--decoder-hidden-dim\",\n        type=int,\n        nargs=\"+\",\n        help=\"the hidden dimension of the decoder\",\n    )\n    group.add_argument(\n        \"--decoder-activation\",\n        type=str,\n        nargs=\"+\",\n        choices=list(ACTIVATION_FUNCTIONS.keys()),\n        help=\"the activation function of the decoder\",\n    )\n    group.add_argument(\n        \"--decoder-learning-rate\",\n        type=float,\n        nargs=\"+\",\n        help=\"the learning rate for the decoder\",\n    )\n    group.add_argument(\n        \"--shared-optimizer\",\n        action=\"store_true\",\n        help=\"whether to use a shared optimizer for the encoder and decoder\",\n    )\n
"},{"location":"reference/cli/module_arguments/","title":"module_arguments","text":"

Define arguments for each of the modules' CLI sub-parsers.

"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.AllChoicesDefault","title":"AllChoicesDefault","text":"

Bases: Action

Customised argparse action that defaults to the full list of choices when only the argument's flag is supplied (i.e. if a user passes --metrics with no follow-up list of metric groups, all metric groups will be executed).

Notes

1) If no option_string is supplied, set to the default value (self.default). 2) If option_string is supplied: a) if values are supplied, set to the list of values; b) if no values are supplied, set to self.const, or to self.default if self.const is not set.

Source code in src/nhssynth/cli/module_arguments.py
class AllChoicesDefault(argparse.Action):\n    \"\"\"\n    Customised argparse action for defaulting to the full list of choices if only the argument's flag is supplied:\n    (i.e. user passes `--metrics` with no follow up list of metric groups => all metric groups will be executed).\n\n    Notes:\n        1) If no `option_string` is supplied: set to default value (`self.default`)\n        2) If `option_string` is supplied:\n            a) If `values` are supplied, set to list of values\n            b) If no `values` are supplied, set to `self.const`, if `self.const` is not set, set to `self.default`\n    \"\"\"\n\n    def __call__(self, parser, namespace, values=None, option_string=None):\n        if values:\n            setattr(namespace, self.dest, values)\n        elif option_string:\n            setattr(namespace, self.dest, self.const if self.const else self.default)\n        else:\n            setattr(namespace, self.dest, self.default)\n
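
As a hedged illustration of how this action might be attached to an argument (the flag name follows the note above, but the metric group names, nargs setting and defaults are purely illustrative):

import argparse\n\nparser = argparse.ArgumentParser()\nparser.add_argument(\n    \"--metrics\",\n    action=AllChoicesDefault,\n    nargs=\"*\",\n    choices=[\"quality\", \"privacy\"],\n    const=[\"quality\", \"privacy\"],\n    default=[],\n    help=\"the metric groups to evaluate\",\n)\nparser.parse_args([\"--metrics\"]).metrics             # ['quality', 'privacy'] (falls back to `const`)\nparser.parse_args([\"--metrics\", \"quality\"]).metrics  # ['quality']\nparser.parse_args([]).metrics                        # [] (flag absent, so `default` applies)\n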
"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.add_dataloader_args","title":"add_dataloader_args(parser, group_title, overrides=False)","text":"

Adds arguments to an existing dataloader module sub-parser instance.

Source code in src/nhssynth/cli/module_arguments.py
def add_dataloader_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing dataloader module sub-parser instance.\"\"\"\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(\n        \"--data-dir\",\n        type=str,\n        default=\"./data\",\n        help=\"the directory containing the chosen dataset\",\n    )\n    group.add_argument(\n        \"--index-col\",\n        default=None,\n        nargs=\"*\",\n        help=\"indicate the name of the index column(s) in the csv file, such that pandas can index by it\",\n    )\n    group.add_argument(\n        \"--constraint-graph\",\n        type=str,\n        default=\"_constraint_graph\",\n        help=\"the name of the html file to write the constraint graph to, defaults to `<DATASET>_constraint_graph`\",\n    )\n    group.add_argument(\n        \"--collapse-yaml\",\n        action=\"store_true\",\n        help=\"use aliases and anchors in the output metadata yaml, this will make it much more compact\",\n    )\n    group.add_argument(\n        \"--missingness\",\n        type=str,\n        default=\"augment\",\n        choices=MISSINGNESS_STRATEGIES,\n        help=\"how to handle missing values in the dataset\",\n    )\n    group.add_argument(\n        \"--impute\",\n        type=str,\n        default=None,\n        help=\"the imputation strategy to use, ONLY USED if <MISSINGNESS> is set to 'impute', choose from: 'mean', 'median', 'mode', or any specific value (e.g. '0')\",\n    )\n    group.add_argument(\n        \"--write-csv\",\n        action=\"store_true\",\n        help=\"write the transformed real data to a csv file\",\n    )\n
"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.add_evaluation_args","title":"add_evaluation_args(parser, group_title, overrides=False)","text":"

Adds arguments to an existing evaluation module sub-parser instance.

Source code in src/nhssynth/cli/module_arguments.py
def add_evaluation_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing evaluation module sub-parser instance.\"\"\"\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(\n        \"--downstream-tasks\",\n        \"--tasks\",\n        action=\"store_true\",\n        help=\"run the downstream tasks evaluation\",\n    )\n    group.add_argument(\n        \"--tasks-dir\",\n        type=str,\n        default=\"./tasks\",\n        help=\"the directory containing the downstream tasks to run, this directory must contain a folder called <DATASET> containing the tasks to run\",\n    )\n    group.add_argument(\n        \"--aequitas\",\n        action=\"store_true\",\n        help=\"run the aequitas fairness evaluation (note this runs for each of the downstream tasks)\",\n    )\n    group.add_argument(\n        \"--aequitas-attributes\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the attributes to use for the aequitas fairness evaluation, defaults to all attributes\",\n    )\n    group.add_argument(\n        \"--key-numerical-fields\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the numerical key field attributes to use for SDV privacy evaluations\",\n    )\n    group.add_argument(\n        \"--sensitive-numerical-fields\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the numerical sensitive field attributes to use for SDV privacy evaluations\",\n    )\n    group.add_argument(\n        \"--key-categorical-fields\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the categorical key field attributes to use for SDV privacy evaluations\",\n    )\n    group.add_argument(\n        \"--sensitive-categorical-fields\",\n        type=str,\n        nargs=\"+\",\n        default=None,\n        help=\"the categorical sensitive field attributes to use for SDV privacy evaluations\",\n    )\n    for name in METRIC_CHOICES:\n        generate_evaluation_arg(group, name)\n
"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.add_model_args","title":"add_model_args(parser, group_title, overrides=False)","text":"

Adds arguments to an existing model module sub-parser instance.

Source code in src/nhssynth/cli/module_arguments.py
def add_model_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing model module sub-parser instance.\"\"\"\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(\n        \"--architecture\",\n        type=str,\n        nargs=\"+\",\n        default=[\"VAE\"],\n        choices=MODELS,\n        help=\"the model architecture(s) to train\",\n    )\n    group.add_argument(\n        \"--repeats\",\n        type=int,\n        default=1,\n        help=\"how many times to repeat the training process per model architecture (<SEED> is incremented each time)\",\n    )\n    group.add_argument(\n        \"--batch-size\",\n        type=int,\n        nargs=\"+\",\n        default=32,\n        help=\"the batch size for the model\",\n    )\n    group.add_argument(\n        \"--num-epochs\",\n        type=int,\n        nargs=\"+\",\n        default=100,\n        help=\"number of epochs to train for\",\n    )\n    group.add_argument(\n        \"--patience\",\n        type=int,\n        nargs=\"+\",\n        default=5,\n        help=\"how many epochs the model is allowed to train for without improvement\",\n    )\n    group.add_argument(\n        \"--displayed-metrics\",\n        type=str,\n        nargs=\"+\",\n        default=[],\n        help=\"metrics to display during training of the model, when set to `None`, all metrics are displayed\",\n    )\n    group.add_argument(\n        \"--use-gpu\",\n        action=\"store_true\",\n        help=\"use the GPU for training\",\n    )\n    group.add_argument(\n        \"--num-samples\",\n        type=int,\n        default=None,\n        help=\"the number of samples to generate from the model, defaults to the size of the original dataset\",\n    )\n    privacy_group = parser.add_argument_group(title=\"model privacy options\")\n    privacy_group.add_argument(\n        \"--target-epsilon\",\n        type=float,\n        nargs=\"+\",\n        default=1.0,\n        help=\"the target epsilon for differential privacy\",\n    )\n    privacy_group.add_argument(\n        \"--target-delta\",\n        type=float,\n        nargs=\"+\",\n        help=\"the target delta for differential privacy, defaults to `1 / len(dataset)` if not specified\",\n    )\n    privacy_group.add_argument(\n        \"--max-grad-norm\",\n        type=float,\n        nargs=\"+\",\n        default=5.0,\n        help=\"the clipping threshold for gradients (only relevant under differential privacy)\",\n    )\n    privacy_group.add_argument(\n        \"--secure-mode\",\n        action=\"store_true\",\n        help=\"Enable secure RNG via the `csprng` package to make privacy guarantees more robust, comes at a cost of performance and reproducibility\",\n    )\n    for model_name in MODELS.keys():\n        model_group = parser.add_argument_group(title=f\"{model_name}-specific options\")\n        add_model_specific_args(model_group, model_name, overrides=overrides)\n
"},{"location":"reference/cli/module_arguments/#nhssynth.cli.module_arguments.add_plotting_args","title":"add_plotting_args(parser, group_title, overrides=False)","text":"

Adds arguments to an existing plotting module sub-parser instance.

Source code in src/nhssynth/cli/module_arguments.py
def add_plotting_args(parser: argparse.ArgumentParser, group_title: str, overrides: bool = False) -> None:\n    \"\"\"Adds arguments to an existing plotting module sub-parser instance.\"\"\"\n    group = parser.add_argument_group(title=group_title)\n    group.add_argument(\n        \"--plot-quality\",\n        action=\"store_true\",\n        help=\"plot the SDV quality report\",\n    )\n    group.add_argument(\n        \"--plot-diagnostic\",\n        action=\"store_true\",\n        help=\"plot the SDV diagnostic report\",\n    )\n    group.add_argument(\n        \"--plot-sdv-report\",\n        action=\"store_true\",\n        help=\"plot the SDV report\",\n    )\n    group.add_argument(\n        \"--plot-tsne\",\n        action=\"store_true\",\n        help=\"plot the t-SNE embeddings of the real and synthetic data\",\n    )\n
"},{"location":"reference/cli/module_setup/","title":"module_setup","text":"

Specify all CLI-accessible modules and their configurations, the pipeline to run by default, and define special functions for the config and pipeline CLI option trees.

"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.ModuleConfig","title":"ModuleConfig","text":"

Represents a module's configuration, containing the following attributes:

Attributes:

Name Type Description func

A callable that executes the module's functionality.

add_args

A callable that populates the module's sub-parser arguments.

description

A description of the module's functionality.

help

A help message for the module's command-line interface.

common_parsers

A list of common parsers to add to the module's sub-parser; the 'core' and 'seed' parsers are appended to those passed automatically.

Source code in src/nhssynth/cli/module_setup.py
class ModuleConfig:\n    \"\"\"\n    Represents a module's configuration, containing the following attributes:\n\n    Attributes:\n        func: A callable that executes the module's functionality.\n        add_args: A callable that populates the module's sub-parser arguments.\n        description: A description of the module's functionality.\n        help: A help message for the module's command-line interface.\n        common_parsers: A list of common parsers to add to the module's sub-parser, appending the 'dataset' and 'core' parsers to those passed.\n    \"\"\"\n\n    def __init__(\n        self,\n        func: Callable[..., argparse.Namespace],\n        add_args: Callable[..., None],\n        description: str,\n        help: str,\n        common_parsers: Optional[list[str]] = None,\n        no_seed: bool = False,\n    ) -> None:\n        self.func = func\n        self.add_args = add_args\n        self.description = description\n        self.help = help\n        self.common_parsers = [\"core\", \"seed\"] if not no_seed else [\"core\"]\n        if common_parsers:\n            assert set(common_parsers) <= COMMON_PARSERS.keys(), \"Invalid common parser(s) specified.\"\n            # merge the below two assert statements\n            assert (\n                \"core\" not in common_parsers and \"seed\" not in common_parsers\n            ), \"The 'seed' and 'core' parser groups are automatically added to all modules, remove the from `ModuleConfig`s.\"\n            self.common_parsers += common_parsers\n\n    def __call__(self, args: argparse.Namespace) -> argparse.Namespace:\n        return self.func(args)\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.add_config_args","title":"add_config_args(parser)","text":"

Adds arguments to parser relating to configuration file handling and module-specific config overrides.

Source code in src/nhssynth/cli/module_setup.py
def add_config_args(parser: argparse.ArgumentParser) -> None:\n    \"\"\"Adds arguments to `parser` relating to configuration file handling and module-specific config overrides.\"\"\"\n    parser.add_argument(\n        \"-c\",\n        \"--input-config\",\n        required=True,\n        help=\"specify the config file name\",\n    )\n    parser.add_argument(\n        \"-cp\",\n        \"--custom-pipeline\",\n        action=\"store_true\",\n        help=\"infer a custom pipeline running order of modules from the config\",\n    )\n    for module_name in PIPELINE:\n        MODULE_MAP[module_name].add_args(parser, f\"{module_name} option overrides\", overrides=True)\n    for module_name in VALID_MODULES - set(PIPELINE):\n        MODULE_MAP[module_name].add_args(parser, f\"{module_name} options overrides\", overrides=True)\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.add_pipeline_args","title":"add_pipeline_args(parser)","text":"

Adds arguments to parser for each module in the pipeline.

Source code in src/nhssynth/cli/module_setup.py
def add_pipeline_args(parser: argparse.ArgumentParser) -> None:\n    \"\"\"Adds arguments to `parser` for each module in the pipeline.\"\"\"\n    for module_name in PIPELINE:\n        MODULE_MAP[module_name].add_args(parser, f\"{module_name} options\")\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.add_subparser","title":"add_subparser(subparsers, name, module_config)","text":"

Add a subparser to an argparse argument parser.

Parameters:

Name Type Description Default subparsers _SubParsersAction

The subparsers action to which the subparser will be added.

required name str

The name of the subparser.

required module_config ModuleConfig

A ModuleConfig object containing information about the subparser, including a function to execute and a function to add arguments.

required

Returns:

Type Description ArgumentParser

The newly created subparser.

Source code in src/nhssynth/cli/module_setup.py
def add_subparser(\n    subparsers: argparse._SubParsersAction,\n    name: str,\n    module_config: ModuleConfig,\n) -> argparse.ArgumentParser:\n    \"\"\"\n    Add a subparser to an argparse argument parser.\n\n    Args:\n        subparsers: The subparsers action to which the subparser will be added.\n        name: The name of the subparser.\n        module_config: A [`ModuleConfig`][nhssynth.cli.module_setup.ModuleConfig] object containing information about the subparser, including a function to execute and a function to add arguments.\n\n    Returns:\n        The newly created subparser.\n    \"\"\"\n    parent_parsers = get_parent_parsers(name, module_config.common_parsers)\n    parser = subparsers.add_parser(\n        name=name,\n        description=module_config.description,\n        help=module_config.help,\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n        parents=parent_parsers,\n    )\n    if name not in {\"pipeline\", \"config\"}:\n        module_config.add_args(parser, f\"{name} options\")\n    else:\n        module_config.add_args(parser)\n    parser.set_defaults(func=module_config.func)\n    return parser\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.get_parent_parsers","title":"get_parent_parsers(name, module_parsers)","text":"

Get a list of parent parsers for a given module, based on the module's common_parsers attribute.

Source code in src/nhssynth/cli/module_setup.py
def get_parent_parsers(name: str, module_parsers: list[str]) -> list[argparse.ArgumentParser]:\n    \"\"\"Get a list of parent parsers for a given module, based on the module's `common_parsers` attribute.\"\"\"\n    if name in {\"pipeline\", \"config\"}:\n        return [p(name == \"config\") for p in COMMON_PARSERS.values()]\n    elif name == \"dashboard\":\n        return [COMMON_PARSERS[pn](True) for pn in module_parsers]\n    else:\n        return [COMMON_PARSERS[pn]() for pn in module_parsers]\n
"},{"location":"reference/cli/module_setup/#nhssynth.cli.module_setup.run_pipeline","title":"run_pipeline(args)","text":"

Runs the specified pipeline of modules with the passed configuration args.

Source code in src/nhssynth/cli/module_setup.py
def run_pipeline(args: argparse.Namespace) -> None:\n    \"\"\"Runs the specified pipeline of modules with the passed configuration `args`.\"\"\"\n    print(\"Running full pipeline...\")\n    args.modules_to_run = PIPELINE\n    for module_name in PIPELINE:\n        args = MODULE_MAP[module_name](args)\n
"},{"location":"reference/cli/run/","title":"run","text":""},{"location":"reference/common/","title":"common","text":""},{"location":"reference/common/common/","title":"common","text":"

Common functions for all modules.

"},{"location":"reference/common/common/#nhssynth.common.common.set_seed","title":"set_seed(seed=None)","text":"

(Potentially) set the seed for numpy, torch and random. If no seed is provided, nothing happens.

Parameters:

Name Type Description Default seed Optional[int]

The seed to set.

None Source code in src/nhssynth/common/common.py
def set_seed(seed: Optional[int] = None) -> None:\n    \"\"\"\n    (Potentially) set the seed for numpy, torch and random. If no seed is provided, nothing happens.\n\n    Args:\n        seed: The seed to set.\n    \"\"\"\n    if seed:\n        np.random.seed(seed)\n        torch.manual_seed(seed)\n        random.seed(seed)\n
"},{"location":"reference/common/constants/","title":"constants","text":"

Define all of the common constants used throughout the project.

"},{"location":"reference/common/debugging/","title":"debugging","text":"

Debugging utilities.

"},{"location":"reference/common/dicts/","title":"dicts","text":"

Common functions for working with dictionaries.

"},{"location":"reference/common/dicts/#nhssynth.common.dicts.filter_dict","title":"filter_dict(d, filter_keys, include=False)","text":"

Given a dictionary, return a new dictionary either including or excluding keys in a given filter set.

Parameters:

Name Type Description Default d dict

A dictionary to filter.

required filter_keys Union[set, list]

A list or set of keys to either include or exclude.

required include bool

Determine whether to return a dictionary including or excluding keys in filter.

False

Returns:

Type Description dict

A filtered dictionary.

Examples:

>>> d = {'a': 1, 'b': 2, 'c': 3}\n>>> filter_dict(d, {'a', 'b'})\n{'c': 3}\n>>> filter_dict(d, {'a', 'b'}, include=True)\n{'a': 1, 'b': 2}\n
Source code in src/nhssynth/common/dicts.py
def filter_dict(d: dict, filter_keys: Union[set, list], include: bool = False) -> dict:\n    \"\"\"\n    Given a dictionary, return a new dictionary either including or excluding keys in a given `filter` set.\n\n    Args:\n        d: A dictionary to filter.\n        filter_keys: A list or set of keys to either include or exclude.\n        include: Determine whether to return a dictionary including or excluding keys in `filter`.\n\n    Returns:\n        A filtered dictionary.\n\n    Examples:\n        >>> d = {'a': 1, 'b': 2, 'c': 3}\n        >>> filter_dict(d, {'a', 'b'})\n        {'c': 3}\n        >>> filter_dict(d, {'a', 'b'}, include=True)\n        {'a': 1, 'b': 2}\n    \"\"\"\n    if include:\n        filtered_keys = set(filter_keys) & set(d.keys())\n    else:\n        filtered_keys = set(d.keys()) - set(filter_keys)\n    return {k: v for k, v in d.items() if k in filtered_keys}\n
"},{"location":"reference/common/dicts/#nhssynth.common.dicts.flatten_dict","title":"flatten_dict(d)","text":"

Flatten a dictionary by recursively combining nested keys into a single dictionary until no nested keys remain.

Parameters:

Name Type Description Default d dict[str, Any]

A dictionary with potentially nested keys.

required

Returns:

Type Description dict[str, Any]

A flattened dictionary.

Raises:

Type Description ValueError

If duplicate keys are found in the flattened dictionary.

Examples:

>>> d = {'a': 1, 'b': {'c': 2, 'd': {'e': 3}}}\n>>> flatten_dict(d)\n{'a': 1, 'c': 2, 'e': 3}\n
Source code in src/nhssynth/common/dicts.py
def flatten_dict(d: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"\n    Flatten a dictionary by recursively combining nested keys into a single dictionary until no nested keys remain.\n\n    Args:\n        d: A dictionary with potentially nested keys.\n\n    Returns:\n        A flattened dictionary.\n\n    Raises:\n        ValueError: If duplicate keys are found in the flattened dictionary.\n\n    Examples:\n        >>> d = {'a': 1, 'b': {'c': 2, 'd': {'e': 3}}}\n        >>> flatten_dict(d)\n        {'a': 1, 'c': 2, 'e': 3}\n    \"\"\"\n    items = []\n    for k, v in d.items():\n        if isinstance(v, dict):\n            items.extend(flatten_dict(v).items())\n        else:\n            items.append((k, v))\n    if len(set([p[0] for p in items])) != len(items):\n        raise ValueError(\"Duplicate keys found in flattened dictionary\")\n    return dict(items)\n
"},{"location":"reference/common/dicts/#nhssynth.common.dicts.get_key_by_value","title":"get_key_by_value(d, value)","text":"

Find the first key in a dictionary with a given value.

Parameters:

Name Type Description Default d dict

A dictionary to search through.

required value Any

The value to search for.

required

Returns:

Type Description Union[Any, None]

The first key in d with the value value, or None if no such key exists.

Examples:

>>> d = {'a': 1, 'b': 2, 'c': 1}\n>>> get_key_by_value(d, 2)\n'b'\n>>> get_key_by_value(d, 3)\nNone\n
Source code in src/nhssynth/common/dicts.py
def get_key_by_value(d: dict, value: Any) -> Union[Any, None]:\n    \"\"\"\n    Find the first key in a dictionary with a given value.\n\n    Args:\n        d: A dictionary to search through.\n        value: The value to search for.\n\n    Returns:\n        The first key in `d` with the value `value`, or `None` if no such key exists.\n\n    Examples:\n        >>> d = {'a': 1, 'b': 2, 'c': 1}\n        >>> get_key_by_value(d, 2)\n        'b'\n        >>> get_key_by_value(d, 3)\n        None\n\n    \"\"\"\n    for key, val in d.items():\n        if val == value:\n            return key\n    return None\n
"},{"location":"reference/common/io/","title":"io","text":"

Common building-block functions for handling module input and output.

"},{"location":"reference/common/io/#nhssynth.common.io.check_exists","title":"check_exists(fns, dir)","text":"

Checks if the files in fns exist in dir.

Parameters:

Name Type Description Default fns list[str]

The list of files to check.

required dir Path

The directory the files should exist in.

required

Raises:

Type Description FileNotFoundError

If any of the files in fns do not exist in dir.

Source code in src/nhssynth/common/io.py
def check_exists(fns: list[str], dir: Path) -> None:\n    \"\"\"\n    Checks if the files in `fns` exist in `dir`.\n\n    Args:\n        fns: The list of files to check.\n        dir: The directory the files should exist in.\n\n    Raises:\n        FileNotFoundError: If any of the files in `fns` do not exist in `dir`.\n    \"\"\"\n    for fn in fns:\n        if not (dir / fn).exists():\n            raise FileNotFoundError(f\"File {fn} does not exist at {dir}.\")\n
"},{"location":"reference/common/io/#nhssynth.common.io.consistent_ending","title":"consistent_ending(fn, ending='.pkl', suffix='')","text":"

Ensures that the filename fn ends with ending. If not, removes any existing ending and appends ending.

Parameters:

Name Type Description Default fn str

The filename to check.

required ending str

The desired ending to check for. Default is \".pkl\".

'.pkl' suffix str

A suffix to append to the filename before the ending.

''

Returns:

Type Description str

The filename with the correct ending and potentially an inserted suffix.

Source code in src/nhssynth/common/io.py
def consistent_ending(fn: str, ending: str = \".pkl\", suffix: str = \"\") -> str:\n    \"\"\"\n    Ensures that the filename `fn` ends with `ending`. If not, removes any existing ending and appends `ending`.\n\n    Args:\n        fn: The filename to check.\n        ending: The desired ending to check for. Default is \".pkl\".\n        suffix: A suffix to append to the filename before the ending.\n\n    Returns:\n        The filename with the correct ending and potentially an inserted suffix.\n    \"\"\"\n    path_fn = Path(fn)\n    return str(path_fn.parent / path_fn.stem) + (\"_\" if suffix else \"\") + suffix + ending\n
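
For example, based on the source above:

>>> consistent_ending(\"model.pt\")\n'model.pkl'\n>>> consistent_ending(\"report\", ending=\".html\", suffix=\"v2\")\n'report_v2.html'\n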
"},{"location":"reference/common/io/#nhssynth.common.io.consistent_endings","title":"consistent_endings(args)","text":"

Wrapper around consistent_ending to apply it to a list of filenames.

Parameters:

Name Type Description Default args list[Union[str, tuple[str, str], tuple[str, str, str]]]

The list of filenames to check. Can take the form of a single filename, a pair of a filename and an ending, or a triple of a filename, an ending and a suffix.

required

Returns:

Type Description list[str]

The list of filenames with the correct endings.

Source code in src/nhssynth/common/io.py
def consistent_endings(args: list[Union[str, tuple[str, str], tuple[str, str, str]]]) -> list[str]:\n    \"\"\"\n    Wrapper around `consistent_ending` to apply it to a list of filenames.\n\n    Args:\n        args: The list of filenames to check. Can take the form of a single filename, a pair of a filename and an ending, or a triple of a filename, an ending and a suffix.\n\n    Returns:\n        The list of filenames with the correct endings.\n    \"\"\"\n    return list(consistent_ending(arg) if isinstance(arg, str) else consistent_ending(*arg) for arg in args)\n
"},{"location":"reference/common/io/#nhssynth.common.io.experiment_io","title":"experiment_io(experiment_name, dir_experiments='experiments')","text":"

Create an experiment's directory and return the path.

Parameters:

Name Type Description Default experiment_name str

The name of the experiment.

required dir_experiments str

The name of the directory containing all experiments.

'experiments'

Returns:

Type Description str

The path to the experiment directory.

Source code in src/nhssynth/common/io.py
def experiment_io(experiment_name: str, dir_experiments: str = \"experiments\") -> str:\n    \"\"\"\n    Create an experiment's directory and return the path.\n\n    Args:\n        experiment_name: The name of the experiment.\n        dir_experiments: The name of the directory containing all experiments.\n\n    Returns:\n        The path to the experiment directory.\n    \"\"\"\n    dir_experiment = Path(dir_experiments) / experiment_name\n    dir_experiment.mkdir(parents=True, exist_ok=True)\n    return dir_experiment\n
"},{"location":"reference/common/io/#nhssynth.common.io.potential_suffix","title":"potential_suffix(fn, fn_base)","text":"

Checks if fn is a suffix (starts with an underscore) to append to fn_base, or a filename in its own right.

Parameters:

Name Type Description Default fn str

The filename / potential suffix to append to fn_base.

required fn_base str

The name of the file the suffix would attach to.

required

Returns:

Type Description str

The appropriately processed fn.

Source code in src/nhssynth/common/io.py
def potential_suffix(fn: str, fn_base: str) -> str:\n    \"\"\"\n    Checks if `fn` is a suffix (starts with an underscore) to append to `fn_base`, or a filename in its own right.\n\n    Args:\n        fn: The filename / potential suffix to append to `fn_base`.\n        fn_base: The name of the file the suffix would attach to.\n\n    Returns:\n        The appropriately processed `fn`\n    \"\"\"\n    fn_base = Path(fn_base).stem\n    if fn[0] == \"_\":\n        return fn_base + fn\n    else:\n        return fn\n
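
For example:

>>> potential_suffix(\"_typed\", \"dataset.csv\")\n'dataset_typed'\n>>> potential_suffix(\"other_file\", \"dataset.csv\")\n'other_file'\n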
"},{"location":"reference/common/io/#nhssynth.common.io.potential_suffixes","title":"potential_suffixes(fns, fn_base)","text":"

Wrapper around potential_suffix to apply it to a list of filenames.

Parameters:

Name Type Description Default fns list[str]

The list of filenames / potential suffixes to append to fn_base.

required fn_base str

The name of the file the suffixes would attach to.

required Source code in src/nhssynth/common/io.py
def potential_suffixes(fns: list[str], fn_base: str) -> list[str]:\n    \"\"\"\n    Wrapper around `potential_suffix` to apply it to a list of filenames.\n\n    Args:\n        fns: The list of filenames / potential suffixes to append to `fn_base`.\n        fn_base: The name of the file the suffixes would attach to.\n    \"\"\"\n    return list(potential_suffix(fn, fn_base) for fn in fns)\n
"},{"location":"reference/common/io/#nhssynth.common.io.warn_if_path_supplied","title":"warn_if_path_supplied(fns, dir)","text":"

Warns if the files in fns include directory separators.

Parameters:

Name Type Description Default fns list[str]

The list of files to check.

required dir Path

The directory the files should exist in.

required

Warns:

Type Description UserWarning

when the path to any of the files in fns includes directory separators, as this may lead to unintended consequences if the user doesn't realise default directories are pre-specified.

Source code in src/nhssynth/common/io.py
def warn_if_path_supplied(fns: list[str], dir: Path) -> None:\n    \"\"\"\n    Warns if the files in `fns` include directory separators.\n\n    Args:\n        fns: The list of files to check.\n        dir: The directory the files should exist in.\n\n    Warnings:\n        UserWarning: when the path to any of the files in `fns` includes directory separators, as this may lead to unintended consequences if the user doesn't realise default directories are pre-specified.\n    \"\"\"\n    for fn in fns:\n        if \"/\" in fn:\n            warnings.warn(\n                f\"Using the path supplied appended to {dir}, i.e. attempting to read data from {dir / fn}\",\n                UserWarning,\n            )\n
"},{"location":"reference/common/strings/","title":"strings","text":"

String manipulation functions.

"},{"location":"reference/common/strings/#nhssynth.common.strings.add_spaces_before_caps","title":"add_spaces_before_caps(string)","text":"

Adds spaces before capital letters in a string if there is a lower-case letter following it.

Parameters:

Name Type Description Default string str

The string to add spaces to.

required

Returns:

Type Description str

The string with spaces added before capital letters.

Examples:

>>> add_spaces_before_caps(\"HelloWorld\")\n'Hello World'\n>>> add_spaces_before_caps(\"HelloWorldAGAIN\")\n'Hello World AGAIN'\n
Source code in src/nhssynth/common/strings.py
def add_spaces_before_caps(string: str) -> str:\n    \"\"\"\n    Adds spaces before capital letters in a string if there is a lower-case letter following it.\n\n    Args:\n        string: The string to add spaces to.\n\n    Returns:\n        The string with spaces added before capital letters.\n\n    Examples:\n        >>> add_spaces_before_caps(\"HelloWorld\")\n        'Hello World'\n        >>> add_spaces_before_caps(\"HelloWorldAGAIN\")\n        'Hello World AGAIN'\n    \"\"\"\n    return \" \".join(re.findall(r\"[a-z]?[A-Z][a-z]+|[A-Z]+(?=[A-Z][a-z]|\\b)\", string))\n
"},{"location":"reference/common/strings/#nhssynth.common.strings.format_timedelta","title":"format_timedelta(start, finish)","text":"

Calculate and prettily format the difference between two calls to time.time().

Parameters:

- start (float, required): The start time.
- finish (float, required): The finish time.

Returns:

- str: A string containing the time difference in a human-readable format.

Source code in src/nhssynth/common/strings.py
def format_timedelta(start: float, finish: float) -> str:\n    \"\"\"\n    Calculate and prettily format the difference between two calls to `time.time()`.\n\n    Args:\n        start: The start time.\n        finish: The finish time.\n\n    Returns:\n        A string containing the time difference in a human-readable format.\n    \"\"\"\n    total = datetime.timedelta(seconds=finish - start)\n    hours, remainder = divmod(total.seconds, 3600)\n    minutes, seconds = divmod(remainder, 60)\n\n    if total.days > 0:\n        delta_str = f\"{total.days}d {hours}h {minutes}m {seconds}s\"\n    elif hours > 0:\n        delta_str = f\"{hours}h {minutes}m {seconds}s\"\n    elif minutes > 0:\n        delta_str = f\"{minutes}m {seconds}s\"\n    else:\n        delta_str = f\"{seconds}s\"\n    return delta_str\n
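A quick illustration of the formatting behaviour (the elapsed time is simulated rather than measured):

import time
from nhssynth.common.strings import format_timedelta

start = time.time()
finish = start + 3725  # pretend 1 hour, 2 minutes and 5 seconds have elapsed
print(format_timedelta(start, finish))  # "1h 2m 5s"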
"},{"location":"reference/modules/","title":"modules","text":""},{"location":"reference/modules/dashboard/","title":"dashboard","text":""},{"location":"reference/modules/dashboard/Upload/","title":"Upload","text":""},{"location":"reference/modules/dashboard/Upload/#nhssynth.modules.dashboard.Upload.get_component","title":"get_component(args, name, component_type, text)","text":"

Generate an upload field and its functionality for a given component of the evaluations.

Parameters:

- name (str, required): The name of the component as it should be recorded in the session state and as it exists in the args.
- component_type (Any, required): The type of the component (to ensure that only the expected object can be uploaded).
- text (str, required): The human-readable text to display to the user as part of the element.

Source code in src/nhssynth/modules/dashboard/Upload.py
def get_component(args: argparse.Namespace, name: str, component_type: Any, text: str) -> None:\n    \"\"\"\n    Generate an upload field and its functionality for a given component of the evaluations.\n\n    Args:\n        name: The name of the component as it should be recorded in the session state and as it exists in the args.\n        component_type: The type of the component (to ensure that only the expected object can be uploaded)\n        text: The human-readable text to display to the user as part of the element.\n    \"\"\"\n    uploaded = st.file_uploader(f\"Upload a pickle file containing a {text}\", type=\"pkl\")\n    if getattr(args, name):\n        with open(os.getcwd() + \"/\" + getattr(args, name), \"rb\") as f:\n            loaded = pickle.load(f)\n    if uploaded is not None:\n        loaded = pickle.load(uploaded)\n    if loaded is not None:\n        assert isinstance(loaded, component_type), f\"Uploaded file does not contain a {text}!\"\n        st.session_state[name] = loaded.contents\n        st.success(f\"Loaded {text}!\")\n
"},{"location":"reference/modules/dashboard/Upload/#nhssynth.modules.dashboard.Upload.parse_args","title":"parse_args()","text":"

These arguments allow a user to automatically load the required data for the dashboard from disk.

Returns:

- Namespace: The parsed arguments.

Source code in src/nhssynth/modules/dashboard/Upload.py
def parse_args() -> argparse.Namespace:\n    \"\"\"\n    These arguments allow a user to automatically load the required data for the dashboard from disk.\n\n    Returns:\n        The parsed arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"NHSSynth Evaluation Dashboard\")\n    parser.add_argument(\"--evaluations\", type=str, help=\"Path to a set of evaluations.\")\n    parser.add_argument(\"--experiments\", type=str, help=\"Path to a set of experiments.\")\n    parser.add_argument(\"--synthetic-datasets\", type=str, help=\"Path to a set of synthetic datasets.\")\n    parser.add_argument(\"--typed\", type=str, help=\"Path to a typed real dataset.\")\n    return parser.parse_args()\n
"},{"location":"reference/modules/dashboard/io/","title":"io","text":""},{"location":"reference/modules/dashboard/io/#nhssynth.modules.dashboard.io.check_input_paths","title":"check_input_paths(dir_experiment, fn_dataset, fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations)","text":"

Sets up the input paths for the dashboard's data files.

Parameters:

- dir_experiment (str, required): The path to the experiment directory.
- fn_dataset (str, required): The base name of the dataset.
- fn_experiments (str, required): The filename of the collection of experiments.
- fn_synthetic_datasets (str, required): The filename of the collection of synthetic datasets.
- fn_evaluations (str, required): The filename of the collection of evaluations.

Returns:

- The paths to the typed dataset, experiments, synthetic datasets, and evaluations files within the experiment directory.

Source code in src/nhssynth/modules/dashboard/io.py
def check_input_paths(\n    dir_experiment: str,\n    fn_dataset: str,\n    fn_typed: str,\n    fn_experiments: str,\n    fn_synthetic_datasets: str,\n    fn_evaluations: str,\n) -> str:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        dir_experiment: The path to the experiment directory.\n        fn_dataset: The base name of the dataset.\n        fn_experiments: The filename of the collection of experiments.\n        fn_synthetic_datasets: The filename of the collection of synthetic datasets.\n        fn_evaluations: The filename of the collection of evaluations.\n\n    Returns:\n        The paths\n    \"\"\"\n    fn_dataset = Path(fn_dataset).stem\n    fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations = io.consistent_endings(\n        [fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations]\n    )\n    fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations = io.potential_suffixes(\n        [fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], fn_dataset\n    )\n    io.warn_if_path_supplied([fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], dir_experiment)\n    io.check_exists([fn_typed, fn_experiments, fn_synthetic_datasets, fn_evaluations], dir_experiment)\n    return (\n        dir_experiment / fn_typed,\n        dir_experiment / fn_experiments,\n        dir_experiment / fn_synthetic_datasets,\n        dir_experiment / fn_evaluations,\n    )\n
"},{"location":"reference/modules/dashboard/run/","title":"run","text":""},{"location":"reference/modules/dashboard/utils/","title":"utils","text":""},{"location":"reference/modules/dashboard/utils/#nhssynth.modules.dashboard.utils.hide_streamlit_content","title":"hide_streamlit_content()","text":"

Hide the footer message and deploy button in Streamlit.

Source code in src/nhssynth/modules/dashboard/utils.py
def hide_streamlit_content() -> None:\n    \"\"\"\n    Hide the footer message and deploy button in Streamlit.\n    \"\"\"\n    hide_streamlit_style = \"\"\"\n    <style>\n    footer {visibility: hidden;}\n    .stDeployButton {visibility: hidden;}\n    </style>\n    \"\"\"\n    st.markdown(hide_streamlit_style, unsafe_allow_html=True)\n
"},{"location":"reference/modules/dashboard/utils/#nhssynth.modules.dashboard.utils.id_selector","title":"id_selector(df)","text":"

Select an ID from the dataframe to then operate on.

Parameters:

- df (DataFrame, required): The dataframe to select an ID from.

Returns:

- Series: The subset of the dataframe containing only the row corresponding to the selected ID.

Source code in src/nhssynth/modules/dashboard/utils.py
def id_selector(df: pd.DataFrame) -> pd.Series:\n    \"\"\"\n    Select an ID from the dataframe to then operate on.\n\n    Args:\n        df: The dataframe to select an ID from.\n\n    Returns:\n        The dataset subset to only the row corresponding to the ID.\n    \"\"\"\n    architecture = st.sidebar.selectbox(\n        \"Select architecture to display\", df.index.get_level_values(\"architecture\").unique()\n    )\n    # Different architectures may have different numbers of repeats and configs\n    repeats = df.loc[architecture].index.get_level_values(\"repeat\").astype(int).unique()\n    configs = df.loc[architecture].index.get_level_values(\"config\").astype(int).unique()\n    if len(repeats) > 1:\n        repeat = st.sidebar.selectbox(\"Select repeat to display\", repeats)\n    else:\n        repeat = repeats[0]\n    if len(configs) > 1:\n        config = st.sidebar.selectbox(\"Select configuration to display\", configs)\n    else:\n        config = configs[0]\n    return df.loc[(architecture, repeat, config)]\n
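This (and subset_selector below) assumes the dataframe carries a three-level MultiIndex of architecture, repeat and config. A minimal sketch of that shape with made-up values; inside a running Streamlit app the sidebar widgets supply the labels that are passed to df.loc:

import pandas as pd

index = pd.MultiIndex.from_tuples(
    [("VAE", 0, 0), ("VAE", 1, 0), ("DPVAE", 0, 0)],
    names=["architecture", "repeat", "config"],
)
df = pd.DataFrame({"metric": [0.91, 0.89, 0.84]}, index=index)

# Equivalent to the selection id_selector performs once the sidebar choices are made
print(df.loc[("VAE", 1, 0)])  # the single row for that architecture/repeat/config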
"},{"location":"reference/modules/dashboard/utils/#nhssynth.modules.dashboard.utils.subset_selector","title":"subset_selector(df)","text":"

Select a subset of the dataframe to then operate on.

Parameters:

- df (DataFrame, required): The dataframe to select a subset of.

Returns:

- DataFrame: The subset of the dataframe.

Source code in src/nhssynth/modules/dashboard/utils.py
def subset_selector(df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Select a subset of the dataframe to then operate on.\n\n    Args:\n        df: The dataframe to select a subset of.\n\n    Returns:\n        The subset of the dataframe.\n    \"\"\"\n    architectures = df.index.get_level_values(\"architecture\").unique().tolist()\n    repeats = df.index.get_level_values(\"repeat\").astype(int).unique().tolist()\n    configs = df.index.get_level_values(\"config\").astype(int).unique().tolist()\n    selected_architectures = st.sidebar.multiselect(\n        \"Select architectures to display\", architectures, default=architectures\n    )\n    selected_repeats = st.sidebar.multiselect(\"Select repeats to display\", repeats, default=repeats[0])\n    selected_configs = st.sidebar.multiselect(\"Select configurations to display\", configs, default=configs)\n    return df.loc[(selected_architectures, selected_repeats, selected_configs)]\n
"},{"location":"reference/modules/dashboard/pages/","title":"pages","text":""},{"location":"reference/modules/dashboard/pages/1_Tables/","title":"1_Tables","text":""},{"location":"reference/modules/dashboard/pages/2_Plots/","title":"2_Plots","text":""},{"location":"reference/modules/dashboard/pages/2_Plots/#nhssynth.modules.dashboard.pages.2_Plots.prepare_for_dimensionality","title":"prepare_for_dimensionality(df)","text":"

Factorize all categorical columns in a dataframe.

Source code in src/nhssynth/modules/dashboard/pages/2_Plots.py
def prepare_for_dimensionality(df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"Factorize all categorical columns in a dataframe.\"\"\"\n    for col in df.columns:\n        if df[col].dtype == \"object\":\n            df[col] = pd.factorize(df[col])[0]\n        elif df[col].dtype == \"datetime64[ns]\":\n            df[col] = pd.to_numeric(df[col])\n        min_val = df[col].min()\n        max_val = df[col].max()\n        df[col] = (df[col] - min_val) / (max_val - min_val)\n    return df\n
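The helper's body is short enough to mirror directly; this sketch on made-up data shows the same three steps it performs: factorising object columns, converting datetimes to numbers, and min-max scaling every column to [0, 1]:

import pandas as pd

df = pd.DataFrame({
    "sex": ["M", "F", "F", "M"],
    "age": [20, 40, 60, 80],
    "admitted": pd.to_datetime(["2020-01-01", "2020-06-01", "2021-01-01", "2021-06-01"]),
})

for col in df.columns:
    if df[col].dtype == "object":
        df[col] = pd.factorize(df[col])[0]  # "M"/"F" -> 0/1
    elif df[col].dtype == "datetime64[ns]":
        df[col] = pd.to_numeric(df[col])  # datetimes -> nanosecond integers
    df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())  # scale to [0, 1]

print(df.round(2))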
"},{"location":"reference/modules/dashboard/pages/3_Experiment_Configurations/","title":"3_Experiment_Configurations","text":""},{"location":"reference/modules/dataloader/","title":"dataloader","text":""},{"location":"reference/modules/dataloader/constraints/","title":"constraints","text":""},{"location":"reference/modules/dataloader/io/","title":"io","text":""},{"location":"reference/modules/dataloader/io/#nhssynth.modules.dataloader.io.check_input_paths","title":"check_input_paths(fn_input, fn_metadata, dir_data)","text":"

Formats the input filenames and directory for an experiment.

Parameters:

- fn_input (str, required): The input data filename.
- fn_metadata (str, required): The metadata filename / suffix to append to fn_input.
- dir_data (str, required): The directory that should contain both of the above.

Returns:

- tuple[Path, str, str]: A tuple containing the correct directory path, input data filename and metadata filename (used for both in and out).

Warns:

- UserWarning: When the path to fn_input includes directory separators, as this is not supported and may not work as intended.
- UserWarning: When the path to fn_metadata includes directory separators, as this is not supported and may not work as intended.

Source code in src/nhssynth/modules/dataloader/io.py
def check_input_paths(\n    fn_input: str,\n    fn_metadata: str,\n    dir_data: str,\n) -> tuple[Path, str, str]:\n    \"\"\"\n    Formats the input filenames and directory for an experiment.\n\n    Args:\n        fn_input: The input data filename.\n        fn_metadata: The metadata filename / suffix to append to `fn_input`.\n        dir_data: The directory that should contain both of the above.\n\n    Returns:\n        A tuple containing the correct directory path, input data filename and metadata filename (used for both in and out).\n\n    Warnings:\n        UserWarning: When the path to `fn_input` includes directory separators, as this is not supported and may not work as intended.\n        UserWarning: When the path to `fn_metadata` includes directory separators, as this is not supported and may not work as intended.\n    \"\"\"\n    fn_input, fn_metadata = io.consistent_endings([(fn_input, \".csv\"), (fn_metadata, \".yaml\")])\n    dir_data = Path(dir_data)\n    fn_metadata = io.potential_suffix(fn_metadata, fn_input)\n    io.warn_if_path_supplied([fn_input, fn_metadata], dir_data)\n    io.check_exists([fn_input], dir_data)\n    return dir_data, fn_input, fn_metadata\n
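A sketch of a typical call, assuming data/my_dataset.csv already exists (the final check_exists step raises if it does not) and that the supplied extensions (.csv / .yaml) are already consistent; note how the "_metadata" suffix is expanded using the convention from the common io helpers:

from nhssynth.modules.dataloader.io import check_input_paths

dir_data, fn_input, fn_metadata = check_input_paths(
    fn_input="my_dataset.csv",
    fn_metadata="_metadata.yaml",
    dir_data="data",
)
print(dir_data, fn_input, fn_metadata)  # data my_dataset.csv my_dataset_metadata.yaml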
"},{"location":"reference/modules/dataloader/io/#nhssynth.modules.dataloader.io.check_output_paths","title":"check_output_paths(fn_dataset, fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata, dir_experiment)","text":"

Formats the output filenames for an experiment.

Parameters:

- fn_dataset (str, required): The input data filename.
- fn_typed (str, required): The typed input data filename/suffix to append to fn_dataset.
- fn_transformed (str, required): The transformed output data filename/suffix to append to fn_dataset.
- fn_metatransformer (str, required): The metatransformer filename/suffix to append to fn_dataset.
- fn_constraint_graph (str, required): The constraint graph filename/suffix to append to fn_dataset.
- fn_sdv_metadata (str, required): The SDV metadata filename/suffix to append to fn_dataset.
- dir_experiment (Path, required): The experiment directory to write the outputs to.

Returns:

- tuple[str, str, str]: A tuple containing the formatted output filenames.

Warns:

- UserWarning: When any of the filenames include directory separators, as this is not supported and may not work as intended.

Source code in src/nhssynth/modules/dataloader/io.py
def check_output_paths(\n    fn_dataset: str,\n    fn_typed: str,\n    fn_transformed: str,\n    fn_metatransformer: str,\n    fn_constraint_graph: str,\n    fn_sdv_metadata: str,\n    dir_experiment: Path,\n) -> tuple[str, str, str]:\n    \"\"\"\n    Formats the output filenames for an experiment.\n\n    Args:\n        fn_dataset: The input data filename.\n        fn_typed: The typed input data filename/suffix to append to `fn_dataset`.\n        fn_transformed: The transformed output data filename/suffix to append to `fn_dataset`.\n        fn_metatransformer: The metatransformer filename/suffix to append to `fn_dataset`.\n        fn_constraint_graph: The constraint graph filename/suffix to append to `fn_dataset`.\n        fn_sdv_metadata: The SDV metadata filename/suffix to append to `fn_dataset`.\n        dir_experiment: The experiment directory to write the outputs to.\n\n    Returns:\n        A tuple containing the formatted output filenames.\n\n    Warnings:\n        UserWarning: When any of the filenames include directory separators, as this is not supported and may not work as intended.\n    \"\"\"\n    fn_dataset = Path(fn_dataset).stem\n    fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata = io.consistent_endings(\n        [fn_typed, fn_transformed, fn_metatransformer, (fn_constraint_graph, \".html\"), fn_sdv_metadata]\n    )\n    fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata = io.potential_suffixes(\n        [fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata], fn_dataset\n    )\n    io.warn_if_path_supplied(\n        [fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata], dir_experiment\n    )\n    return fn_dataset, fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata\n
"},{"location":"reference/modules/dataloader/io/#nhssynth.modules.dataloader.io.write_data_outputs","title":"write_data_outputs(metatransformer, fn_dataset, fn_metadata, dir_experiment, args)","text":"

Writes the transformed data and metatransformer to disk.

Parameters:

- metatransformer (MetaTransformer, required): The metatransformer used to transform the data into its model-ready state.
- fn_dataset (str, required): The base dataset filename.
- fn_metadata (str, required): The metadata filename.
- dir_experiment (Path, required): The experiment directory to write the outputs to.
- args (Namespace, required): The full set of parsed command line arguments.

Returns:

- str: The filename of the dataset used.

Source code in src/nhssynth/modules/dataloader/io.py
def write_data_outputs(\n    metatransformer: MetaTransformer,\n    fn_dataset: str,\n    fn_metadata: str,\n    dir_experiment: Path,\n    args: argparse.Namespace,\n) -> None:\n    \"\"\"\n    Writes the transformed data and metatransformer to disk.\n\n    Args:\n        metatransformer: The metatransformer used to transform the data into its model-ready state.\n        fn_dataset: The base dataset filename.\n        fn_metadata: The metadata filename.\n        dir_experiment: The experiment directory to write the outputs to.\n        args: The full set of parsed command line arguments.\n\n    Returns:\n        The filename of the dataset used.\n    \"\"\"\n    fn_dataset, fn_typed, fn_transformed, fn_metatransformer, fn_constraint_graph, fn_sdv_metadata = check_output_paths(\n        fn_dataset,\n        args.typed,\n        args.transformed,\n        args.metatransformer,\n        args.constraint_graph,\n        args.sdv_metadata,\n        dir_experiment,\n    )\n    metatransformer.save_metadata(dir_experiment / fn_metadata, args.collapse_yaml)\n    metatransformer.save_constraint_graphs(dir_experiment / fn_constraint_graph)\n    with open(dir_experiment / fn_typed, \"wb\") as f:\n        pickle.dump(TypedDataset(metatransformer.get_typed_dataset()), f)\n    transformed_dataset = metatransformer.get_transformed_dataset()\n    transformed_dataset.to_pickle(dir_experiment / fn_transformed)\n    if args.write_csv:\n        chunks = np.array_split(transformed_dataset.index, 100)\n        for chunk, subset in enumerate(tqdm(chunks, desc=\"Writing transformed dataset to CSV\", unit=\"chunk\")):\n            if chunk == 0:\n                transformed_dataset.loc[subset].to_csv(\n                    dir_experiment / (fn_transformed[:-3] + \"csv\"), mode=\"w\", index=False\n                )\n            else:\n                transformed_dataset.loc[subset].to_csv(\n                    dir_experiment / (fn_transformed[:-3] + \"csv\"), mode=\"a\", index=False, header=False\n                )\n    with open(dir_experiment / fn_metatransformer, \"wb\") as f:\n        pickle.dump(metatransformer, f)\n    with open(dir_experiment / fn_sdv_metadata, \"wb\") as f:\n        pickle.dump(metatransformer.get_sdv_metadata(), f)\n\n    return fn_dataset\n
"},{"location":"reference/modules/dataloader/metadata/","title":"metadata","text":""},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData","title":"MetaData","text":"Source code in src/nhssynth/modules/dataloader/metadata.py
class MetaData:\n    class ColumnMetaData:\n        def __init__(self, name: str, data: pd.Series, raw: dict) -> None:\n            self.name = name\n            self.dtype: np.dtype = self._validate_dtype(data, raw.get(\"dtype\"))\n            self.categorical: bool = self._validate_categorical(data, raw.get(\"categorical\"))\n            self.missingness_strategy: GenericMissingnessStrategy = self._validate_missingness_strategy(\n                raw.get(\"missingness\")\n            )\n            self.transformer: ColumnTransformer = self._validate_transformer(raw.get(\"transformer\"))\n\n        def _validate_dtype(self, data: pd.Series, dtype_raw: Optional[Union[dict, str]] = None) -> np.dtype:\n            if isinstance(dtype_raw, dict):\n                dtype_name = dtype_raw.pop(\"name\", None)\n            elif isinstance(dtype_raw, str):\n                dtype_name = dtype_raw\n            else:\n                dtype_name = self._infer_dtype(data)\n            try:\n                dtype = np.dtype(dtype_name)\n            except TypeError:\n                warnings.warn(\n                    f\"Invalid dtype specification '{dtype_name}' for column '{self.name}', ignoring dtype for this column\"\n                )\n                dtype = self._infer_dtype(data)\n            if dtype.kind == \"M\":\n                self._setup_datetime_config(data, dtype_raw)\n            elif dtype.kind in [\"f\", \"i\", \"u\"]:\n                self.rounding_scheme = self._validate_rounding_scheme(data, dtype, dtype_raw)\n            return dtype\n\n        def _infer_dtype(self, data: pd.Series) -> np.dtype:\n            return data.dtype.name\n\n        def _infer_datetime_format(self, data: pd.Series) -> str:\n            return _guess_datetime_format_for_array(data[data.notna()].astype(str).to_numpy())\n\n        def _setup_datetime_config(self, data: pd.Series, datetime_config: dict) -> dict:\n            \"\"\"\n            Add keys to `datetime_config` corresponding to args from the `pd.to_datetime` function\n            (see [the docs](https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html))\n            \"\"\"\n            if not isinstance(datetime_config, dict):\n                datetime_config = {}\n            else:\n                datetime_config = filter_dict(datetime_config, {\"format\", \"floor\"}, include=True)\n            if \"format\" not in datetime_config:\n                datetime_config[\"format\"] = self._infer_datetime_format(data)\n            self.datetime_config = datetime_config\n\n        def _validate_rounding_scheme(self, data: pd.Series, dtype: np.dtype, dtype_dict: dict) -> int:\n            if dtype_dict and \"rounding_scheme\" in dtype_dict:\n                return dtype_dict[\"rounding_scheme\"]\n            else:\n                if dtype.kind != \"f\":\n                    return 1.0\n                roundable_data = data[data.notna()]\n                for i in range(np.finfo(dtype).precision):\n                    if (roundable_data.round(i) == roundable_data).all():\n                        return 10**-i\n            return None\n\n        def _validate_categorical(self, data: pd.Series, categorical: Optional[bool] = None) -> bool:\n            if categorical is None:\n                return self._infer_categorical(data)\n            elif not isinstance(categorical, bool):\n                warnings.warn(\n                    f\"Invalid categorical '{categorical}' for column '{self.name}', ignoring categorical for this column\"\n         
       )\n                return self._infer_categorical(data)\n            else:\n                self.boolean = data.nunique() <= 2\n                return categorical\n\n        def _infer_categorical(self, data: pd.Series) -> bool:\n            self.boolean = data.nunique() <= 2\n            return data.nunique() <= 10 or self.dtype.kind == \"O\"\n\n        def _validate_missingness_strategy(self, missingness_strategy: Optional[Union[dict, str]]) -> tuple[str, dict]:\n            if not missingness_strategy:\n                return None\n            if isinstance(missingness_strategy, dict):\n                impute = missingness_strategy.get(\"impute\", None)\n                strategy = \"impute\" if impute else missingness_strategy.get(\"strategy\", None)\n            else:\n                strategy = missingness_strategy\n            if (\n                strategy not in MISSINGNESS_STRATEGIES\n                or (strategy == \"impute\" and impute == \"mean\" and self.dtype.kind != \"f\")\n                or (strategy == \"impute\" and not impute)\n            ):\n                warnings.warn(\n                    f\"Invalid missingness strategy '{missingness_strategy}' for column '{self.name}', ignoring missingness strategy for this column\"\n                )\n                return None\n            return (\n                MISSINGNESS_STRATEGIES[strategy](impute) if strategy == \"impute\" else MISSINGNESS_STRATEGIES[strategy]()\n            )\n\n        def _validate_transformer(self, transformer: Optional[Union[dict, str]] = {}) -> tuple[str, dict]:\n            # if transformer is neither a dict nor a str statement below will raise a TypeError\n            if isinstance(transformer, dict):\n                self.transformer_name = transformer.get(\"name\")\n                self.transformer_config = filter_dict(transformer, \"name\")\n            elif isinstance(transformer, str):\n                self.transformer_name = transformer\n                self.transformer_config = {}\n            else:\n                if transformer is not None:\n                    warnings.warn(\n                        f\"Invalid transformer config '{transformer}' for column '{self.name}', ignoring transformer for this column\"\n                    )\n                self.transformer_name = None\n                self.transformer_config = {}\n            if not self.transformer_name:\n                return self._infer_transformer()\n            else:\n                try:\n                    return eval(self.transformer_name)(**self.transformer_config)\n                except NameError:\n                    warnings.warn(\n                        f\"Invalid transformer '{self.transformer_name}' or config '{self.transformer_config}' for column '{self.name}', ignoring transformer for this column\"\n                    )\n                    return self._infer_transformer()\n\n        def _infer_transformer(self) -> ColumnTransformer:\n            if self.categorical:\n                transformer = OHECategoricalTransformer(**self.transformer_config)\n            else:\n                transformer = ClusterContinuousTransformer(**self.transformer_config)\n            if self.dtype.kind == \"M\":\n                transformer = DatetimeTransformer(transformer)\n            return transformer\n\n    def __init__(self, data: pd.DataFrame, metadata: Optional[dict] = {}):\n        self.columns: pd.Index = data.columns\n        self.raw_metadata: dict = metadata\n        if 
set(self.raw_metadata[\"columns\"].keys()) - set(self.columns):\n            raise ValueError(\"Metadata contains keys that do not appear amongst the columns.\")\n        self.dropped_columns = [cn for cn in self.columns if self.raw_metadata[\"columns\"].get(cn, None) == \"drop\"]\n        self.columns = self.columns.drop(self.dropped_columns)\n        self._metadata = {\n            cn: self.ColumnMetaData(cn, data[cn], self.raw_metadata[\"columns\"].get(cn, {})) for cn in self.columns\n        }\n        self.constraints = ConstraintGraph(self.raw_metadata.get(\"constraints\", []), self.columns, self._metadata)\n\n    def __getitem__(self, key: str) -> dict[str, Any]:\n        return self._metadata[key]\n\n    def __iter__(self) -> Iterator:\n        return iter(self._metadata.values())\n\n    def __repr__(self) -> None:\n        return yaml.dump(self._metadata, default_flow_style=False, sort_keys=False)\n\n    @classmethod\n    def from_path(cls, data: pd.DataFrame, path_str: str):\n        \"\"\"\n        Instantiate a MetaData object from a YAML file via a specified path.\n\n        Args:\n            data: The data to be used to infer / validate the metadata.\n            path_str: The path to the metadata YAML file.\n\n        Returns:\n            The metadata object.\n        \"\"\"\n        path = pathlib.Path(path_str)\n        if path.exists():\n            with open(path) as stream:\n                metadata = yaml.safe_load(stream)\n            # Filter out the expanded alias/anchor group as it is not needed\n            metadata = filter_dict(metadata, {\"column_types\"})\n        else:\n            warnings.warn(f\"No metadata found at {path}...\")\n            metadata = {\"columns\": {}}\n        return cls(data, metadata)\n\n    def _collapse(self, metadata: dict) -> dict:\n        \"\"\"\n        Given a metadata dictionary, rewrite to collapse duplicate column types in order to leverage YAML anchors and shrink the file.\n\n        Args:\n            metadata: The metadata dictionary to be rewritten.\n\n        Returns:\n            A rewritten metadata dictionary with collapsed column types and transformers.\n                The returned dictionary has the following structure:\n                {\n                    \"column_types\": dict,\n                    **metadata  # one entry for each column in \"columns\" that now reference the dicts above\n                }\n                - \"column_types\" is a dictionary mapping column type indices to column type configurations.\n                - \"**metadata\" contains the original metadata dictionary, with column types rewritten to use the indices and \"column_types\".\n        \"\"\"\n        c_index = 1\n        column_types = {}\n        column_type_counts = {}\n        for cn, cd in metadata[\"columns\"].items():\n            if cd not in column_types.values():\n                column_types[c_index] = cd if isinstance(cd, str) else cd.copy()\n                column_type_counts[c_index] = 1\n                c_index += 1\n            else:\n                cix = get_key_by_value(column_types, cd)\n                column_type_counts[cix] += 1\n\n        for cn, cd in metadata[\"columns\"].items():\n            cix = get_key_by_value(column_types, cd)\n            if column_type_counts[cix] > 1:\n                metadata[\"columns\"][cn] = column_types[cix]\n            else:\n                column_types.pop(cix)\n\n        return {\"column_types\": {i + 1: x for i, x in enumerate(column_types.values())}, 
**metadata}\n\n    def _assemble(self, collapse_yaml: bool) -> dict[str, dict[str, Any]]:\n        \"\"\"\n        Rearrange the metadata into a dictionary that can be written to a YAML file.\n\n        Args:\n            collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication.\n\n        Returns:\n            A dictionary containing the assembled metadata.\n        \"\"\"\n        assembled_metadata = {\n            \"columns\": {\n                cn: {\n                    \"dtype\": (\n                        cmd.dtype.name\n                        if not hasattr(cmd, \"datetime_config\")\n                        else {\"name\": cmd.dtype.name, **cmd.datetime_config}\n                    ),\n                    \"categorical\": cmd.categorical,\n                }\n                for cn, cmd in self._metadata.items()\n            }\n        }\n        # We loop through the base dict above to add other parts if they are present in the metadata\n        for cn, cmd in self._metadata.items():\n            if cmd.missingness_strategy:\n                assembled_metadata[\"columns\"][cn][\"missingness\"] = (\n                    cmd.missingness_strategy.name\n                    if cmd.missingness_strategy.name != \"impute\"\n                    else {\"name\": cmd.missingness_strategy.name, \"impute\": cmd.missingness_strategy.impute}\n                )\n            if cmd.transformer_config:\n                assembled_metadata[\"columns\"][cn][\"transformer\"] = {\n                    **cmd.transformer_config,\n                    \"name\": cmd.transformer.__class__.__name__,\n                }\n\n        # Add back the dropped_columns not present in the metadata\n        if self.dropped_columns:\n            assembled_metadata[\"columns\"].update({cn: \"drop\" for cn in self.dropped_columns})\n\n        if collapse_yaml:\n            assembled_metadata = self._collapse(assembled_metadata)\n\n        # We add the constraints section after all of the formatting and processing above\n        # In general, the constraints are kept the same as the input (provided they passed validation)\n        # If `collapse_yaml` is specified, we output the minimum set of equivalent constraints\n        if self.constraints:\n            assembled_metadata[\"constraints\"] = (\n                [str(c) for c in self.constraints.minimal_constraints]\n                if collapse_yaml\n                else self.constraints.raw_constraint_strings\n            )\n        return assembled_metadata\n\n    def save(self, path: pathlib.Path, collapse_yaml: bool) -> None:\n        \"\"\"\n        Writes metadata to a YAML file.\n\n        Args:\n            path: The path at which to write the metadata YAML file.\n            collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication.\n        \"\"\"\n        with open(path, \"w\") as yaml_file:\n            yaml.safe_dump(\n                self._assemble(collapse_yaml),\n                yaml_file,\n                default_flow_style=False,\n                sort_keys=False,\n            )\n\n    def get_sdv_metadata(self) -> dict[str, dict[str, dict[str, str]]]:\n        \"\"\"\n        Map combinations of our metadata implementation to SDV's as required by SDMetrics.\n\n        Returns:\n            A dictionary containing the SDV metadata.\n        \"\"\"\n        sdv_metadata = {\n            \"columns\": {\n                cn: {\n                 
   \"sdtype\": (\n                        \"boolean\"\n                        if cmd.boolean\n                        else \"categorical\" if cmd.categorical else \"datetime\" if cmd.dtype.kind == \"M\" else \"numerical\"\n                    ),\n                }\n                for cn, cmd in self._metadata.items()\n            }\n        }\n        for cn, cmd in self._metadata.items():\n            if cmd.dtype.kind == \"M\":\n                sdv_metadata[\"columns\"][cn][\"format\"] = cmd.datetime_config[\"format\"]\n        return sdv_metadata\n\n    def save_constraint_graphs(self, path: pathlib.Path) -> None:\n        \"\"\"\n        Output the constraint graphs as HTML files.\n\n        Args:\n            path: The path at which to write the constraint graph HTML files.\n        \"\"\"\n        self.constraints._output_graphs_html(path)\n
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.ColumnMetaData","title":"ColumnMetaData","text":"Source code in src/nhssynth/modules/dataloader/metadata.py
class ColumnMetaData:\n    def __init__(self, name: str, data: pd.Series, raw: dict) -> None:\n        self.name = name\n        self.dtype: np.dtype = self._validate_dtype(data, raw.get(\"dtype\"))\n        self.categorical: bool = self._validate_categorical(data, raw.get(\"categorical\"))\n        self.missingness_strategy: GenericMissingnessStrategy = self._validate_missingness_strategy(\n            raw.get(\"missingness\")\n        )\n        self.transformer: ColumnTransformer = self._validate_transformer(raw.get(\"transformer\"))\n\n    def _validate_dtype(self, data: pd.Series, dtype_raw: Optional[Union[dict, str]] = None) -> np.dtype:\n        if isinstance(dtype_raw, dict):\n            dtype_name = dtype_raw.pop(\"name\", None)\n        elif isinstance(dtype_raw, str):\n            dtype_name = dtype_raw\n        else:\n            dtype_name = self._infer_dtype(data)\n        try:\n            dtype = np.dtype(dtype_name)\n        except TypeError:\n            warnings.warn(\n                f\"Invalid dtype specification '{dtype_name}' for column '{self.name}', ignoring dtype for this column\"\n            )\n            dtype = self._infer_dtype(data)\n        if dtype.kind == \"M\":\n            self._setup_datetime_config(data, dtype_raw)\n        elif dtype.kind in [\"f\", \"i\", \"u\"]:\n            self.rounding_scheme = self._validate_rounding_scheme(data, dtype, dtype_raw)\n        return dtype\n\n    def _infer_dtype(self, data: pd.Series) -> np.dtype:\n        return data.dtype.name\n\n    def _infer_datetime_format(self, data: pd.Series) -> str:\n        return _guess_datetime_format_for_array(data[data.notna()].astype(str).to_numpy())\n\n    def _setup_datetime_config(self, data: pd.Series, datetime_config: dict) -> dict:\n        \"\"\"\n        Add keys to `datetime_config` corresponding to args from the `pd.to_datetime` function\n        (see [the docs](https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html))\n        \"\"\"\n        if not isinstance(datetime_config, dict):\n            datetime_config = {}\n        else:\n            datetime_config = filter_dict(datetime_config, {\"format\", \"floor\"}, include=True)\n        if \"format\" not in datetime_config:\n            datetime_config[\"format\"] = self._infer_datetime_format(data)\n        self.datetime_config = datetime_config\n\n    def _validate_rounding_scheme(self, data: pd.Series, dtype: np.dtype, dtype_dict: dict) -> int:\n        if dtype_dict and \"rounding_scheme\" in dtype_dict:\n            return dtype_dict[\"rounding_scheme\"]\n        else:\n            if dtype.kind != \"f\":\n                return 1.0\n            roundable_data = data[data.notna()]\n            for i in range(np.finfo(dtype).precision):\n                if (roundable_data.round(i) == roundable_data).all():\n                    return 10**-i\n        return None\n\n    def _validate_categorical(self, data: pd.Series, categorical: Optional[bool] = None) -> bool:\n        if categorical is None:\n            return self._infer_categorical(data)\n        elif not isinstance(categorical, bool):\n            warnings.warn(\n                f\"Invalid categorical '{categorical}' for column '{self.name}', ignoring categorical for this column\"\n            )\n            return self._infer_categorical(data)\n        else:\n            self.boolean = data.nunique() <= 2\n            return categorical\n\n    def _infer_categorical(self, data: pd.Series) -> bool:\n        self.boolean = data.nunique() <= 2\n    
    return data.nunique() <= 10 or self.dtype.kind == \"O\"\n\n    def _validate_missingness_strategy(self, missingness_strategy: Optional[Union[dict, str]]) -> tuple[str, dict]:\n        if not missingness_strategy:\n            return None\n        if isinstance(missingness_strategy, dict):\n            impute = missingness_strategy.get(\"impute\", None)\n            strategy = \"impute\" if impute else missingness_strategy.get(\"strategy\", None)\n        else:\n            strategy = missingness_strategy\n        if (\n            strategy not in MISSINGNESS_STRATEGIES\n            or (strategy == \"impute\" and impute == \"mean\" and self.dtype.kind != \"f\")\n            or (strategy == \"impute\" and not impute)\n        ):\n            warnings.warn(\n                f\"Invalid missingness strategy '{missingness_strategy}' for column '{self.name}', ignoring missingness strategy for this column\"\n            )\n            return None\n        return (\n            MISSINGNESS_STRATEGIES[strategy](impute) if strategy == \"impute\" else MISSINGNESS_STRATEGIES[strategy]()\n        )\n\n    def _validate_transformer(self, transformer: Optional[Union[dict, str]] = {}) -> tuple[str, dict]:\n        # if transformer is neither a dict nor a str statement below will raise a TypeError\n        if isinstance(transformer, dict):\n            self.transformer_name = transformer.get(\"name\")\n            self.transformer_config = filter_dict(transformer, \"name\")\n        elif isinstance(transformer, str):\n            self.transformer_name = transformer\n            self.transformer_config = {}\n        else:\n            if transformer is not None:\n                warnings.warn(\n                    f\"Invalid transformer config '{transformer}' for column '{self.name}', ignoring transformer for this column\"\n                )\n            self.transformer_name = None\n            self.transformer_config = {}\n        if not self.transformer_name:\n            return self._infer_transformer()\n        else:\n            try:\n                return eval(self.transformer_name)(**self.transformer_config)\n            except NameError:\n                warnings.warn(\n                    f\"Invalid transformer '{self.transformer_name}' or config '{self.transformer_config}' for column '{self.name}', ignoring transformer for this column\"\n                )\n                return self._infer_transformer()\n\n    def _infer_transformer(self) -> ColumnTransformer:\n        if self.categorical:\n            transformer = OHECategoricalTransformer(**self.transformer_config)\n        else:\n            transformer = ClusterContinuousTransformer(**self.transformer_config)\n        if self.dtype.kind == \"M\":\n            transformer = DatetimeTransformer(transformer)\n        return transformer\n
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.from_path","title":"from_path(data, path_str) classmethod","text":"

Instantiate a MetaData object from a YAML file via a specified path.

Parameters:

- data (DataFrame, required): The data to be used to infer / validate the metadata.
- path_str (str, required): The path to the metadata YAML file.

Returns:

- The metadata object.

Source code in src/nhssynth/modules/dataloader/metadata.py
@classmethod\ndef from_path(cls, data: pd.DataFrame, path_str: str):\n    \"\"\"\n    Instantiate a MetaData object from a YAML file via a specified path.\n\n    Args:\n        data: The data to be used to infer / validate the metadata.\n        path_str: The path to the metadata YAML file.\n\n    Returns:\n        The metadata object.\n    \"\"\"\n    path = pathlib.Path(path_str)\n    if path.exists():\n        with open(path) as stream:\n            metadata = yaml.safe_load(stream)\n        # Filter out the expanded alias/anchor group as it is not needed\n        metadata = filter_dict(metadata, {\"column_types\"})\n    else:\n        warnings.warn(f\"No metadata found at {path}...\")\n        metadata = {\"columns\": {}}\n    return cls(data, metadata)\n
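A sketch of a small metadata YAML and how it might be loaded; the column names and file name are hypothetical, any column not listed is inferred from the data, and "drop" excludes a column entirely:

import pandas as pd
from nhssynth.modules.dataloader.metadata import MetaData

with open("my_dataset_metadata.yaml", "w") as f:
    f.write(
        "columns:\n"
        "  age:\n"
        "    dtype: int64\n"
        "    categorical: false\n"
        "  sex:\n"
        "    dtype: object\n"
        "    categorical: true\n"
        "  nhs_number: drop\n"
    )

data = pd.DataFrame({
    "age": [34, 57, 61, 22],
    "sex": ["F", "M", "F", "M"],
    "nhs_number": ["001", "002", "003", "004"],
})
metadata = MetaData.from_path(data, "my_dataset_metadata.yaml")
print(metadata.dropped_columns)  # ['nhs_number']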
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.get_sdv_metadata","title":"get_sdv_metadata()","text":"

Map combinations of our metadata implementation to SDV's as required by SDMetrics.

Returns:

- dict[str, dict[str, dict[str, str]]]: A dictionary containing the SDV metadata.

Source code in src/nhssynth/modules/dataloader/metadata.py
def get_sdv_metadata(self) -> dict[str, dict[str, dict[str, str]]]:\n    \"\"\"\n    Map combinations of our metadata implementation to SDV's as required by SDMetrics.\n\n    Returns:\n        A dictionary containing the SDV metadata.\n    \"\"\"\n    sdv_metadata = {\n        \"columns\": {\n            cn: {\n                \"sdtype\": (\n                    \"boolean\"\n                    if cmd.boolean\n                    else \"categorical\" if cmd.categorical else \"datetime\" if cmd.dtype.kind == \"M\" else \"numerical\"\n                ),\n            }\n            for cn, cmd in self._metadata.items()\n        }\n    }\n    for cn, cmd in self._metadata.items():\n        if cmd.dtype.kind == \"M\":\n            sdv_metadata[\"columns\"][cn][\"format\"] = cmd.datetime_config[\"format\"]\n    return sdv_metadata\n
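For a dataset with, say, a numerical age column, a binary sex column, and a datetime admitted column (hypothetical names), the returned dictionary has roughly this shape:

sdv_metadata = {
    "columns": {
        "age": {"sdtype": "numerical"},
        "sex": {"sdtype": "boolean"},  # at most two unique values maps to "boolean"
        "admitted": {"sdtype": "datetime", "format": "%Y-%m-%d"},  # datetime columns also carry their format
    }
}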
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.save","title":"save(path, collapse_yaml)","text":"

Writes metadata to a YAML file.

Parameters:

- path (Path, required): The path at which to write the metadata YAML file.
- collapse_yaml (bool, required): A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication.

Source code in src/nhssynth/modules/dataloader/metadata.py
def save(self, path: pathlib.Path, collapse_yaml: bool) -> None:\n    \"\"\"\n    Writes metadata to a YAML file.\n\n    Args:\n        path: The path at which to write the metadata YAML file.\n        collapse_yaml: A boolean indicating whether to collapse the YAML representation of the metadata, reducing duplication.\n    \"\"\"\n    with open(path, \"w\") as yaml_file:\n        yaml.safe_dump(\n            self._assemble(collapse_yaml),\n            yaml_file,\n            default_flow_style=False,\n            sort_keys=False,\n        )\n
"},{"location":"reference/modules/dataloader/metadata/#nhssynth.modules.dataloader.metadata.MetaData.save_constraint_graphs","title":"save_constraint_graphs(path)","text":"

Output the constraint graphs as HTML files.

Parameters:

- path (Path, required): The path at which to write the constraint graph HTML files.

Source code in src/nhssynth/modules/dataloader/metadata.py
def save_constraint_graphs(self, path: pathlib.Path) -> None:\n    \"\"\"\n    Output the constraint graphs as HTML files.\n\n    Args:\n        path: The path at which to write the constraint graph HTML files.\n    \"\"\"\n    self.constraints._output_graphs_html(path)\n
"},{"location":"reference/modules/dataloader/metatransformer/","title":"metatransformer","text":""},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer","title":"MetaTransformer","text":"

The metatransformer is responsible for transforming the input dataset into a format that can be used by the model module, and for transforming that module's output back to the original format of the input dataset.

Parameters:

- dataset (DataFrame, required): The raw input DataFrame.
- metadata (Optional[MetaData], default None): Optionally, a MetaData object containing the metadata for the dataset. If this is not provided it will be inferred from the dataset.
- missingness_strategy (Optional[str], default 'augment'): The missingness strategy to use. Defaults to augmenting missing values in the data, see the missingness strategies for more information.
- impute_value (Optional[Any], default None): Only used when missingness_strategy is set to 'impute'. The value to use when imputing missing values in the data.

After calling MetaTransformer.apply(), the following attributes and methods will be available:

Attributes:

- typed_dataset (DataFrame): The dataset with the dtypes applied.
- post_missingness_strategy_dataset (DataFrame): The dataset with the missingness strategies applied.
- transformed_dataset (DataFrame): The transformed dataset.
- single_column_indices (list[int]): The indices of the columns that were transformed into a single column.
- multi_column_indices (list[list[int]]): The indices of the columns that were transformed into multiple columns.

Methods:

  • get_typed_dataset(): Returns the typed dataset.
  • get_prepared_dataset(): Returns the dataset with the missingness strategies applied.
  • get_transformed_dataset(): Returns the transformed dataset.
  • get_multi_and_single_column_indices(): Returns the indices of the columns that were transformed into one or multiple column(s).
  • get_sdv_metadata(): Returns the metadata in the correct format for SDMetrics.
  • save_metadata(): Saves the metadata to a file.
  • save_constraint_graphs(): Saves the constraint graphs to a file.

Note that mt.apply is a helper function that runs mt.apply_dtypes, mt.apply_missingness_strategy and mt.transform in sequence. This is the recommended way to use the MetaTransformer to ensure that it is fully instantiated for use downstream.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
class MetaTransformer:\n    \"\"\"\n    The metatransformer is responsible for transforming input dataset into a format that can be used by the `model` module, and for transforming\n    this module's output back to the original format of the input dataset.\n\n    Args:\n        dataset: The raw input DataFrame.\n        metadata: Optionally, a [`MetaData`][nhssynth.modules.dataloader.metadata.MetaData] object containing the metadata for the dataset. If this is not provided it will be inferred from the dataset.\n        missingness_strategy: The missingness strategy to use. Defaults to augmenting missing values in the data, see [the missingness strategies][nhssynth.modules.dataloader.missingness] for more information.\n        impute_value: Only used when `missingness_strategy` is set to 'impute'. The value to use when imputing missing values in the data.\n\n    After calling `MetaTransformer.apply()`, the following attributes and methods will be available:\n\n    Attributes:\n        typed_dataset (pd.DataFrame): The dataset with the dtypes applied.\n        post_missingness_strategy_dataset (pd.DataFrame): The dataset with the missingness strategies applied.\n        transformed_dataset (pd.DataFrame): The transformed dataset.\n        single_column_indices (list[int]): The indices of the columns that were transformed into a single column.\n        multi_column_indices (list[list[int]]): The indices of the columns that were transformed into multiple columns.\n\n    **Methods:**\n\n    - `get_typed_dataset()`: Returns the typed dataset.\n    - `get_prepared_dataset()`: Returns the dataset with the missingness strategies applied.\n    - `get_transformed_dataset()`: Returns the transformed dataset.\n    - `get_multi_and_single_column_indices()`: Returns the indices of the columns that were transformed into one or multiple column(s).\n    - `get_sdv_metadata()`: Returns the metadata in the correct format for SDMetrics.\n    - `save_metadata()`: Saves the metadata to a file.\n    - `save_constraint_graphs()`: Saves the constraint graphs to a file.\n\n    Note that `mt.apply` is a helper function that runs `mt.apply_dtypes`, `mt.apply_missingness_strategy` and `mt.transform` in sequence.\n    This is the recommended way to use the MetaTransformer to ensure that it is fully instantiated for use downstream.\n    \"\"\"\n\n    def __init__(\n        self,\n        dataset: pd.DataFrame,\n        metadata: Optional[MetaData] = None,\n        missingness_strategy: Optional[str] = \"augment\",\n        impute_value: Optional[Any] = None,\n    ):\n        self._raw_dataset: pd.DataFrame = dataset\n        self._metadata: MetaData = metadata or MetaData(dataset)\n        if missingness_strategy == \"impute\":\n            assert (\n                impute_value is not None\n            ), \"`impute_value` of the `MetaTransformer` must be specified (via the --impute flag) when using the imputation missingness strategy\"\n            self._impute_value = impute_value\n        self._missingness_strategy = MISSINGNESS_STRATEGIES[missingness_strategy]\n\n    @classmethod\n    def from_path(cls, dataset: pd.DataFrame, metadata_path: str, **kwargs) -> Self:\n        \"\"\"\n        Instantiates a MetaTransformer from a metadata file via a provided path.\n\n        Args:\n            dataset: The raw input DataFrame.\n            metadata_path: The path to the metadata file.\n\n        Returns:\n            A MetaTransformer object.\n        \"\"\"\n        return cls(dataset, MetaData.from_path(dataset, 
metadata_path), **kwargs)\n\n    @classmethod\n    def from_dict(cls, dataset: pd.DataFrame, metadata: dict, **kwargs) -> Self:\n        \"\"\"\n        Instantiates a MetaTransformer from a metadata dictionary.\n\n        Args:\n            dataset: The raw input DataFrame.\n            metadata: A dictionary of raw metadata.\n\n        Returns:\n            A MetaTransformer object.\n        \"\"\"\n        return cls(dataset, MetaData(dataset, metadata), **kwargs)\n\n    def drop_columns(self) -> None:\n        \"\"\"\n        Drops columns from the dataset that are not in the `MetaData`.\n        \"\"\"\n        self._raw_dataset = self._raw_dataset[self._metadata.columns]\n\n    def _apply_rounding_scheme(self, working_column: pd.Series, rounding_scheme: float) -> pd.Series:\n        \"\"\"\n        A rounding scheme takes the form of the smallest value that should be rounded to 0, i.e. 0.01 for 2dp.\n        We first round to the nearest multiple in the standard way, through dividing, rounding and then multiplying.\n        However, this can lead to floating point errors, so we then round to the number of decimal places required by the rounding scheme.\n\n        e.g. `np.round(0.15 / 0.1) * 0.1` will erroneously return 0.1.\n\n        Args:\n            working_column: The column to apply the rounding scheme to.\n            rounding_scheme: The rounding scheme to apply.\n\n        Returns:\n            The column with the rounding scheme applied.\n        \"\"\"\n        working_column = np.round(working_column / rounding_scheme) * rounding_scheme\n        return working_column.round(max(0, int(np.ceil(np.log10(1 / rounding_scheme)))))\n\n    def _apply_dtype(\n        self,\n        working_column: pd.Series,\n        column_metadata: MetaData.ColumnMetaData,\n    ) -> pd.Series:\n        \"\"\"\n        Given a `working_column`, the dtype specified in the `column_metadata` is applied to it.\n         - Datetime columns are floored, and their format is inferred.\n         - Rounding schemes are applied to numeric columns if specified.\n         - Columns with missing values have their dtype converted to the pandas equivalent to allow for NA values.\n\n        Args:\n            working_column: The column to apply the dtype to.\n            column_metadata: The metadata for the column.\n\n        Returns:\n            The column with the dtype applied.\n        \"\"\"\n        dtype = column_metadata.dtype\n        try:\n            if dtype.kind == \"M\":\n                working_column = pd.to_datetime(working_column, format=column_metadata.datetime_config.get(\"format\"))\n                if column_metadata.datetime_config.get(\"floor\"):\n                    working_column = working_column.dt.floor(column_metadata.datetime_config.get(\"floor\"))\n                    column_metadata.datetime_config[\"format\"] = column_metadata._infer_datetime_format(working_column)\n                return working_column\n            else:\n                if hasattr(column_metadata, \"rounding_scheme\") and column_metadata.rounding_scheme is not None:\n                    working_column = self._apply_rounding_scheme(working_column, column_metadata.rounding_scheme)\n                # If there are missing values in the column, we need to use the pandas equivalent of the dtype to allow for NA values\n                if working_column.isnull().any() and dtype.kind in [\"i\", \"u\", \"f\"]:\n                    return working_column.astype(dtype.name.capitalize())\n                else:\n            
        return working_column.astype(dtype)\n        except ValueError:\n            raise ValueError(f\"{sys.exc_info()[1]}\\nError applying dtype '{dtype}' to column '{working_column.name}'\")\n\n    def apply_dtypes(self, data: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"\n        Applies dtypes from the metadata to `dataset`.\n\n        Returns:\n            The dataset with the dtypes applied.\n        \"\"\"\n        working_data = data.copy()\n        for column_metadata in self._metadata:\n            working_data[column_metadata.name] = self._apply_dtype(working_data[column_metadata.name], column_metadata)\n        return working_data\n\n    def apply_missingness_strategy(self) -> pd.DataFrame:\n        \"\"\"\n        Resolves missingness in the dataset via the `MetaTransformer`'s global missingness strategy or\n        column-wise missingness strategies. In the case of the `AugmentMissingnessStrategy`, the missingness\n        is not resolved, instead a new column / value is added for later transformation.\n\n        Returns:\n            The dataset with the missingness strategies applied.\n        \"\"\"\n        working_data = self.typed_dataset.copy()\n        for column_metadata in self._metadata:\n            if not column_metadata.missingness_strategy:\n                column_metadata.missingness_strategy = (\n                    self._missingness_strategy(self._impute_value)\n                    if hasattr(self, \"_impute_value\")\n                    else self._missingness_strategy()\n                )\n            if not working_data[column_metadata.name].isnull().any():\n                continue\n            working_data = column_metadata.missingness_strategy.remove(working_data, column_metadata)\n        return working_data\n\n    # def apply_constraints(self) -> pd.DataFrame:\n    #     working_data = self.post_missingness_strategy_dataset.copy()\n    #     for constraint in self._metadata.constraints:\n    #         working_data = constraint.apply(working_data)\n    #     return working_data\n\n    def _get_missingness_carrier(self, column_metadata: MetaData.ColumnMetaData) -> Union[pd.Series, Any]:\n        \"\"\"\n        In the case of the `AugmentMissingnessStrategy`, a `missingness_carrier` has been determined for each column.\n        For continuous columns this is an indicator column for the presence of NaN values.\n        For categorical columns this is the value to be used to represent missingness as a category.\n\n        Args:\n            column_metadata: The metadata for the column.\n\n        Returns:\n            The missingness carrier for the column.\n        \"\"\"\n        missingness_carrier = getattr(column_metadata.missingness_strategy, \"missingness_carrier\", None)\n        if missingness_carrier in self.post_missingness_strategy_dataset.columns:\n            return self.post_missingness_strategy_dataset[missingness_carrier]\n        else:\n            return missingness_carrier\n\n    def transform(self) -> pd.DataFrame:\n        \"\"\"\n        Prepares the dataset by applying each of the columns' transformers and recording the indices of the single and multi columns.\n\n        Returns:\n            The transformed dataset.\n        \"\"\"\n        transformed_columns = []\n        self.single_column_indices = []\n        self.multi_column_indices = []\n        col_counter = 0\n        working_data = self.post_missingness_strategy_dataset.copy()\n\n        # iteratively build the transformed df\n        for column_metadata in tqdm(\n   
         self._metadata, desc=\"Transforming data\", unit=\"column\", total=len(self._metadata.columns)\n        ):\n            missingness_carrier = self._get_missingness_carrier(column_metadata)\n            transformed_data = column_metadata.transformer.apply(\n                working_data[column_metadata.name], missingness_carrier\n            )\n            transformed_columns.append(transformed_data)\n\n            # track single and multi column indices to supply to the model\n            if isinstance(transformed_data, pd.DataFrame) and transformed_data.shape[1] > 1:\n                num_to_add = transformed_data.shape[1]\n                if not column_metadata.categorical:\n                    self.single_column_indices.append(col_counter)\n                    col_counter += 1\n                    num_to_add -= 1\n                self.multi_column_indices.append(list(range(col_counter, col_counter + num_to_add)))\n                col_counter += num_to_add\n            else:\n                self.single_column_indices.append(col_counter)\n                col_counter += 1\n\n        return pd.concat(transformed_columns, axis=1)\n\n    def apply(self) -> pd.DataFrame:\n        \"\"\"\n        Applies the various steps of the MetaTransformer to a passed DataFrame.\n\n        Returns:\n            The transformed dataset.\n        \"\"\"\n        self.drop_columns()\n        self.typed_dataset = self.apply_dtypes(self._raw_dataset)\n        self.post_missingness_strategy_dataset = self.apply_missingness_strategy()\n        # self.constrained_dataset = self.apply_constraints()\n        self.transformed_dataset = self.transform()\n        return self.transformed_dataset\n\n    def inverse_apply(self, dataset: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"\n        Reverses the transformation applied by the MetaTransformer.\n\n        Args:\n            dataset: The transformed dataset.\n\n        Returns:\n            The original dataset.\n        \"\"\"\n        for column_metadata in self._metadata:\n            dataset = column_metadata.transformer.revert(dataset)\n        return self.apply_dtypes(dataset)\n\n    def get_typed_dataset(self) -> pd.DataFrame:\n        if not hasattr(self, \"typed_dataset\"):\n            raise ValueError(\n                \"The typed dataset has not yet been created. Call `mt.apply()` (or `mt.apply_dtypes()`) first.\"\n            )\n        return self.typed_dataset\n\n    def get_prepared_dataset(self) -> pd.DataFrame:\n        if not hasattr(self, \"prepared_dataset\"):\n            raise ValueError(\n                \"The prepared dataset has not yet been created. Call `mt.apply()` (or `mt.apply_missingness_strategy()`) first.\"\n            )\n        return self.prepared_dataset\n\n    def get_transformed_dataset(self) -> pd.DataFrame:\n        if not hasattr(self, \"transformed_dataset\"):\n            raise ValueError(\n                \"The prepared dataset has not yet been created. 
Call `mt.apply()` (or `mt.transform()`) first.\"\n            )\n        return self.transformed_dataset\n\n    def get_multi_and_single_column_indices(self) -> tuple[list[int], list[int]]:\n        \"\"\"\n        Returns the indices of the columns that were transformed into one or multiple column(s).\n\n        Returns:\n            A tuple containing the indices of the single and multi columns.\n        \"\"\"\n        if not hasattr(self, \"multi_column_indices\") or not hasattr(self, \"single_column_indices\"):\n            raise ValueError(\n                \"The single and multi column indices have not yet been created. Call `mt.apply()` (or `mt.transform()`) first.\"\n            )\n        return self.multi_column_indices, self.single_column_indices\n\n    def get_sdv_metadata(self) -> dict[str, dict[str, Any]]:\n        \"\"\"\n        Calls the `MetaData` method to reformat its contents into the correct format for use with SDMetrics.\n\n        Returns:\n            The metadata in the correct format for SDMetrics.\n        \"\"\"\n        return self._metadata.get_sdv_metadata()\n\n    def save_metadata(self, path: pathlib.Path, collapse_yaml: bool = False) -> None:\n        return self._metadata.save(path, collapse_yaml)\n\n    def save_constraint_graphs(self, path: pathlib.Path) -> None:\n        return self._metadata.constraints._output_graphs_html(path)\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.apply","title":"apply()","text":"

Applies the various steps of the MetaTransformer to a passed DataFrame.

Returns:

DataFrame: The transformed dataset.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def apply(self) -> pd.DataFrame:\n    \"\"\"\n    Applies the various steps of the MetaTransformer to a passed DataFrame.\n\n    Returns:\n        The transformed dataset.\n    \"\"\"\n    self.drop_columns()\n    self.typed_dataset = self.apply_dtypes(self._raw_dataset)\n    self.post_missingness_strategy_dataset = self.apply_missingness_strategy()\n    # self.constrained_dataset = self.apply_constraints()\n    self.transformed_dataset = self.transform()\n    return self.transformed_dataset\n
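For orientation, here is a minimal end-to-end sketch of how `apply()` fits into the pipeline. The file names and the YAML metadata format are hypothetical assumptions; only methods documented on this page are used.

```python
import pandas as pd

from nhssynth.modules.dataloader.metatransformer import MetaTransformer

# Hypothetical inputs: a tabular CSV and a matching metadata file.
dataset = pd.read_csv("inpatient_episodes.csv")
mt = MetaTransformer.from_path(dataset, "inpatient_episodes_metadata.yaml")

# Runs drop_columns -> apply_dtypes -> apply_missingness_strategy -> transform.
transformed = mt.apply()

typed = mt.get_typed_dataset()  # intermediate dataset after dtype application
assert transformed.equals(mt.get_transformed_dataset())
```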
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.apply_dtypes","title":"apply_dtypes(data)","text":"

Applies dtypes from the metadata to dataset.

Returns:

DataFrame: The dataset with the dtypes applied.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def apply_dtypes(self, data: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Applies dtypes from the metadata to `dataset`.\n\n    Returns:\n        The dataset with the dtypes applied.\n    \"\"\"\n    working_data = data.copy()\n    for column_metadata in self._metadata:\n        working_data[column_metadata.name] = self._apply_dtype(working_data[column_metadata.name], column_metadata)\n    return working_data\n
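The effect of this step is a metadata-driven, column-wise `astype`. A pandas-only sketch with an illustrative dtype mapping (the real mapping comes from the `MetaData` object, not this dictionary):

```python
import pandas as pd

df = pd.DataFrame({"age": ["34", "57"], "sex": ["F", "M"]})
dtypes = {"age": "int64", "sex": "category"}  # illustrative mapping only
typed = df.astype(dtypes)
print(typed.dtypes)  # age: int64, sex: category
```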
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.apply_missingness_strategy","title":"apply_missingness_strategy()","text":"

Resolves missingness in the dataset via the MetaTransformer's global missingness strategy or column-wise missingness strategies. In the case of the AugmentMissingnessStrategy, the missingness is not resolved; instead, a new column or value is added for later transformation.

Returns:

DataFrame: The dataset with the missingness strategies applied.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def apply_missingness_strategy(self) -> pd.DataFrame:\n    \"\"\"\n    Resolves missingness in the dataset via the `MetaTransformer`'s global missingness strategy or\n    column-wise missingness strategies. In the case of the `AugmentMissingnessStrategy`, the missingness\n    is not resolved, instead a new column / value is added for later transformation.\n\n    Returns:\n        The dataset with the missingness strategies applied.\n    \"\"\"\n    working_data = self.typed_dataset.copy()\n    for column_metadata in self._metadata:\n        if not column_metadata.missingness_strategy:\n            column_metadata.missingness_strategy = (\n                self._missingness_strategy(self._impute_value)\n                if hasattr(self, \"_impute_value\")\n                else self._missingness_strategy()\n            )\n        if not working_data[column_metadata.name].isnull().any():\n            continue\n        working_data = column_metadata.missingness_strategy.remove(working_data, column_metadata)\n    return working_data\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.drop_columns","title":"drop_columns()","text":"

Drops columns from the dataset that are not in the MetaData.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def drop_columns(self) -> None:\n    \"\"\"\n    Drops columns from the dataset that are not in the `MetaData`.\n    \"\"\"\n    self._raw_dataset = self._raw_dataset[self._metadata.columns]\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.from_dict","title":"from_dict(dataset, metadata, **kwargs) classmethod","text":"

Instantiates a MetaTransformer from a metadata dictionary.

Parameters:

dataset (DataFrame): The raw input DataFrame. [required]
metadata (dict): A dictionary of raw metadata. [required]

Returns:

Self: A MetaTransformer object.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
@classmethod\ndef from_dict(cls, dataset: pd.DataFrame, metadata: dict, **kwargs) -> Self:\n    \"\"\"\n    Instantiates a MetaTransformer from a metadata dictionary.\n\n    Args:\n        dataset: The raw input DataFrame.\n        metadata: A dictionary of raw metadata.\n\n    Returns:\n        A MetaTransformer object.\n    \"\"\"\n    return cls(dataset, MetaData(dataset, metadata), **kwargs)\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.from_path","title":"from_path(dataset, metadata_path, **kwargs) classmethod","text":"

Instantiates a MetaTransformer from a metadata file via a provided path.

Parameters:

dataset (DataFrame): The raw input DataFrame. [required]
metadata_path (str): The path to the metadata file. [required]

Returns:

Self: A MetaTransformer object.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
@classmethod\ndef from_path(cls, dataset: pd.DataFrame, metadata_path: str, **kwargs) -> Self:\n    \"\"\"\n    Instantiates a MetaTransformer from a metadata file via a provided path.\n\n    Args:\n        dataset: The raw input DataFrame.\n        metadata_path: The path to the metadata file.\n\n    Returns:\n        A MetaTransformer object.\n    \"\"\"\n    return cls(dataset, MetaData.from_path(dataset, metadata_path), **kwargs)\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.get_multi_and_single_column_indices","title":"get_multi_and_single_column_indices()","text":"

Returns the indices of the columns that were transformed into one or multiple column(s).

Returns:

tuple[list[int], list[int]]: A tuple containing the multi-column indices and the single-column indices, in that order.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def get_multi_and_single_column_indices(self) -> tuple[list[int], list[int]]:\n    \"\"\"\n    Returns the indices of the columns that were transformed into one or multiple column(s).\n\n    Returns:\n        A tuple containing the indices of the single and multi columns.\n    \"\"\"\n    if not hasattr(self, \"multi_column_indices\") or not hasattr(self, \"single_column_indices\"):\n        raise ValueError(\n            \"The single and multi column indices have not yet been created. Call `mt.apply()` (or `mt.transform()`) first.\"\n        )\n    return self.multi_column_indices, self.single_column_indices\n
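To make the return value concrete, here is a hypothetical layout: a continuous column transformed into a normalised column plus three cluster-component columns, followed by a one-hot-encoded categorical column with two categories.

```python
# Hypothetical transformed column layout:
#   0: age_normalised                  (single continuous column)
#   1: age_c1, 2: age_c2, 3: age_c3    (one-hot cluster assignment block)
#   4: sex_F, 5: sex_M                 (one-hot category block)
multi_column_indices = [[1, 2, 3], [4, 5]]
single_column_indices = [0]

# A downstream model can treat index 0 as an ordinary continuous feature and
# apply e.g. a softmax over each multi-column block.
```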
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.get_sdv_metadata","title":"get_sdv_metadata()","text":"

Calls the MetaData method to reformat its contents into the correct format for use with SDMetrics.

Returns:

dict[str, dict[str, Any]]: The metadata in the correct format for SDMetrics.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def get_sdv_metadata(self) -> dict[str, dict[str, Any]]:\n    \"\"\"\n    Calls the `MetaData` method to reformat its contents into the correct format for use with SDMetrics.\n\n    Returns:\n        The metadata in the correct format for SDMetrics.\n    \"\"\"\n    return self._metadata.get_sdv_metadata()\n
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.inverse_apply","title":"inverse_apply(dataset)","text":"

Reverses the transformation applied by the MetaTransformer.

Parameters:

dataset (DataFrame): The transformed dataset. [required]

Returns:

DataFrame: The original dataset.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def inverse_apply(self, dataset: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Reverses the transformation applied by the MetaTransformer.\n\n    Args:\n        dataset: The transformed dataset.\n\n    Returns:\n        The original dataset.\n    \"\"\"\n    for column_metadata in self._metadata:\n        dataset = column_metadata.transformer.revert(dataset)\n    return self.apply_dtypes(dataset)\n
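Continuing the hypothetical sketch from the `apply()` section above (same `mt` and `transformed`), transformed or synthetic data can be mapped back to the original representation:

```python
# `transformed` could equally be a synthetic sample in the transformed space.
recovered = mt.inverse_apply(transformed.copy())
print(recovered.dtypes)  # same dtypes as the typed dataset
```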
"},{"location":"reference/modules/dataloader/metatransformer/#nhssynth.modules.dataloader.metatransformer.MetaTransformer.transform","title":"transform()","text":"

Prepares the dataset by applying each of the columns' transformers and recording the indices of the single and multi columns.

Returns:

DataFrame: The transformed dataset.

Source code in src/nhssynth/modules/dataloader/metatransformer.py
def transform(self) -> pd.DataFrame:\n    \"\"\"\n    Prepares the dataset by applying each of the columns' transformers and recording the indices of the single and multi columns.\n\n    Returns:\n        The transformed dataset.\n    \"\"\"\n    transformed_columns = []\n    self.single_column_indices = []\n    self.multi_column_indices = []\n    col_counter = 0\n    working_data = self.post_missingness_strategy_dataset.copy()\n\n    # iteratively build the transformed df\n    for column_metadata in tqdm(\n        self._metadata, desc=\"Transforming data\", unit=\"column\", total=len(self._metadata.columns)\n    ):\n        missingness_carrier = self._get_missingness_carrier(column_metadata)\n        transformed_data = column_metadata.transformer.apply(\n            working_data[column_metadata.name], missingness_carrier\n        )\n        transformed_columns.append(transformed_data)\n\n        # track single and multi column indices to supply to the model\n        if isinstance(transformed_data, pd.DataFrame) and transformed_data.shape[1] > 1:\n            num_to_add = transformed_data.shape[1]\n            if not column_metadata.categorical:\n                self.single_column_indices.append(col_counter)\n                col_counter += 1\n                num_to_add -= 1\n            self.multi_column_indices.append(list(range(col_counter, col_counter + num_to_add)))\n            col_counter += num_to_add\n        else:\n            self.single_column_indices.append(col_counter)\n            col_counter += 1\n\n    return pd.concat(transformed_columns, axis=1)\n
"},{"location":"reference/modules/dataloader/missingness/","title":"missingness","text":""},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.AugmentMissingnessStrategy","title":"AugmentMissingnessStrategy","text":"

Bases: GenericMissingnessStrategy

Source code in src/nhssynth/modules/dataloader/missingness.py
class AugmentMissingnessStrategy(GenericMissingnessStrategy):\n    def __init__(self) -> None:\n        super().__init__(\"augment\")\n\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"\n        Impute missingness with the model. To do this we create a new column for continuous features and a new category for categorical features.\n\n        Args:\n            data: The dataset.\n            column_metadata: The column metadata enabling the correct set up of the missingness strategy.\n\n        Returns:\n            The dataset, potentially with a new column representing the missingness for the column added.\n        \"\"\"\n        if column_metadata.categorical:\n            if column_metadata.dtype.kind == \"O\":\n                self.missingness_carrier = column_metadata.name + \"_missing\"\n            else:\n                self.missingness_carrier = data[column_metadata.name].min() - 1\n        else:\n            self.missingness_carrier = column_metadata.name + \"_missing\"\n            data[self.missingness_carrier] = data[column_metadata.name].isnull().astype(int)\n        return data\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.AugmentMissingnessStrategy.remove","title":"remove(data, column_metadata)","text":"

Impute missingness with the model. To do this we create a new column for continuous features and a new category for categorical features.

Parameters:

data (DataFrame): The dataset. [required]
column_metadata (ColumnMetaData): The column metadata enabling the correct setup of the missingness strategy. [required]

Returns:

DataFrame: The dataset, potentially with a new column representing the missingness for the column added.

Source code in src/nhssynth/modules/dataloader/missingness.py
def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"\n    Impute missingness with the model. To do this we create a new column for continuous features and a new category for categorical features.\n\n    Args:\n        data: The dataset.\n        column_metadata: The column metadata enabling the correct set up of the missingness strategy.\n\n    Returns:\n        The dataset, potentially with a new column representing the missingness for the column added.\n    \"\"\"\n    if column_metadata.categorical:\n        if column_metadata.dtype.kind == \"O\":\n            self.missingness_carrier = column_metadata.name + \"_missing\"\n        else:\n            self.missingness_carrier = data[column_metadata.name].min() - 1\n    else:\n        self.missingness_carrier = column_metadata.name + \"_missing\"\n        data[self.missingness_carrier] = data[column_metadata.name].isnull().astype(int)\n    return data\n
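A pandas-only sketch of what the augmentation amounts to for a continuous column (toy data; the `<name>_missing` naming follows the convention above):

```python
import pandas as pd

data = pd.DataFrame({"bmi": [22.5, None, 30.1]})

# Continuous column: add an indicator so the model can learn the missingness pattern.
data["bmi_missing"] = data["bmi"].isnull().astype(int)
print(data)
#     bmi  bmi_missing
# 0  22.5            0
# 1   NaN            1
# 2  30.1            0
```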
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.DropMissingnessStrategy","title":"DropMissingnessStrategy","text":"

Bases: GenericMissingnessStrategy

Drop missingness strategy.

Source code in src/nhssynth/modules/dataloader/missingness.py
class DropMissingnessStrategy(GenericMissingnessStrategy):\n    \"\"\"Drop missingness strategy.\"\"\"\n\n    def __init__(self) -> None:\n        super().__init__(\"drop\")\n\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"\n        Drop rows containing missing values in the appropriate column.\n\n        Args:\n            data: The dataset.\n            column_metadata: The column metadata.\n\n        Returns:\n            The dataset with rows containing missing values in the appropriate column dropped.\n        \"\"\"\n        return data.dropna(subset=[column_metadata.name]).reset_index(drop=True)\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.DropMissingnessStrategy.remove","title":"remove(data, column_metadata)","text":"

Drop rows containing missing values in the appropriate column.

Parameters:

data (DataFrame): The dataset. [required]
column_metadata (ColumnMetaData): The column metadata. [required]

Returns:

DataFrame: The dataset with rows containing missing values in the appropriate column dropped.

Source code in src/nhssynth/modules/dataloader/missingness.py
def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"\n    Drop rows containing missing values in the appropriate column.\n\n    Args:\n        data: The dataset.\n        column_metadata: The column metadata.\n\n    Returns:\n        The dataset with rows containing missing values in the appropriate column dropped.\n    \"\"\"\n    return data.dropna(subset=[column_metadata.name]).reset_index(drop=True)\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.GenericMissingnessStrategy","title":"GenericMissingnessStrategy","text":"

Bases: ABC

Generic missingness strategy.

Source code in src/nhssynth/modules/dataloader/missingness.py
class GenericMissingnessStrategy(ABC):\n    \"\"\"Generic missingness strategy.\"\"\"\n\n    def __init__(self, name: str) -> None:\n        super().__init__()\n        self.name: str = name\n\n    @abstractmethod\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"Remove missingness.\"\"\"\n        pass\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.GenericMissingnessStrategy.remove","title":"remove(data, column_metadata) abstractmethod","text":"

Remove missingness.

Source code in src/nhssynth/modules/dataloader/missingness.py
@abstractmethod\ndef remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"Remove missingness.\"\"\"\n    pass\n
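As a sketch of how a new strategy could plug into this interface, a hypothetical forward-fill strategy (not part of the package) only needs to implement `remove`:

```python
import pandas as pd

from nhssynth.modules.dataloader.missingness import GenericMissingnessStrategy


class ForwardFillMissingnessStrategy(GenericMissingnessStrategy):
    """Hypothetical strategy: carry the last observed value forward."""

    def __init__(self) -> None:
        super().__init__("ffill")

    def remove(self, data: pd.DataFrame, column_metadata) -> pd.DataFrame:
        data[column_metadata.name] = data[column_metadata.name].ffill()
        return data
```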
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.ImputeMissingnessStrategy","title":"ImputeMissingnessStrategy","text":"

Bases: GenericMissingnessStrategy

Impute missingness strategy: fills missing values with the mean, median, mode, or a constant value.

Source code in src/nhssynth/modules/dataloader/missingness.py
class ImputeMissingnessStrategy(GenericMissingnessStrategy):\n    \"\"\"Impute missingness with mean strategy.\"\"\"\n\n    def __init__(self, impute: Any) -> None:\n        super().__init__(\"impute\")\n        self.impute = impute.lower() if isinstance(impute, str) else impute\n\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"\n        Impute missingness in the data via the `impute` strategy. 'Special' values trigger specific behaviour.\n\n        Args:\n            data: The dataset.\n            column_metadata: The column metadata.\n\n        Returns:\n            The dataset with missing values in the appropriate column replaced with imputed ones.\n        \"\"\"\n        if (self.impute == \"mean\" or self.impute == \"median\") and column_metadata.categorical:\n            warnings.warn(\"Cannot impute mean or median for categorical data, using mode instead.\")\n            self.imputation_value = data[column_metadata.name].mode()[0]\n        elif self.impute == \"mean\":\n            self.imputation_value = data[column_metadata.name].mean()\n        elif self.impute == \"median\":\n            self.imputation_value = data[column_metadata.name].median()\n        elif self.impute == \"mode\":\n            self.imputation_value = data[column_metadata.name].mode()[0]\n        else:\n            self.imputation_value = self.impute\n        self.imputation_value = column_metadata.dtype.type(self.imputation_value)\n        try:\n            data[column_metadata.name].fillna(self.imputation_value, inplace=True)\n        except AssertionError:\n            raise ValueError(f\"Could not impute '{self.imputation_value}' into column: '{column_metadata.name}'.\")\n        return data\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.ImputeMissingnessStrategy.remove","title":"remove(data, column_metadata)","text":"

Impute missingness in the data via the impute strategy. 'Special' values trigger specific behaviour.

Parameters:

data (DataFrame): The dataset. [required]
column_metadata (ColumnMetaData): The column metadata. [required]

Returns:

DataFrame: The dataset with missing values in the appropriate column replaced with imputed ones.

Source code in src/nhssynth/modules/dataloader/missingness.py
def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"\n    Impute missingness in the data via the `impute` strategy. 'Special' values trigger specific behaviour.\n\n    Args:\n        data: The dataset.\n        column_metadata: The column metadata.\n\n    Returns:\n        The dataset with missing values in the appropriate column replaced with imputed ones.\n    \"\"\"\n    if (self.impute == \"mean\" or self.impute == \"median\") and column_metadata.categorical:\n        warnings.warn(\"Cannot impute mean or median for categorical data, using mode instead.\")\n        self.imputation_value = data[column_metadata.name].mode()[0]\n    elif self.impute == \"mean\":\n        self.imputation_value = data[column_metadata.name].mean()\n    elif self.impute == \"median\":\n        self.imputation_value = data[column_metadata.name].median()\n    elif self.impute == \"mode\":\n        self.imputation_value = data[column_metadata.name].mode()[0]\n    else:\n        self.imputation_value = self.impute\n    self.imputation_value = column_metadata.dtype.type(self.imputation_value)\n    try:\n        data[column_metadata.name].fillna(self.imputation_value, inplace=True)\n    except AssertionError:\n        raise ValueError(f\"Could not impute '{self.imputation_value}' into column: '{column_metadata.name}'.\")\n    return data\n
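A pandas-only sketch of the imputation logic on toy data: mean for a numeric column, with mode as the categorical fallback:

```python
import pandas as pd

df = pd.DataFrame({"age": [30.0, None, 50.0], "sex": ["F", None, "F"]})

df["age"] = df["age"].fillna(df["age"].mean())     # mean of [30, 50] -> 40.0
df["sex"] = df["sex"].fillna(df["sex"].mode()[0])  # mode -> "F"
print(df)
```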
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.NullMissingnessStrategy","title":"NullMissingnessStrategy","text":"

Bases: GenericMissingnessStrategy

Null missingness strategy.

Source code in src/nhssynth/modules/dataloader/missingness.py
class NullMissingnessStrategy(GenericMissingnessStrategy):\n    \"\"\"Null missingness strategy.\"\"\"\n\n    def __init__(self) -> None:\n        super().__init__(\"none\")\n\n    def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n        \"\"\"Do nothing.\"\"\"\n        return data\n
"},{"location":"reference/modules/dataloader/missingness/#nhssynth.modules.dataloader.missingness.NullMissingnessStrategy.remove","title":"remove(data, column_metadata)","text":"

Do nothing.

Source code in src/nhssynth/modules/dataloader/missingness.py
def remove(self, data: pd.DataFrame, column_metadata: ColumnMetaData) -> pd.DataFrame:\n    \"\"\"Do nothing.\"\"\"\n    return data\n
"},{"location":"reference/modules/dataloader/run/","title":"run","text":""},{"location":"reference/modules/dataloader/transformers/","title":"transformers","text":""},{"location":"reference/modules/dataloader/transformers/base/","title":"base","text":""},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.ColumnTransformer","title":"ColumnTransformer","text":"

Bases: ABC

A generic column transformer class to prototype all of the transformers applied via the MetaTransformer.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
class ColumnTransformer(ABC):\n    \"\"\"A generic column transformer class to prototype all of the transformers applied via the [`MetaTransformer`][nhssynth.modules.dataloader.metatransformer.MetaTransformer].\"\"\"\n\n    def __init__(self) -> None:\n        super().__init__()\n\n    @abstractmethod\n    def apply(self, data: pd.DataFrame, missingness_column: Optional[pd.Series]) -> None:\n        \"\"\"Apply the transformer to the data.\"\"\"\n        pass\n\n    @abstractmethod\n    def revert(self, data: pd.DataFrame) -> None:\n        \"\"\"Revert data to pre-transformer state.\"\"\"\n        pass\n
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.ColumnTransformer.apply","title":"apply(data, missingness_column) abstractmethod","text":"

Apply the transformer to the data.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
@abstractmethod\ndef apply(self, data: pd.DataFrame, missingness_column: Optional[pd.Series]) -> None:\n    \"\"\"Apply the transformer to the data.\"\"\"\n    pass\n
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.ColumnTransformer.revert","title":"revert(data) abstractmethod","text":"

Revert data to pre-transformer state.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
@abstractmethod\ndef revert(self, data: pd.DataFrame) -> None:\n    \"\"\"Revert data to pre-transformer state.\"\"\"\n    pass\n
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.TransformerWrapper","title":"TransformerWrapper","text":"

Bases: ABC

A class to facilitate nesting of ColumnTransformers.

Parameters:

wrapped_transformer (ColumnTransformer): The ColumnTransformer to wrap. [required]

Source code in src/nhssynth/modules/dataloader/transformers/base.py
class TransformerWrapper(ABC):\n    \"\"\"\n    A class to facilitate nesting of [`ColumnTransformer`][nhssynth.modules.dataloader.transformers.base.ColumnTransformer]s.\n\n    Args:\n        wrapped_transformer: The [`ColumnTransformer`][nhssynth.modules.dataloader.transformers.base.ColumnTransformer] to wrap.\n    \"\"\"\n\n    def __init__(self, wrapped_transformer: ColumnTransformer) -> None:\n        super().__init__()\n        self._wrapped_transformer: ColumnTransformer = wrapped_transformer\n\n    def apply(self, data: pd.Series, missingness_column: Optional[pd.Series], **kwargs) -> pd.DataFrame:\n        \"\"\"Method for applying the wrapped transformer to the data.\"\"\"\n        return self._wrapped_transformer.apply(data, missingness_column, **kwargs)\n\n    def revert(self, data: pd.Series, **kwargs) -> pd.DataFrame:\n        \"\"\"Method for reverting the passed data via the wrapped transformer.\"\"\"\n        return self._wrapped_transformer.revert(data, **kwargs)\n
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.TransformerWrapper.apply","title":"apply(data, missingness_column, **kwargs)","text":"

Method for applying the wrapped transformer to the data.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
def apply(self, data: pd.Series, missingness_column: Optional[pd.Series], **kwargs) -> pd.DataFrame:\n    \"\"\"Method for applying the wrapped transformer to the data.\"\"\"\n    return self._wrapped_transformer.apply(data, missingness_column, **kwargs)\n
"},{"location":"reference/modules/dataloader/transformers/base/#nhssynth.modules.dataloader.transformers.base.TransformerWrapper.revert","title":"revert(data, **kwargs)","text":"

Method for reverting the passed data via the wrapped transformer.

Source code in src/nhssynth/modules/dataloader/transformers/base.py
def revert(self, data: pd.Series, **kwargs) -> pd.DataFrame:\n    \"\"\"Method for reverting the passed data via the wrapped transformer.\"\"\"\n    return self._wrapped_transformer.revert(data, **kwargs)\n
"},{"location":"reference/modules/dataloader/transformers/categorical/","title":"categorical","text":""},{"location":"reference/modules/dataloader/transformers/categorical/#nhssynth.modules.dataloader.transformers.categorical.OHECategoricalTransformer","title":"OHECategoricalTransformer","text":"

Bases: ColumnTransformer

A transformer to one-hot encode categorical features via sklearn's OneHotEncoder. Essentially wraps the fit_transform and inverse_transform methods of OneHotEncoder to comply with the ColumnTransformer interface.

Parameters:

drop (Optional[Union[list, str]]): str or list of str, to pass to OneHotEncoder's drop parameter. [default: None]

Attributes:

missing_value (Any): The value used to fill missing values in the data.

After applying the transformer, the following attributes will be populated:

Attributes:

original_column_name: The name of the original column.
new_column_names: The names of the columns generated by the transformer.

Source code in src/nhssynth/modules/dataloader/transformers/categorical.py
class OHECategoricalTransformer(ColumnTransformer):\n    \"\"\"\n    A transformer to one-hot encode categorical features via sklearn's `OneHotEncoder`.\n    Essentially wraps the `fit_transformer` and `inverse_transform` methods of `OneHotEncoder` to comply with the `ColumnTransformer` interface.\n\n    Args:\n        drop: str or list of str, to pass to `OneHotEncoder`'s `drop` parameter.\n\n    Attributes:\n        missing_value: The value used to fill missing values in the data.\n\n    After applying the transformer, the following attributes will be populated:\n\n    Attributes:\n        original_column_name: The name of the original column.\n        new_column_names: The names of the columns generated by the transformer.\n    \"\"\"\n\n    def __init__(self, drop: Optional[Union[list, str]] = None) -> None:\n        super().__init__()\n        self._drop: Union[list, str] = drop\n        self._transformer: OneHotEncoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=self._drop)\n        self.missing_value: Any = None\n\n    def apply(self, data: pd.Series, missing_value: Optional[Any] = None) -> pd.DataFrame:\n        \"\"\"\n        Apply the transformer to the data via sklearn's `OneHotEncoder`'s `fit_transform` method. Name the new columns via manipulation of the original column name.\n        If `missing_value` is provided, fill missing values with this value before applying the transformer to ensure a new category is added.\n\n        Args:\n            data: The column of data to transform.\n            missing_value: The value learned by the `MetaTransformer` to represent missingness, this is only used as part of the `AugmentMissingnessStrategy`.\n        \"\"\"\n        self.original_column_name = data.name\n        if missing_value:\n            data = data.fillna(missing_value)\n            self.missing_value = missing_value\n        transformed_data = pd.DataFrame(\n            self._transformer.fit_transform(data.values.reshape(-1, 1)),\n            columns=self._transformer.get_feature_names_out(input_features=[data.name]),\n        )\n        self.new_column_names = transformed_data.columns\n        return transformed_data\n\n    def revert(self, data: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"\n        Revert data to pre-transformer state via sklearn's `OneHotEncoder`'s `inverse_transform` method.\n        If `missing_value` is provided, replace instances of this value in the data with `np.nan` to ensure missing values are represented correctly in the case\n        where `missing_value` was 'modelled' and thus generated.\n\n        Args:\n            data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n        Returns:\n            The dataset with a single categorical column that is analogous to the original column, with the same name, and without the generated one-hot columns.\n        \"\"\"\n        data[self.original_column_name] = pd.Series(\n            self._transformer.inverse_transform(data[self.new_column_names].values).flatten(),\n            index=data.index,\n            name=self.original_column_name,\n        )\n        if self.missing_value:\n            data[self.original_column_name] = data[self.original_column_name].replace(self.missing_value, np.nan)\n        return data.drop(self.new_column_names, axis=1)\n
"},{"location":"reference/modules/dataloader/transformers/categorical/#nhssynth.modules.dataloader.transformers.categorical.OHECategoricalTransformer.apply","title":"apply(data, missing_value=None)","text":"

Apply the transformer to the data via sklearn's OneHotEncoder's fit_transform method. Name the new columns via manipulation of the original column name. If missing_value is provided, fill missing values with this value before applying the transformer to ensure a new category is added.

Parameters:

data (Series): The column of data to transform. [required]
missing_value (Optional[Any]): The value learned by the MetaTransformer to represent missingness; this is only used as part of the AugmentMissingnessStrategy. [default: None]

Source code in src/nhssynth/modules/dataloader/transformers/categorical.py
def apply(self, data: pd.Series, missing_value: Optional[Any] = None) -> pd.DataFrame:\n    \"\"\"\n    Apply the transformer to the data via sklearn's `OneHotEncoder`'s `fit_transform` method. Name the new columns via manipulation of the original column name.\n    If `missing_value` is provided, fill missing values with this value before applying the transformer to ensure a new category is added.\n\n    Args:\n        data: The column of data to transform.\n        missing_value: The value learned by the `MetaTransformer` to represent missingness, this is only used as part of the `AugmentMissingnessStrategy`.\n    \"\"\"\n    self.original_column_name = data.name\n    if missing_value:\n        data = data.fillna(missing_value)\n        self.missing_value = missing_value\n    transformed_data = pd.DataFrame(\n        self._transformer.fit_transform(data.values.reshape(-1, 1)),\n        columns=self._transformer.get_feature_names_out(input_features=[data.name]),\n    )\n    self.new_column_names = transformed_data.columns\n    return transformed_data\n
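A sketch of the underlying sklearn calls (assuming scikit-learn >= 1.2 for the `sparse_output` argument), mirroring how the new column names are derived from the original column name:

```python
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

data = pd.Series(["A", "B", "A"], name="ward")  # toy categorical column
enc = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

encoded = pd.DataFrame(
    enc.fit_transform(data.values.reshape(-1, 1)),
    columns=enc.get_feature_names_out(input_features=[data.name]),
)
print(encoded.columns.tolist())  # ['ward_A', 'ward_B']
```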
"},{"location":"reference/modules/dataloader/transformers/categorical/#nhssynth.modules.dataloader.transformers.categorical.OHECategoricalTransformer.revert","title":"revert(data)","text":"

Revert data to pre-transformer state via sklearn's OneHotEncoder's inverse_transform method. If missing_value is provided, replace instances of this value in the data with np.nan to ensure missing values are represented correctly in the case where missing_value was 'modelled' and thus generated.

Parameters:

data (DataFrame): The full dataset including the column(s) to be reverted to their pre-transformer state. [required]

Returns:

DataFrame: The dataset with a single categorical column that is analogous to the original column, with the same name, and without the generated one-hot columns.

Source code in src/nhssynth/modules/dataloader/transformers/categorical.py
def revert(self, data: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Revert data to pre-transformer state via sklearn's `OneHotEncoder`'s `inverse_transform` method.\n    If `missing_value` is provided, replace instances of this value in the data with `np.nan` to ensure missing values are represented correctly in the case\n    where `missing_value` was 'modelled' and thus generated.\n\n    Args:\n        data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n    Returns:\n        The dataset with a single categorical column that is analogous to the original column, with the same name, and without the generated one-hot columns.\n    \"\"\"\n    data[self.original_column_name] = pd.Series(\n        self._transformer.inverse_transform(data[self.new_column_names].values).flatten(),\n        index=data.index,\n        name=self.original_column_name,\n    )\n    if self.missing_value:\n        data[self.original_column_name] = data[self.original_column_name].replace(self.missing_value, np.nan)\n    return data.drop(self.new_column_names, axis=1)\n
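The reverse direction, shown standalone with the same toy column: `inverse_transform` collapses the one-hot block back into a single categorical column:

```python
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

original = pd.Series(["A", "B", "A"], name="ward")
enc = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
onehot = enc.fit_transform(original.values.reshape(-1, 1))

recovered = pd.Series(enc.inverse_transform(onehot).flatten(), name="ward")
assert list(recovered) == list(original)
```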
"},{"location":"reference/modules/dataloader/transformers/continuous/","title":"continuous","text":""},{"location":"reference/modules/dataloader/transformers/continuous/#nhssynth.modules.dataloader.transformers.continuous.ClusterContinuousTransformer","title":"ClusterContinuousTransformer","text":"

Bases: ColumnTransformer

A transformer to cluster continuous features via sklearn's BayesianGaussianMixture. Essentially wraps the process of fitting the BGM model and generating cluster assignments and normalised values for the data to comply with the ColumnTransformer interface.

Parameters:

n_components (int): The number of components to use in the BGM model. [default: 10]
n_init (int): The number of initialisations to use in the BGM model. [default: 1]
init_params (str): The initialisation method to use in the BGM model. [default: 'kmeans']
random_state (int): The random state to use in the BGM model. [default: 0]
max_iter (int): The maximum number of iterations to use in the BGM model. [default: 1000]
remove_unused_components (bool): Whether to remove components that have no data assigned (EXPERIMENTAL). [default: False]
clip_output (bool): Whether to clip the output normalised values to the range [-1, 1]. [default: False]

After applying the transformer, the following attributes will be populated:

Attributes:

means: The means of the components in the BGM model.
stds: The standard deviations of the components in the BGM model.
new_column_names: The names of the columns generated by the transformer (one for the normalised values and one for each cluster component).

Source code in src/nhssynth/modules/dataloader/transformers/continuous.py
class ClusterContinuousTransformer(ColumnTransformer):\n    \"\"\"\n    A transformer to cluster continuous features via sklearn's `BayesianGaussianMixture`.\n    Essentially wraps the process of fitting the BGM model and generating cluster assignments and normalised values for the data to comply with the `ColumnTransformer` interface.\n\n    Args:\n        n_components: The number of components to use in the BGM model.\n        n_init: The number of initialisations to use in the BGM model.\n        init_params: The initialisation method to use in the BGM model.\n        random_state: The random state to use in the BGM model.\n        max_iter: The maximum number of iterations to use in the BGM model.\n        remove_unused_components: Whether to remove components that have no data assigned EXPERIMENTAL.\n        clip_output: Whether to clip the output normalised values to the range [-1, 1].\n\n    After applying the transformer, the following attributes will be populated:\n\n    Attributes:\n        means: The means of the components in the BGM model.\n        stds: The standard deviations of the components in the BGM model.\n        new_column_names: The names of the columns generated by the transformer (one for the normalised values and one for each cluster component).\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components: int = 10,\n        n_init: int = 1,\n        init_params: str = \"kmeans\",\n        random_state: int = 0,\n        max_iter: int = 1000,\n        remove_unused_components: bool = False,\n        clip_output: bool = False,\n    ) -> None:\n        super().__init__()\n        self._transformer = BayesianGaussianMixture(\n            n_components=n_components,\n            random_state=random_state,\n            n_init=n_init,\n            init_params=init_params,\n            max_iter=max_iter,\n            weight_concentration_prior=1e-3,\n        )\n        self._n_components = n_components\n        self._std_multiplier = 4\n        self._missingness_column_name = None\n        self._max_iter = max_iter\n        self.remove_unused_components = remove_unused_components\n        self.clip_output = clip_output\n\n    def apply(self, data: pd.Series, missingness_column: Optional[pd.Series] = None) -> pd.DataFrame:\n        \"\"\"\n        Apply the transformer to the data via sklearn's `BayesianGaussianMixture`'s `fit` and `predict_proba` methods.\n        Name the new columns via the original column name.\n\n        If `missingness_column` is provided, use this to extract the non-missing data; the missing values are assigned to a new pseudo-cluster with mean 0\n        (i.e. all values in the normalised column are 0.0). 
We do this by taking the full index before subsetting to non-missing data, then reindexing.\n\n        Args:\n            data: The column of data to transform.\n            missingness_column: The column of data representing missingness, this is only used as part of the `AugmentMissingnessStrategy`.\n\n        Returns:\n            The transformed data (will be multiple columns if `n_components` > 1 at initialisation).\n        \"\"\"\n        self.original_column_name = data.name\n        if missingness_column is not None:\n            self._missingness_column_name = missingness_column.name\n            full_index = data.index\n            data = data[missingness_column == 0]\n        index = data.index\n        data = np.array(data.values.reshape(-1, 1), dtype=data.dtype.name.lower())\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n            self._transformer.fit(data)\n\n        self.means = self._transformer.means_.reshape(-1)\n        self.stds = np.sqrt(self._transformer.covariances_).reshape(-1)\n\n        components = np.argmax(self._transformer.predict_proba(data), axis=1)\n        normalised_values = (data - self.means.reshape(1, -1)) / (self._std_multiplier * self.stds.reshape(1, -1))\n        normalised = normalised_values[np.arange(len(data)), components]\n        normalised = np.clip(normalised, -1.0, 1.0)\n        components = np.eye(self._n_components, dtype=int)[components]\n\n        transformed_data = pd.DataFrame(\n            np.hstack([normalised.reshape(-1, 1), components]),\n            index=index,\n            columns=[f\"{self.original_column_name}_normalised\"]\n            + [f\"{self.original_column_name}_c{i + 1}\" for i in range(self._n_components)],\n        )\n\n        # EXPERIMENTAL feature, removing components from the column matrix that have no data assigned to them\n        if self.remove_unused_components:\n            nunique = transformed_data.iloc[:, 1:].nunique(dropna=False)\n            unused_components = nunique[nunique == 1].index\n            unused_component_idx = [transformed_data.columns.get_loc(col_name) - 1 for col_name in unused_components]\n            self.means = np.delete(self.means, unused_component_idx)\n            self.stds = np.delete(self.stds, unused_component_idx)\n            transformed_data.drop(unused_components, axis=1, inplace=True)\n\n        if missingness_column is not None:\n            transformed_data = pd.concat([transformed_data.reindex(full_index).fillna(0.0), missingness_column], axis=1)\n\n        self.new_column_names = transformed_data.columns\n        return transformed_data.astype(\n            {col_name: int for col_name in transformed_data.columns if re.search(r\"_c\\d+\", col_name)}\n        )\n\n    def revert(self, data: pd.DataFrame) -> pd.DataFrame:\n        \"\"\"\n        Revert data to pre-transformer state via the means and stds of the BGM. 
Extract the relevant columns from the data via the `new_column_names` attribute.\n        If `missingness_column` was provided to the `apply` method, drop the missing values from the data before reverting and use the `full_index` to\n        reintroduce missing values when `original_column_name` is constructed.\n\n        Args:\n            data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n        Returns:\n            The dataset with a single continuous column that is analogous to the original column, with the same name, and without the generated columns from which it is derived.\n        \"\"\"\n        working_data = data[self.new_column_names]\n        full_index = working_data.index\n        if self._missingness_column_name is not None:\n            working_data = working_data[working_data[self._missingness_column_name] == 0]\n            working_data = working_data.drop(self._missingness_column_name, axis=1)\n        index = working_data.index\n\n        components = np.argmax(working_data.filter(regex=r\".*_c\\d+\").values, axis=1)\n        working_data = working_data.filter(like=\"_normalised\").values.reshape(-1)\n        if self.clip_output:\n            working_data = np.clip(working_data, -1.0, 1.0)\n\n        mean_t = self.means[components]\n        std_t = self.stds[components]\n        data[self.original_column_name] = pd.Series(\n            working_data * self._std_multiplier * std_t + mean_t, index=index, name=self.original_column_name\n        ).reindex(full_index)\n        data.drop(self.new_column_names, axis=1, inplace=True)\n        return data\n
"},{"location":"reference/modules/dataloader/transformers/continuous/#nhssynth.modules.dataloader.transformers.continuous.ClusterContinuousTransformer.apply","title":"apply(data, missingness_column=None)","text":"

Apply the transformer to the data via sklearn's BayesianGaussianMixture's fit and predict_proba methods. Name the new columns via the original column name.

If missingness_column is provided, use this to extract the non-missing data; the missing values are assigned to a new pseudo-cluster with mean 0 (i.e. all values in the normalised column are 0.0). We do this by taking the full index before subsetting to non-missing data, then reindexing.

Parameters:

data (Series): The column of data to transform. [required]
missingness_column (Optional[Series]): The column of data representing missingness; this is only used as part of the AugmentMissingnessStrategy. [default: None]

Returns:

DataFrame: The transformed data (will be multiple columns if n_components > 1 at initialisation).

Source code in src/nhssynth/modules/dataloader/transformers/continuous.py
def apply(self, data: pd.Series, missingness_column: Optional[pd.Series] = None) -> pd.DataFrame:\n    \"\"\"\n    Apply the transformer to the data via sklearn's `BayesianGaussianMixture`'s `fit` and `predict_proba` methods.\n    Name the new columns via the original column name.\n\n    If `missingness_column` is provided, use this to extract the non-missing data; the missing values are assigned to a new pseudo-cluster with mean 0\n    (i.e. all values in the normalised column are 0.0). We do this by taking the full index before subsetting to non-missing data, then reindexing.\n\n    Args:\n        data: The column of data to transform.\n        missingness_column: The column of data representing missingness, this is only used as part of the `AugmentMissingnessStrategy`.\n\n    Returns:\n        The transformed data (will be multiple columns if `n_components` > 1 at initialisation).\n    \"\"\"\n    self.original_column_name = data.name\n    if missingness_column is not None:\n        self._missingness_column_name = missingness_column.name\n        full_index = data.index\n        data = data[missingness_column == 0]\n    index = data.index\n    data = np.array(data.values.reshape(-1, 1), dtype=data.dtype.name.lower())\n\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n        self._transformer.fit(data)\n\n    self.means = self._transformer.means_.reshape(-1)\n    self.stds = np.sqrt(self._transformer.covariances_).reshape(-1)\n\n    components = np.argmax(self._transformer.predict_proba(data), axis=1)\n    normalised_values = (data - self.means.reshape(1, -1)) / (self._std_multiplier * self.stds.reshape(1, -1))\n    normalised = normalised_values[np.arange(len(data)), components]\n    normalised = np.clip(normalised, -1.0, 1.0)\n    components = np.eye(self._n_components, dtype=int)[components]\n\n    transformed_data = pd.DataFrame(\n        np.hstack([normalised.reshape(-1, 1), components]),\n        index=index,\n        columns=[f\"{self.original_column_name}_normalised\"]\n        + [f\"{self.original_column_name}_c{i + 1}\" for i in range(self._n_components)],\n    )\n\n    # EXPERIMENTAL feature, removing components from the column matrix that have no data assigned to them\n    if self.remove_unused_components:\n        nunique = transformed_data.iloc[:, 1:].nunique(dropna=False)\n        unused_components = nunique[nunique == 1].index\n        unused_component_idx = [transformed_data.columns.get_loc(col_name) - 1 for col_name in unused_components]\n        self.means = np.delete(self.means, unused_component_idx)\n        self.stds = np.delete(self.stds, unused_component_idx)\n        transformed_data.drop(unused_components, axis=1, inplace=True)\n\n    if missingness_column is not None:\n        transformed_data = pd.concat([transformed_data.reindex(full_index).fillna(0.0), missingness_column], axis=1)\n\n    self.new_column_names = transformed_data.columns\n    return transformed_data.astype(\n        {col_name: int for col_name in transformed_data.columns if re.search(r\"_c\\d+\", col_name)}\n    )\n
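A compressed sketch of the forward pass with plain scikit-learn and numpy (toy one-dimensional data; fewer components than the default of 10):

```python
import numpy as np
from sklearn.mixture import BayesianGaussianMixture

rng = np.random.default_rng(0)
x = np.concatenate([rng.normal(0, 1, 200), rng.normal(10, 1, 200)]).reshape(-1, 1)

bgm = BayesianGaussianMixture(n_components=3, random_state=0, max_iter=1000)
bgm.fit(x)

means = bgm.means_.reshape(-1)
stds = np.sqrt(bgm.covariances_).reshape(-1)

# Hard cluster assignment plus a per-component normalised value in [-1, 1].
components = np.argmax(bgm.predict_proba(x), axis=1)
normalised = (x.reshape(-1) - means[components]) / (4 * stds[components])
normalised = np.clip(normalised, -1.0, 1.0)
one_hot = np.eye(3, dtype=int)[components]  # the cluster-assignment columns
```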
"},{"location":"reference/modules/dataloader/transformers/continuous/#nhssynth.modules.dataloader.transformers.continuous.ClusterContinuousTransformer.revert","title":"revert(data)","text":"

Revert data to pre-transformer state via the means and stds of the BGM. Extract the relevant columns from the data via the new_column_names attribute. If missingness_column was provided to the apply method, drop the missing values from the data before reverting and use the full_index to reintroduce missing values when original_column_name is constructed.

Parameters:

data (DataFrame): The full dataset including the column(s) to be reverted to their pre-transformer state. [required]

Returns:

DataFrame: The dataset with a single continuous column that is analogous to the original column, with the same name, and without the generated columns from which it is derived.

Source code in src/nhssynth/modules/dataloader/transformers/continuous.py
def revert(self, data: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"\n    Revert data to pre-transformer state via the means and stds of the BGM. Extract the relevant columns from the data via the `new_column_names` attribute.\n    If `missingness_column` was provided to the `apply` method, drop the missing values from the data before reverting and use the `full_index` to\n    reintroduce missing values when `original_column_name` is constructed.\n\n    Args:\n        data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n    Returns:\n        The dataset with a single continuous column that is analogous to the original column, with the same name, and without the generated columns from which it is derived.\n    \"\"\"\n    working_data = data[self.new_column_names]\n    full_index = working_data.index\n    if self._missingness_column_name is not None:\n        working_data = working_data[working_data[self._missingness_column_name] == 0]\n        working_data = working_data.drop(self._missingness_column_name, axis=1)\n    index = working_data.index\n\n    components = np.argmax(working_data.filter(regex=r\".*_c\\d+\").values, axis=1)\n    working_data = working_data.filter(like=\"_normalised\").values.reshape(-1)\n    if self.clip_output:\n        working_data = np.clip(working_data, -1.0, 1.0)\n\n    mean_t = self.means[components]\n    std_t = self.stds[components]\n    data[self.original_column_name] = pd.Series(\n        working_data * self._std_multiplier * std_t + mean_t, index=index, name=self.original_column_name\n    ).reindex(full_index)\n    data.drop(self.new_column_names, axis=1, inplace=True)\n    return data\n
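Reverting is then the inverse affine map per assigned component. Continuing the names from the sketch above (`normalised`, `components`, `means`, `stds`, `x`):

```python
# Map each normalised value back through its assigned component's mean and std.
reconstructed = normalised * 4 * stds[components] + means[components]
print(np.abs(reconstructed - x.reshape(-1)).max())  # ~0 unless a value was clipped
```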
"},{"location":"reference/modules/dataloader/transformers/datetime/","title":"datetime","text":""},{"location":"reference/modules/dataloader/transformers/datetime/#nhssynth.modules.dataloader.transformers.datetime.DatetimeTransformer","title":"DatetimeTransformer","text":"

Bases: TransformerWrapper

A transformer to convert datetime features to numeric features before applying an underlying (wrapped) transformer. The datetime features are converted to nanoseconds since the epoch, and missing values are assigned to 0.0 under the AugmentMissingnessStrategy.

Parameters:

transformer (ColumnTransformer): The ColumnTransformer to wrap. [required]

After applying the transformer, the following attributes will be populated:

Attributes:

original_column_name: The name of the original column.

Source code in src/nhssynth/modules/dataloader/transformers/datetime.py
class DatetimeTransformer(TransformerWrapper):\n    \"\"\"\n    A transformer to convert datetime features to numeric features. Before applying an underlying (wrapped) transformer.\n    The datetime features are converted to nanoseconds since the epoch, and missing values are assigned to 0.0 under the `AugmentMissingnessStrategy`.\n\n    Args:\n        transformer: The [`ColumnTransformer`][nhssynth.modules.dataloader.transformers.base.ColumnTransformer] to wrap.\n\n    After applying the transformer, the following attributes will be populated:\n\n    Attributes:\n        original_column_name: The name of the original column.\n    \"\"\"\n\n    def __init__(self, transformer: ColumnTransformer) -> None:\n        super().__init__(transformer)\n\n    def apply(self, data: pd.Series, missingness_column: Optional[pd.Series] = None, **kwargs) -> pd.DataFrame:\n        \"\"\"\n        Firstly, the datetime data is floored to the nano-second level. Next, the floored data is converted to float nanoseconds since the epoch.\n        The float value of `pd.NaT` under the operation above is then replaced with `np.nan` to ensure missing values are represented correctly.\n        Finally, the wrapped transformer is applied to the data.\n\n        Args:\n            data: The column of data to transform.\n            missingness_column: The column of missingness indicators to augment the data with.\n\n        Returns:\n            The transformed data.\n        \"\"\"\n        self.original_column_name = data.name\n        floored_data = pd.Series(data.dt.floor(\"ns\").to_numpy().astype(float), name=data.name)\n        nan_corrected_data = floored_data.replace(pd.to_datetime(pd.NaT).to_numpy().astype(float), np.nan)\n        return super().apply(nan_corrected_data, missingness_column, **kwargs)\n\n    def revert(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:\n        \"\"\"\n        The wrapped transformer's `revert` method is applied to the data. The data is then converted back to datetime format.\n\n        Args:\n            data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n        Returns:\n            The reverted data.\n        \"\"\"\n        reverted_data = super().revert(data, **kwargs)\n        data[self.original_column_name] = pd.to_datetime(\n            reverted_data[self.original_column_name].astype(\"Int64\"), unit=\"ns\"\n        )\n        return data\n
"},{"location":"reference/modules/dataloader/transformers/datetime/#nhssynth.modules.dataloader.transformers.datetime.DatetimeTransformer.apply","title":"apply(data, missingness_column=None, **kwargs)","text":"

Firstly, the datetime data is floored to the nano-second level. Next, the floored data is converted to float nanoseconds since the epoch. The float value of pd.NaT under the operation above is then replaced with np.nan to ensure missing values are represented correctly. Finally, the wrapped transformer is applied to the data.

Parameters:

Name Type Description Default data Series

The column of data to transform.

required missingness_column Optional[Series]

The column of missingness indicators to augment the data with.

None

Returns:

Type Description DataFrame

The transformed data.

Source code in src/nhssynth/modules/dataloader/transformers/datetime.py
def apply(self, data: pd.Series, missingness_column: Optional[pd.Series] = None, **kwargs) -> pd.DataFrame:\n    \"\"\"\n    Firstly, the datetime data is floored to the nano-second level. Next, the floored data is converted to float nanoseconds since the epoch.\n    The float value of `pd.NaT` under the operation above is then replaced with `np.nan` to ensure missing values are represented correctly.\n    Finally, the wrapped transformer is applied to the data.\n\n    Args:\n        data: The column of data to transform.\n        missingness_column: The column of missingness indicators to augment the data with.\n\n    Returns:\n        The transformed data.\n    \"\"\"\n    self.original_column_name = data.name\n    floored_data = pd.Series(data.dt.floor(\"ns\").to_numpy().astype(float), name=data.name)\n    nan_corrected_data = floored_data.replace(pd.to_datetime(pd.NaT).to_numpy().astype(float), np.nan)\n    return super().apply(nan_corrected_data, missingness_column, **kwargs)\n
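The conversion performed by apply can be illustrated outside the transformer. This is a minimal sketch using only pandas and numpy; the column name and values are hypothetical.

import numpy as np
import pandas as pd

# A standalone sketch of the conversion performed by `apply` (column name is hypothetical).
dates = pd.Series(pd.to_datetime(["2024-01-01", None, "2024-06-30 12:00"]), name="admission_date")

# Floor to nanosecond precision and view as float nanoseconds since the epoch.
floored = pd.Series(dates.dt.floor("ns").to_numpy().astype(float), name=dates.name)

# Under the cast above, pd.NaT becomes a sentinel float; map it back to np.nan.
nat_as_float = pd.to_datetime(pd.NaT).to_numpy().astype(float)
numeric = floored.replace(nat_as_float, np.nan)

# `revert` mirrors this: cast back to nullable Int64 nanoseconds and re-interpret as datetimes.
recovered = pd.to_datetime(numeric.astype("Int64"), unit="ns")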
"},{"location":"reference/modules/dataloader/transformers/datetime/#nhssynth.modules.dataloader.transformers.datetime.DatetimeTransformer.revert","title":"revert(data, **kwargs)","text":"

The wrapped transformer's revert method is applied to the data. The data is then converted back to datetime format.

Parameters:

Name Type Description Default data DataFrame

The full dataset including the column(s) to be reverted to their pre-transformer state.

required

Returns:

Type Description DataFrame

The reverted data.

Source code in src/nhssynth/modules/dataloader/transformers/datetime.py
def revert(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:\n    \"\"\"\n    The wrapped transformer's `revert` method is applied to the data. The data is then converted back to datetime format.\n\n    Args:\n        data: The full dataset including the column(s) to be reverted to their pre-transformer state.\n\n    Returns:\n        The reverted data.\n    \"\"\"\n    reverted_data = super().revert(data, **kwargs)\n    data[self.original_column_name] = pd.to_datetime(\n        reverted_data[self.original_column_name].astype(\"Int64\"), unit=\"ns\"\n    )\n    return data\n
"},{"location":"reference/modules/evaluation/","title":"evaluation","text":""},{"location":"reference/modules/evaluation/aequitas/","title":"aequitas","text":""},{"location":"reference/modules/evaluation/io/","title":"io","text":""},{"location":"reference/modules/evaluation/io/#nhssynth.modules.evaluation.io.check_input_paths","title":"check_input_paths(fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata, dir_experiment)","text":"

Sets up the input paths for the files required by the evaluation module.

Parameters:

Name Type Description Default fn_dataset str

The base name of the dataset.

required fn_typed str

The name of the typed real dataset file.

required fn_synthetic_datasets str

The filename of the collection of synthetic datasets.

required fn_sdv_metadata str

The name of the SDV metadata file.

required dir_experiment Path

The path to the experiment directory.

required

Returns:

Type Description tuple[str, str]

The base name of the dataset and the paths to the typed data, synthetic datasets and SDV metadata files.

Source code in src/nhssynth/modules/evaluation/io.py
def check_input_paths(\n    fn_dataset: str, fn_typed: str, fn_synthetic_datasets: str, fn_sdv_metadata: str, dir_experiment: Path\n) -> tuple[str, str]:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        fn_dataset: The base name of the dataset.\n        fn_typed: The name of the typed real dataset file.\n        fn_synthetic_datasets: The filename of the collection of synethtic datasets.\n        fn_sdv_metadata: The name of the SDV metadata file.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The paths to the data, metadata and metatransformer files.\n    \"\"\"\n    fn_dataset = Path(fn_dataset).stem\n    fn_typed, fn_synthetic_datasets, fn_sdv_metadata = io.consistent_endings(\n        [fn_typed, fn_synthetic_datasets, fn_sdv_metadata]\n    )\n    fn_typed, fn_synthetic_datasets, fn_sdv_metadata = io.potential_suffixes(\n        [fn_typed, fn_synthetic_datasets, fn_sdv_metadata], fn_dataset\n    )\n    io.warn_if_path_supplied([fn_typed, fn_synthetic_datasets, fn_sdv_metadata], dir_experiment)\n    io.check_exists([fn_typed, fn_synthetic_datasets, fn_sdv_metadata], dir_experiment)\n    return fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata\n
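For illustration, a call might look as follows; the filenames and experiment directory are hypothetical, and the helper asserts that the resolved files exist under dir_experiment.

from pathlib import Path

# Hypothetical names, purely to show the call shape; check_input_paths normalises
# endings/suffixes and checks the files exist in the experiment directory.
fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata = check_input_paths(
    fn_dataset="support.csv",
    fn_typed="support_typed",
    fn_synthetic_datasets="support_synthetic_datasets",
    fn_sdv_metadata="support_sdv_metadata",
    dir_experiment=Path("experiments") / "2024-02-28",
)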
"},{"location":"reference/modules/evaluation/io/#nhssynth.modules.evaluation.io.load_required_data","title":"load_required_data(args, dir_experiment)","text":"

Loads the data from args, or from disk when the dataloader has not been run previously.

Parameters:

Name Type Description Default args Namespace

The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.

required dir_experiment Path

The path to the experiment directory.

required

Returns:

Type Description tuple[str, DataFrame, DataFrame, dict[str, dict[str, Any]]]

The dataset name, the real data, the bundle of synthetic data from the modelling stage, and the SDV metadata.

Source code in src/nhssynth/modules/evaluation/io.py
def load_required_data(\n    args: argparse.Namespace, dir_experiment: Path\n) -> tuple[str, pd.DataFrame, pd.DataFrame, dict[str, dict[str, Any]]]:\n    \"\"\"\n    Loads the data from `args` or from disk when the dataloader has not be run previously.\n\n    Args:\n        args: The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The dataset name, the real data, the bundle of synthetic data from the modelling stage, and the SDV metadata.\n    \"\"\"\n    if all(x in args.module_handover for x in [\"dataset\", \"typed\", \"synthetic_datasets\", \"sdv_metadata\"]):\n        return (\n            args.module_handover[\"dataset\"],\n            args.module_handover[\"typed\"],\n            args.module_handover[\"synthetic_datasets\"],\n            args.module_handover[\"sdv_metadata\"],\n        )\n    else:\n        fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata = check_input_paths(\n            args.dataset, args.typed, args.synthetic_datasets, args.sdv_metadata, dir_experiment\n        )\n        with open(dir_experiment / fn_typed, \"rb\") as f:\n            real_data = pickle.load(f).contents\n        with open(dir_experiment / fn_sdv_metadata, \"rb\") as f:\n            sdv_metadata = pickle.load(f)\n        with open(dir_experiment / fn_synthetic_datasets, \"rb\") as f:\n            synthetic_datasets = pickle.load(f).contents\n\n        return fn_dataset, real_data, synthetic_datasets, sdv_metadata\n
"},{"location":"reference/modules/evaluation/io/#nhssynth.modules.evaluation.io.output_eval","title":"output_eval(evaluations, fn_dataset, fn_evaluations, dir_experiment)","text":"

Sets up the output path for the evaluations file and writes the evaluations to disk.

Parameters:

Name Type Description Default evaluations DataFrame

The evaluations to output.

required fn_dataset Path

The base name of the dataset.

required fn_evaluations str

The filename of the collection of evaluations.

required dir_experiment Path

The path to the experiment output directory.

required

Returns:

Type Description None

None; the evaluations are pickled to a file in the experiment directory.

Source code in src/nhssynth/modules/evaluation/io.py
def output_eval(\n    evaluations: pd.DataFrame,\n    fn_dataset: Path,\n    fn_evaluations: str,\n    dir_experiment: Path,\n) -> None:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        evaluations: The evaluations to output.\n        fn_dataset: The base name of the dataset.\n        fn_evaluations: The filename of the collection of evaluations.\n        dir_experiment: The path to the experiment output directory.\n\n    Returns:\n        The path to output the model.\n    \"\"\"\n    fn_evaluations = io.consistent_ending(fn_evaluations)\n    fn_evaluations = io.potential_suffix(fn_evaluations, fn_dataset)\n    io.warn_if_path_supplied([fn_evaluations], dir_experiment)\n    with open(dir_experiment / fn_evaluations, \"wb\") as f:\n        pickle.dump(Evaluations(evaluations), f)\n
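For illustration, a call might look as follows; the names are hypothetical. Nothing is returned; the pickled Evaluations bundle is simply written into the experiment directory.

from pathlib import Path

# Hypothetical names, purely to show the call shape.
output_eval(
    evaluations,                    # the dataframe of evaluations to persist
    fn_dataset="support",
    fn_evaluations="evaluations",
    dir_experiment=Path("experiments") / "2024-02-28",
)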
"},{"location":"reference/modules/evaluation/metrics/","title":"metrics","text":""},{"location":"reference/modules/evaluation/run/","title":"run","text":""},{"location":"reference/modules/evaluation/tasks/","title":"tasks","text":""},{"location":"reference/modules/evaluation/tasks/#nhssynth.modules.evaluation.tasks.Task","title":"Task","text":"

A task offers a light-touch way for users to specify any arbitrary downstream task that they want to run on a dataset.

Parameters:

Name Type Description Default name str

The name of the task.

required run Callable

The function to run.

required supports_aequitas

Whether the task supports Aequitas evaluation.

False description str

The description of the task.

'' Source code in src/nhssynth/modules/evaluation/tasks.py
class Task:\n    \"\"\"\n    A task offers a light-touch way for users to specify any arbitrary downstream task that they want to run on a dataset.\n\n    Args:\n        name: The name of the task.\n        run: The function to run.\n        supports_aequitas: Whether the task supports Aequitas evaluation.\n        description: The description of the task.\n    \"\"\"\n\n    def __init__(self, name: str, run: Callable, supports_aequitas=False, description: str = \"\"):\n        self._name: str = name\n        self._run: Callable = run\n        self._supports_aequitas: bool = supports_aequitas\n        self._description: str = description\n\n    def __str__(self) -> str:\n        return f\"{self.name}: {self.description}\" if self.description else self.name\n\n    def __repr__(self) -> str:\n        return str([self.name, self.run, self.supports_aequitas, self.description])\n\n    def run(self, *args, **kwargs):\n        return self._run(*args, **kwargs)\n
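A minimal sketch of a downstream task file is shown below. The target column, model choice and metric name are hypothetical, and the import path for Task is assumed from this reference; the evaluation module expects the run callable to return a prediction column and a dict of metric values.

# Hypothetical contents of a downstream task file, e.g. <tasks_root>/<dataset>/predict_outcome.py.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

from nhssynth.modules.evaluation.tasks import Task


def run_task(data: pd.DataFrame) -> tuple[pd.Series, dict]:
    # Assumes a binary "outcome" column and numeric features (both hypothetical).
    X, y = data.drop(columns=["outcome"]), data["outcome"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
    preds = pd.Series(model.predict(X), index=data.index, name="outcome_pred")
    return preds, {"outcome_auc": roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])}


# get_tasks imports this module and picks up the module-level `task` attribute.
task = Task(
    name="predict_outcome",
    run=run_task,
    supports_aequitas=True,
    description="Predict the (hypothetical) outcome column",
)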
"},{"location":"reference/modules/evaluation/tasks/#nhssynth.modules.evaluation.tasks.get_tasks","title":"get_tasks(fn_dataset, tasks_root)","text":"

Searches for and imports all tasks in the tasks directory for a given dataset. Uses importlib to extract the task from the file.

Parameters:

Name Type Description Default fn_dataset str

The name of the dataset.

required tasks_root str

The root directory for downstream tasks.

required

Returns:

Type Description list[Task]

A list of tasks.

Source code in src/nhssynth/modules/evaluation/tasks.py
def get_tasks(\n    fn_dataset: str,\n    tasks_root: str,\n) -> list[Task]:\n    \"\"\"\n    Searches for and imports all tasks in the tasks directory for a given dataset.\n    Uses `importlib` to extract the task from the file.\n\n    Args:\n        fn_dataset: The name of the dataset.\n        tasks_root: The root directory for downstream tasks.\n\n    Returns:\n        A list of tasks.\n    \"\"\"\n    tasks_dir = Path(tasks_root) / fn_dataset\n    assert (\n        tasks_dir.exists()\n    ), f\"Downstream tasks directory does not exist ({tasks_dir}), NB there should be a directory in TASKS_DIR with the same name as the dataset.\"\n    tasks = []\n    for task_path in tasks_dir.iterdir():\n        if task_path.name.startswith((\".\", \"__\")):\n            continue\n        assert task_path.suffix == \".py\", f\"Downstream task file must be a python file ({task_path.name})\"\n        spec = importlib.util.spec_from_file_location(\n            \"nhssynth_task_\" + task_path.name, os.getcwd() + \"/\" + str(task_path)\n        )\n        task_module = importlib.util.module_from_spec(spec)\n        spec.loader.exec_module(task_module)\n        tasks.append(task_module.task)\n    return tasks\n
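For example, with a hypothetical layout like the following, a single call discovers every task file for the dataset:

# tasks/                     <- tasks_root (hypothetical)
#   support/                 <- directory named after the dataset
#     predict_outcome.py     <- defines a module-level `task` (see the sketch above)
tasks = get_tasks(fn_dataset="support", tasks_root="tasks")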
"},{"location":"reference/modules/evaluation/utils/","title":"utils","text":""},{"location":"reference/modules/evaluation/utils/#nhssynth.modules.evaluation.utils.EvalFrame","title":"EvalFrame","text":"

Data structure for specifying and recording the evaluations of a set of synthetic datasets against a real dataset. All of the choices made by the user in the evaluation module are consolidated into this class.

After running evaluate on a set of synthetic datasets, the evaluations can be retrieved using get_evaluations. They are stored in a dict of dataframes with indices matching that of the supplied dataframe of synthetic datasets.

Parameters:

Name Type Description Default tasks list[Task]

A list of downstream tasks to run on the experiments.

required metrics list[str]

A list of metrics to calculate on the experiments.

required sdv_metadata dict[str, dict[str, str]]

The SDV metadata for the dataset.

required aequitas bool

Whether to run Aequitas on the results of supported downstream tasks.

False aequitas_attributes list[str]

The fairness-related attributes to use for Aequitas analysis.

[] key_numerical_fields list[str]

The numerical fields to use for SDV privacy metrics.

[] sensitive_numerical_fields list[str]

The numerical fields to use for SDV privacy metrics.

[] key_categorical_fields list[str]

The categorical fields to use for SDV privacy metrics.

[] sensitive_categorical_fields list[str]

The categorical fields to use for SDV privacy metrics.

[] Source code in src/nhssynth/modules/evaluation/utils.py
class EvalFrame:\n    \"\"\"\n    Data structure for specifying and recording the evaluations of a set of synthetic datasets against a real dataset.\n    All of the choices made by the user in the evaluation module are consolidated into this class.\n\n    After running `evaluate` on a set of synthetic datasets, the evaluations can be retrieved using `get_evaluations`.\n    They are stored in a dict of dataframes with indices matching that of the supplied dataframe of synthetic datasets.\n\n    Args:\n        tasks: A list of downstream tasks to run on the experiments.\n        metrics: A list of metrics to calculate on the experiments.\n        sdv_metadata: The SDV metadata for the dataset.\n        aequitas: Whether to run Aequitas on the results of supported downstream tasks.\n        aequitas_attributes: The fairness-related attributes to use for Aequitas analysis.\n        key_numerical_fields: The numerical fields to use for SDV privacy metrics.\n        sensitive_numerical_fields: The numerical fields to use for SDV privacy metrics.\n        key_categorical_fields: The categorical fields to use for SDV privacy metrics.\n        sensitive_categorical_fields: The categorical fields to use for SDV privacy metrics.\n    \"\"\"\n\n    def __init__(\n        self,\n        tasks: list[Task],\n        metrics: list[str],\n        sdv_metadata: dict[str, dict[str, str]],\n        aequitas: bool = False,\n        aequitas_attributes: list[str] = [],\n        key_numerical_fields: list[str] = [],\n        sensitive_numerical_fields: list[str] = [],\n        key_categorical_fields: list[str] = [],\n        sensitive_categorical_fields: list[str] = [],\n    ):\n        self._tasks = tasks\n        self._aequitas = aequitas\n        self._aequitas_attributes = aequitas_attributes\n\n        self._metrics = metrics\n        self._sdv_metadata = sdv_metadata\n\n        self._key_numerical_fields = key_numerical_fields\n        self._sensitive_numerical_fields = sensitive_numerical_fields\n        self._key_categorical_fields = key_categorical_fields\n        self._sensitive_categorical_fields = sensitive_categorical_fields\n        assert all([metric not in NUMERICAL_PRIVACY_METRICS for metric in self._metrics]) or (\n            self._key_numerical_fields and self._sensitive_numerical_fields\n        ), \"Numerical key and sensitive fields must be provided when an SDV privacy metric is used.\"\n        assert all([metric not in CATEGORICAL_PRIVACY_METRICS for metric in self._metrics]) or (\n            self._key_categorical_fields and self._sensitive_categorical_fields\n        ), \"Categorical key and sensitive fields must be provided when an SDV privacy metric is used.\"\n\n        self._metric_groups = self._build_metric_groups()\n\n    def _build_metric_groups(self) -> list[str]:\n        \"\"\"\n        Iterate through the concatenated list of metrics provided by the user and refer to the\n        [defined metric groups][nhssynth.common.constants] to identify which to evaluate.\n\n        Returns:\n            A list of metric groups to evaluate.\n        \"\"\"\n        metric_groups = set()\n        if self._tasks:\n            metric_groups.add(\"task\")\n        if self._aequitas:\n            metric_groups.add(\"aequitas\")\n        for metric in self._metrics:\n            if metric in TABLE_METRICS:\n                metric_groups.add(\"table\")\n            if metric in NUMERICAL_PRIVACY_METRICS or metric in CATEGORICAL_PRIVACY_METRICS:\n                
metric_groups.add(\"privacy\")\n            if metric in TABLE_METRICS and issubclass(TABLE_METRICS[metric], MultiSingleColumnMetric):\n                metric_groups.add(\"columnwise\")\n            if metric in TABLE_METRICS and issubclass(TABLE_METRICS[metric], MultiColumnPairsMetric):\n                metric_groups.add(\"pairwise\")\n        return list(metric_groups)\n\n    def evaluate(self, real_dataset: pd.DataFrame, synthetic_datasets: list[dict[str, Any]]) -> None:\n        \"\"\"\n        Evaluate a set of synthetic datasets against a real dataset.\n\n        Args:\n            real_dataset: The real dataset to evaluate against.\n            synthetic_datasets: The synthetic datasets to evaluate.\n        \"\"\"\n        assert not any(\"Real\" in i for i in synthetic_datasets.index), \"Real is a reserved dataset ID.\"\n        assert synthetic_datasets.index.is_unique, \"Dataset IDs must be unique.\"\n        self._evaluations = pd.DataFrame(index=synthetic_datasets.index, columns=self._metric_groups)\n        self._evaluations.loc[(\"Real\", None, None)] = self._step(real_dataset)\n        pbar = tqdm(synthetic_datasets.iterrows(), desc=\"Evaluating\", total=len(synthetic_datasets))\n        for i, dataset in pbar:\n            pbar.set_description(f\"Evaluating {i[0]}, repeat {i[1]}, config {i[2]}\")\n            self._evaluations.loc[i] = self._step(real_dataset, dataset.values[0])\n\n    def get_evaluations(self) -> dict[str, pd.DataFrame]:\n        \"\"\"\n        Unpack the `self._evaluations` dataframe, where each metric group is a column, into a dict of dataframes.\n\n        Returns:\n            A dict of dataframes, one for each metric group, containing the evaluations.\n        \"\"\"\n        assert hasattr(\n            self, \"_evaluations\"\n        ), \"You must first run `evaluate` on a `real_dataset` and set of `synthetic_datasets`.\"\n        return {\n            metric_group: pd.DataFrame(\n                self._evaluations[metric_group].values.tolist(), index=self._evaluations.index\n            ).dropna(how=\"all\")\n            for metric_group in self._metric_groups\n        }\n\n    def _task_step(self, data: pd.DataFrame) -> dict[str, dict]:\n        \"\"\"\n        Run the downstream tasks on the dataset. 
Optionally run Aequitas on the results of the tasks.\n\n        Args:\n            data: The dataset to run the tasks on.\n\n        Returns:\n            A dict of dicts, one for each metric group, to be populated with each groups metric values.\n        \"\"\"\n        metric_dict = {metric_group: {} for metric_group in self._metric_groups}\n        for task in tqdm(self._tasks, desc=\"Running downstream tasks\", leave=False):\n            task_pred_column, task_metric_values = task.run(data)\n            metric_dict[\"task\"].update(task_metric_values)\n            if self._aequitas and task.supports_aequitas:\n                metric_dict[\"aequitas\"].update(run_aequitas(data[self._aequitas_attributes].join(task_pred_column)))\n        return metric_dict\n\n    def _compute_metric(\n        self, metric_dict: dict, metric: str, real_data: pd.DataFrame, synthetic_data: pd.DataFrame\n    ) -> dict[str, dict]:\n        \"\"\"\n        Given a metric, determine the correct way to evaluate it via the lists defined in `nhssynth.common.constants`.\n\n        Args:\n            metric_dict: The dict of dicts to populate with metric values.\n            metric: The metric to evaluate.\n            real_data: The real dataset to evaluate against.\n            synthetic_data: The synthetic dataset to evaluate.\n\n        Returns:\n            The metric_dict updated with the value of the metric.\n        \"\"\"\n        with pd.option_context(\"mode.chained_assignment\", None), warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", message=\"ConvergenceWarning\")\n            if metric in TABLE_METRICS:\n                metric_dict[\"table\"][metric] = TABLE_METRICS[metric].compute(\n                    real_data, synthetic_data, self._sdv_metadata\n                )\n                if issubclass(TABLE_METRICS[metric], MultiSingleColumnMetric):\n                    metric_dict[\"columnwise\"][metric] = TABLE_METRICS[metric].compute_breakdown(\n                        real_data, synthetic_data, self._sdv_metadata\n                    )\n                elif issubclass(TABLE_METRICS[metric], MultiColumnPairsMetric):\n                    metric_dict[\"pairwise\"][metric] = TABLE_METRICS[metric].compute_breakdown(\n                        real_data, synthetic_data, self._sdv_metadata\n                    )\n            elif metric in NUMERICAL_PRIVACY_METRICS:\n                metric_dict[\"privacy\"][metric] = NUMERICAL_PRIVACY_METRICS[metric].compute(\n                    real_data.dropna(),\n                    synthetic_data.dropna(),\n                    self._sdv_metadata,\n                    self._key_numerical_fields,\n                    self._sensitive_numerical_fields,\n                )\n            elif metric in CATEGORICAL_PRIVACY_METRICS:\n                metric_dict[\"privacy\"][metric] = CATEGORICAL_PRIVACY_METRICS[metric].compute(\n                    real_data.dropna(),\n                    synthetic_data.dropna(),\n                    self._sdv_metadata,\n                    self._key_categorical_fields,\n                    self._sensitive_categorical_fields,\n                )\n        return metric_dict\n\n    def _step(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame = None) -> dict[str, dict]:\n        \"\"\"\n        Run the two functions above (or only the tasks when no synthetic data is provided).\n\n        Args:\n            real_data: The real dataset to evaluate against.\n            synthetic_data: The synthetic dataset to 
evaluate.\n\n        Returns:\n            A dict of dicts, one for each metric grou, to populate a row of `self._evaluations` corresponding to the `synthetic_data`.\n        \"\"\"\n        if synthetic_data is None:\n            metric_dict = self._task_step(real_data)\n        else:\n            metric_dict = self._task_step(synthetic_data)\n            for metric in tqdm(self._metrics, desc=\"Running metrics\", leave=False):\n                metric_dict = self._compute_metric(metric_dict, metric, real_data, synthetic_data)\n        return metric_dict\n
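A hedged sketch of how an EvalFrame might be assembled and run. The metric name is a placeholder, and real_data, synthetic_datasets, sdv_metadata and tasks are assumed to come from the loading helpers and model module described elsewhere in this reference.

# Sketch only: "KSComplement" stands in for any key of TABLE_METRICS, and
# synthetic_datasets is assumed to be indexed by (architecture, repeat, config).
frame = EvalFrame(
    tasks=tasks,                # e.g. from get_tasks(...)
    metrics=["KSComplement"],   # placeholder metric name
    sdv_metadata=sdv_metadata,  # e.g. from load_required_data(...)
    aequitas=False,
)
frame.evaluate(real_data, synthetic_datasets)
evaluations = frame.get_evaluations()  # dict of dataframes keyed by metric group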
"},{"location":"reference/modules/evaluation/utils/#nhssynth.modules.evaluation.utils.EvalFrame.evaluate","title":"evaluate(real_dataset, synthetic_datasets)","text":"

Evaluate a set of synthetic datasets against a real dataset.

Parameters:

Name Type Description Default real_dataset DataFrame

The real dataset to evaluate against.

required synthetic_datasets list[dict[str, Any]]

The synthetic datasets to evaluate.

required Source code in src/nhssynth/modules/evaluation/utils.py
def evaluate(self, real_dataset: pd.DataFrame, synthetic_datasets: list[dict[str, Any]]) -> None:\n    \"\"\"\n    Evaluate a set of synthetic datasets against a real dataset.\n\n    Args:\n        real_dataset: The real dataset to evaluate against.\n        synthetic_datasets: The synthetic datasets to evaluate.\n    \"\"\"\n    assert not any(\"Real\" in i for i in synthetic_datasets.index), \"Real is a reserved dataset ID.\"\n    assert synthetic_datasets.index.is_unique, \"Dataset IDs must be unique.\"\n    self._evaluations = pd.DataFrame(index=synthetic_datasets.index, columns=self._metric_groups)\n    self._evaluations.loc[(\"Real\", None, None)] = self._step(real_dataset)\n    pbar = tqdm(synthetic_datasets.iterrows(), desc=\"Evaluating\", total=len(synthetic_datasets))\n    for i, dataset in pbar:\n        pbar.set_description(f\"Evaluating {i[0]}, repeat {i[1]}, config {i[2]}\")\n        self._evaluations.loc[i] = self._step(real_dataset, dataset.values[0])\n
"},{"location":"reference/modules/evaluation/utils/#nhssynth.modules.evaluation.utils.EvalFrame.get_evaluations","title":"get_evaluations()","text":"

Unpack the self._evaluations dataframe, where each metric group is a column, into a dict of dataframes.

Returns:

Type Description dict[str, DataFrame]

A dict of dataframes, one for each metric group, containing the evaluations.

Source code in src/nhssynth/modules/evaluation/utils.py
def get_evaluations(self) -> dict[str, pd.DataFrame]:\n    \"\"\"\n    Unpack the `self._evaluations` dataframe, where each metric group is a column, into a dict of dataframes.\n\n    Returns:\n        A dict of dataframes, one for each metric group, containing the evaluations.\n    \"\"\"\n    assert hasattr(\n        self, \"_evaluations\"\n    ), \"You must first run `evaluate` on a `real_dataset` and set of `synthetic_datasets`.\"\n    return {\n        metric_group: pd.DataFrame(\n            self._evaluations[metric_group].values.tolist(), index=self._evaluations.index\n        ).dropna(how=\"all\")\n        for metric_group in self._metric_groups\n    }\n
"},{"location":"reference/modules/evaluation/utils/#nhssynth.modules.evaluation.utils.validate_metric_args","title":"validate_metric_args(args, fn_dataset, columns)","text":"

Validate the arguments for downstream tasks and Aequitas.

Parameters:

Name Type Description Default args Namespace

The argument namespace to validate.

required fn_dataset str

The name of the dataset.

required columns Index

The columns in the dataset.

required

Returns:

Type Description tuple[list[Task], Namespace]

The validated arguments, the list of tasks and the list of metrics.

Source code in src/nhssynth/modules/evaluation/utils.py
def validate_metric_args(\n    args: argparse.Namespace, fn_dataset: str, columns: pd.Index\n) -> tuple[list[Task], argparse.Namespace]:\n    \"\"\"\n    Validate the arguments for downstream tasks and Aequitas.\n\n    Args:\n        args: The argument namespace to validate.\n        fn_dataset: The name of the dataset.\n        columns: The columns in the dataset.\n\n    Returns:\n        The validated arguments, the list of tasks and the list of metrics.\n    \"\"\"\n    if args.downstream_tasks:\n        tasks = get_tasks(fn_dataset, args.tasks_dir)\n        if not tasks:\n            warnings.warn(\"No valid downstream tasks found.\")\n    else:\n        tasks = []\n    if args.aequitas:\n        if not args.downstream_tasks or not any([task.supports_aequitas for task in tasks]):\n            warnings.warn(\n                \"Aequitas can only work in context of downstream tasks involving binary classification problems.\"\n            )\n        if not args.aequitas_attributes:\n            warnings.warn(\"No attributes specified for Aequitas analysis, defaulting to all columns in the dataset.\")\n            args.aequitas_attributes = columns.tolist()\n        assert all(\n            [attr in columns for attr in args.aequitas_attributes]\n        ), \"Invalid attribute(s) specified for Aequitas analysis.\"\n    metrics = {}\n    for metric_group in METRIC_CHOICES:\n        selected_metrics = getattr(args, \"_\".join(metric_group.split()).lower() + \"_metrics\") or []\n        metrics.update({metric_name: METRIC_CHOICES[metric_group][metric_name] for metric_name in selected_metrics})\n    return args, tasks, metrics\n
"},{"location":"reference/modules/model/","title":"model","text":""},{"location":"reference/modules/model/io/","title":"io","text":""},{"location":"reference/modules/model/io/#nhssynth.modules.model.io.check_input_paths","title":"check_input_paths(fn_dataset, fn_transformed, fn_metatransformer, dir_experiment)","text":"

Sets up the input and output paths for the model files.

Parameters:

Name Type Description Default fn_dataset str

The base name of the dataset.

required fn_transformed str

The name of the transformed data file.

required fn_metatransformer str

The name of the metatransformer file.

required dir_experiment Path

The path to the experiment directory.

required

Returns:

Type Description tuple[str, str]

The paths to the data, metadata and metatransformer files.

Source code in src/nhssynth/modules/model/io.py
def check_input_paths(\n    fn_dataset: str, fn_transformed: str, fn_metatransformer: str, dir_experiment: Path\n) -> tuple[str, str]:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        fn_dataset: The base name of the dataset.\n        fn_transformed: The name of the transformed data file.\n        fn_metatransformer: The name of the metatransformer file.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The paths to the data, metadata and metatransformer files.\n    \"\"\"\n    fn_dataset = Path(fn_dataset).stem\n    fn_transformed, fn_metatransformer = io.consistent_endings([fn_transformed, fn_metatransformer])\n    fn_transformed, fn_metatransformer = io.potential_suffixes([fn_transformed, fn_metatransformer], fn_dataset)\n    io.warn_if_path_supplied([fn_transformed, fn_metatransformer], dir_experiment)\n    io.check_exists([fn_transformed, fn_metatransformer], dir_experiment)\n    return fn_dataset, fn_transformed, fn_metatransformer\n
"},{"location":"reference/modules/model/io/#nhssynth.modules.model.io.load_required_data","title":"load_required_data(args, dir_experiment)","text":"

Loads the data from args, or from disk when the dataloader has not been run previously.

Parameters:

Name Type Description Default args Namespace

The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.

required dir_experiment Path

The path to the experiment directory.

required

Returns:

Type Description tuple[str, DataFrame, dict[str, int], MetaTransformer]

The data, metadata and metatransformer.

Source code in src/nhssynth/modules/model/io.py
def load_required_data(\n    args: argparse.Namespace, dir_experiment: Path\n) -> tuple[str, pd.DataFrame, dict[str, int], MetaTransformer]:\n    \"\"\"\n    Loads the data from `args` or from disk when the dataloader has not be run previously.\n\n    Args:\n        args: The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The data, metadata and metatransformer.\n    \"\"\"\n    if all(x in args.module_handover for x in [\"dataset\", \"transformed\", \"metatransformer\"]):\n        return (\n            args.module_handover[\"dataset\"],\n            args.module_handover[\"transformed\"],\n            args.module_handover[\"metatransformer\"],\n        )\n    else:\n        fn_dataset, fn_transformed, fn_metatransformer = check_input_paths(\n            args.dataset, args.transformed, args.metatransformer, dir_experiment\n        )\n\n        with open(dir_experiment / fn_transformed, \"rb\") as f:\n            data = pickle.load(f)\n        with open(dir_experiment / fn_metatransformer, \"rb\") as f:\n            mt = pickle.load(f)\n\n        return fn_dataset, data, mt\n
"},{"location":"reference/modules/model/run/","title":"run","text":""},{"location":"reference/modules/model/utils/","title":"utils","text":""},{"location":"reference/modules/model/utils/#nhssynth.modules.model.utils.configs_from_arg_combinations","title":"configs_from_arg_combinations(args, arg_list)","text":"

Generates a list of configurations from a list of arguments. Each configuration is one of a cartesian product of the arguments provided and identified in arg_list.

Parameters:

Name Type Description Default args Namespace

The arguments.

required arg_list list[str]

The list of arguments to generate configurations from.

required

Returns:

Type Description list[dict[str, Any]]

A list of configurations.

Source code in src/nhssynth/modules/model/utils.py
def configs_from_arg_combinations(args: argparse.Namespace, arg_list: list[str]) -> list[dict[str, Any]]:\n    \"\"\"\n    Generates a list of configurations from a list of arguments. Each configuration is one of a cartesian product of\n    the arguments provided and identified in `arg_list`.\n\n    Args:\n        args: The arguments.\n        arg_list: The list of arguments to generate configurations from.\n\n    Returns:\n        A list of configurations.\n    \"\"\"\n    wrapped_args = {arg: wrap_arg(getattr(args, arg)) for arg in arg_list}\n    combinations = list(itertools.product(*wrapped_args.values()))\n    return [{k: v for k, v in zip(wrapped_args.keys(), values) if v is not None} for values in combinations]\n
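For example, list-valued arguments are expanded into their cartesian product while scalar arguments are broadcast across every configuration:

import argparse

args = argparse.Namespace(num_epochs=[50, 100], patience=5)
configs_from_arg_combinations(args, ["num_epochs", "patience"])
# -> [{'num_epochs': 50, 'patience': 5}, {'num_epochs': 100, 'patience': 5}]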
"},{"location":"reference/modules/model/utils/#nhssynth.modules.model.utils.get_experiments","title":"get_experiments(args)","text":"

Generates a dataframe of experiments from the arguments provided.

Parameters:

Name Type Description Default args Namespace

The arguments.

required

Returns:

Type Description DataFrame

A dataframe of experiments indexed by architecture, repeat and config ID.

Source code in src/nhssynth/modules/model/utils.py
def get_experiments(args: argparse.Namespace) -> pd.DataFrame:\n    \"\"\"\n    Generates a dataframe of experiments from the arguments provided.\n\n    Args:\n        args: The arguments.\n\n    Returns:\n        A dataframe of experiments indexed by architecture, repeat and config ID.\n    \"\"\"\n    experiments = pd.DataFrame(\n        columns=[\"architecture\", \"repeat\", \"config\", \"model_config\", \"seed\", \"train_config\", \"num_configs\"]\n    )\n    train_configs = configs_from_arg_combinations(args, [\"num_epochs\", \"patience\"])\n    for arch_name, repeat in itertools.product(*[wrap_arg(args.architecture), list(range(args.repeats))]):\n        arch = MODELS[arch_name]\n        model_configs = configs_from_arg_combinations(args, arch.get_args() + [\"batch_size\", \"use_gpu\"])\n        for i, (train_config, model_config) in enumerate(itertools.product(train_configs, model_configs)):\n            experiments.loc[len(experiments.index)] = {\n                \"architecture\": arch_name,\n                \"repeat\": repeat + 1,\n                \"config\": i + 1,\n                \"model_config\": model_config,\n                \"num_configs\": len(model_configs) * len(train_configs),\n                \"seed\": args.seed + repeat if args.seed else None,\n                \"train_config\": train_config,\n            }\n    return experiments.set_index([\"architecture\", \"repeat\", \"config\"], drop=True)\n
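A hedged sketch of the expected shape of the output; the architecture name and argument values are hypothetical, and any architecture-specific arguments returned by MODELS[...].get_args() are assumed to be present on args.

import argparse

args = argparse.Namespace(
    architecture=["VAE"],   # assumed to be a key of MODELS
    repeats=2,
    seed=123,
    num_epochs=[50, 100],
    patience=5,
    batch_size=32,
    use_gpu=False,
    # ...plus any architecture-specific arguments expected by MODELS["VAE"].get_args()
)
experiments = get_experiments(args)
# Indexed by (architecture, repeat, config): here 2 repeats x 2 train configs = 4 rows,
# assuming the architecture-specific arguments are single-valued.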
"},{"location":"reference/modules/model/utils/#nhssynth.modules.model.utils.wrap_arg","title":"wrap_arg(arg)","text":"

Wraps a single argument in a list if it is not already a list or tuple.

Parameters:

Name Type Description Default arg Any

The argument to wrap.

required

Returns:

Type Description Union[list, tuple]

The wrapped argument.

Source code in src/nhssynth/modules/model/utils.py
def wrap_arg(arg: Any) -> Union[list, tuple]:\n    \"\"\"\n    Wraps a single argument in a list if it is not already a list or tuple.\n\n    Args:\n        arg: The argument to wrap.\n\n    Returns:\n        The wrapped argument.\n    \"\"\"\n    if not isinstance(arg, list) and not isinstance(arg, tuple):\n        return [arg]\n    return arg\n
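For example:

wrap_arg(0.001)      # -> [0.001]
wrap_arg([50, 100])  # -> [50, 100] (lists are returned unchanged)
wrap_arg((32, 64))   # -> (32, 64) (tuples are also passed through)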
"},{"location":"reference/modules/model/common/","title":"common","text":""},{"location":"reference/modules/model/common/dp/","title":"dp","text":""},{"location":"reference/modules/model/common/dp/#nhssynth.modules.model.common.dp.DPMixin","title":"DPMixin","text":"

Bases: ABC

Mixin class to make a Model differentially private

Parameters:

Name Type Description Default target_epsilon float

The target epsilon for the model during training

3.0 target_delta Optional[float]

The target delta for the model during training

None max_grad_norm float

The maximum norm for the gradients; they are trimmed to this norm if they are larger

5.0 secure_mode bool

Whether to use the 'secure mode' of PyTorch's DP-SGD implementation via the csprng package

False

Attributes:

Name Type Description target_epsilon float

The target epsilon for the model during training

target_delta float

The target delta for the model during training

max_grad_norm float

The maximum norm for the gradients; they are trimmed to this norm if they are larger

secure_mode bool

Whether to use the 'secure mode' of PyTorch's DP-SGD implementation via the csprng package

Raises:

Type Description TypeError

If the inheritor is not a Model

Source code in src/nhssynth/modules/model/common/dp.py
class DPMixin(ABC):\n    \"\"\"\n    Mixin class to make a [`Model`][nhssynth.modules.model.common.model.Model] differentially private\n\n    Args:\n        target_epsilon: The target epsilon for the model during training\n        target_delta: The target delta for the model during training\n        max_grad_norm: The maximum norm for the gradients, they are trimmed to this norm if they are larger\n        secure_mode: Whether to use the 'secure mode' of PyTorch's DP-SGD implementation via the `csprng` package\n\n    Attributes:\n        target_epsilon: The target epsilon for the model during training\n        target_delta: The target delta for the model during training\n        max_grad_norm: The maximum norm for the gradients, they are trimmed to this norm if they are larger\n        secure_mode: Whether to use the 'secure mode' of PyTorch's DP-SGD implementation via the `csprng` package\n\n    Raises:\n        TypeError: If the inheritor is not a `Model`\n    \"\"\"\n\n    def __init__(\n        self,\n        *args,\n        target_epsilon: float = 3.0,\n        target_delta: Optional[float] = None,\n        max_grad_norm: float = 5.0,\n        secure_mode: bool = False,\n        **kwargs,\n    ):\n        if not isinstance(self, Model):\n            raise TypeError(\"DPMixin can only be used with Model classes\")\n        super(DPMixin, self).__init__(*args, **kwargs)\n        self.target_epsilon: float = target_epsilon\n        self.target_delta: float = target_delta or 1 / self.nrows\n        self.max_grad_norm: float = max_grad_norm\n        self.secure_mode: bool = secure_mode\n\n    def make_private(self, num_epochs: int, module: Optional[nn.Module] = None) -> GradSampleModule:\n        \"\"\"\n        Make the passed module (or the full model if a module is not passed), and its associated optimizer and data loader private.\n\n        Args:\n            num_epochs: The number of epochs to train for, used to calculate the privacy budget.\n            module: The module to make private.\n\n        Returns:\n            The privatised module.\n        \"\"\"\n        module = module or self\n        self.privacy_engine = PrivacyEngine(secure_mode=self.secure_mode)\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", message=\"invalid value encountered in log\")\n            warnings.filterwarnings(\"ignore\", message=\"Optimal order is the largest alpha\")\n            module, module.optim, self.data_loader = self.privacy_engine.make_private_with_epsilon(\n                module=module,\n                optimizer=module.optim,\n                data_loader=self.data_loader,\n                epochs=num_epochs,\n                target_epsilon=self.target_epsilon,\n                target_delta=self.target_delta,\n                max_grad_norm=self.max_grad_norm,\n            )\n        print(\n            f\"Using sigma={module.optim.noise_multiplier} and C={self.max_grad_norm} to target (\u03b5, \u03b4) = ({self.target_epsilon}, {self.target_delta})-differential privacy.\".format()\n        )\n        self.get_epsilon = self.privacy_engine.accountant.get_epsilon\n        return module\n\n    def _generate_metric_str(self, key) -> str:\n        \"\"\"Generates a string to display the current value of the metric `key`.\"\"\"\n        if key == \"Privacy\":\n            with warnings.catch_warnings():\n                warnings.filterwarnings(\"ignore\", message=\"invalid value encountered in log\")\n                warnings.filterwarnings(\"ignore\", 
message=\"Optimal order is the largest alpha\")\n                val = self.get_epsilon(self.target_delta)\n            self.metrics[key] = np.append(self.metrics[key], val)\n            return f\"{(key + ' \u03b5 Spent:').ljust(self.max_length)}  {val:.4f}\"\n        else:\n            return super()._generate_metric_str(key)\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        return [\"target_epsilon\", \"target_delta\", \"max_grad_norm\", \"secure_mode\"]\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return [\"Privacy\"]\n\n    def _start_training(self, num_epochs, patience, displayed_metrics):\n        self.make_private(num_epochs)\n        super()._start_training(num_epochs, patience, displayed_metrics)\n
"},{"location":"reference/modules/model/common/dp/#nhssynth.modules.model.common.dp.DPMixin.make_private","title":"make_private(num_epochs, module=None)","text":"

Make the passed module (or the full model if a module is not passed), and its associated optimizer and data loader private.

Parameters:

Name Type Description Default num_epochs int

The number of epochs to train for, used to calculate the privacy budget.

required module Optional[Module]

The module to make private.

None

Returns:

Type Description GradSampleModule

The privatised module.

Source code in src/nhssynth/modules/model/common/dp.py
def make_private(self, num_epochs: int, module: Optional[nn.Module] = None) -> GradSampleModule:\n    \"\"\"\n    Make the passed module (or the full model if a module is not passed), and its associated optimizer and data loader private.\n\n    Args:\n        num_epochs: The number of epochs to train for, used to calculate the privacy budget.\n        module: The module to make private.\n\n    Returns:\n        The privatised module.\n    \"\"\"\n    module = module or self\n    self.privacy_engine = PrivacyEngine(secure_mode=self.secure_mode)\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"ignore\", message=\"invalid value encountered in log\")\n        warnings.filterwarnings(\"ignore\", message=\"Optimal order is the largest alpha\")\n        module, module.optim, self.data_loader = self.privacy_engine.make_private_with_epsilon(\n            module=module,\n            optimizer=module.optim,\n            data_loader=self.data_loader,\n            epochs=num_epochs,\n            target_epsilon=self.target_epsilon,\n            target_delta=self.target_delta,\n            max_grad_norm=self.max_grad_norm,\n        )\n    print(\n        f\"Using sigma={module.optim.noise_multiplier} and C={self.max_grad_norm} to target (\u03b5, \u03b4) = ({self.target_epsilon}, {self.target_delta})-differential privacy.\".format()\n    )\n    self.get_epsilon = self.privacy_engine.accountant.get_epsilon\n    return module\n
"},{"location":"reference/modules/model/common/mlp/","title":"mlp","text":""},{"location":"reference/modules/model/common/mlp/#nhssynth.modules.model.common.mlp.MLP","title":"MLP","text":"

Bases: Module

Fully connected or residual neural nets for classification and regression.

"},{"location":"reference/modules/model/common/mlp/#nhssynth.modules.model.common.mlp.MLP--parameters","title":"Parameters","text":"

task_type: str
    classification or regression
n_units_in: int
    Number of features
n_units_out: int
    Number of outputs
n_layers_hidden: int
    Number of hidden layers
n_units_hidden: int
    Number of hidden units in each layer
nonlin: string, default 'elu'
    Nonlinearity to use in NN. Can be 'elu', 'relu', 'selu', 'tanh' or 'leaky_relu'.
lr: float
    learning rate for optimizer.
weight_decay: float
    l2 (ridge) penalty for the weights.
n_iter: int
    Maximum number of iterations.
batch_size: int
    Batch size
n_iter_print: int
    Number of iterations after which to print updates and check the validation loss.
random_state: int
    random_state used
patience: int
    Number of iterations to wait before early stopping after decrease in validation loss
n_iter_min: int
    Minimum number of iterations to go through before starting early stopping
dropout: float
    Dropout value. If 0, the dropout is not used.
clipping_value: int, default 1
    Gradients clipping value
batch_norm: bool
    Enable/disable batch norm
early_stopping: bool
    Enable/disable early stopping
residual: bool
    Add residuals.
loss: Callable
    Optional Custom loss function. If None, the loss is CrossEntropy for classification tasks, or RMSE for regression.

Source code in src/nhssynth/modules/model/common/mlp.py
class MLP(nn.Module):\n    \"\"\"\n    Fully connected or residual neural nets for classification and regression.\n\n    Parameters\n    ----------\n    task_type: str\n        classification or regression\n    n_units_int: int\n        Number of features\n    n_units_out: int\n        Number of outputs\n    n_layers_hidden: int\n        Number of hidden layers\n    n_units_hidden: int\n        Number of hidden units in each layer\n    nonlin: string, default 'elu'\n        Nonlinearity to use in NN. Can be 'elu', 'relu', 'selu', 'tanh' or 'leaky_relu'.\n    lr: float\n        learning rate for optimizer.\n    weight_decay: float\n        l2 (ridge) penalty for the weights.\n    n_iter: int\n        Maximum number of iterations.\n    batch_size: int\n        Batch size\n    n_iter_print: int\n        Number of iterations after which to print updates and check the validation loss.\n    random_state: int\n        random_state used\n    patience: int\n        Number of iterations to wait before early stopping after decrease in validation loss\n    n_iter_min: int\n        Minimum number of iterations to go through before starting early stopping\n    dropout: float\n        Dropout value. If 0, the dropout is not used.\n    clipping_value: int, default 1\n        Gradients clipping value\n    batch_norm: bool\n        Enable/disable batch norm\n    early_stopping: bool\n        Enable/disable early stopping\n    residual: bool\n        Add residuals.\n    loss: Callable\n        Optional Custom loss function. If None, the loss is CrossEntropy for classification tasks, or RMSE for regression.\n    \"\"\"\n\n    def __init__(\n        self,\n        n_units_in: int,\n        n_units_out: int,\n        n_layers_hidden: int = 1,\n        n_units_hidden: int = 100,\n        activation: str = \"relu\",\n        activation_out: Optional[list[tuple[str, int]]] = None,\n        lr: float = 1e-3,\n        weight_decay: float = 1e-3,\n        opt_betas: tuple = (0.9, 0.999),\n        n_iter: int = 1000,\n        batch_size: int = 500,\n        n_iter_print: int = 100,\n        patience: int = 10,\n        n_iter_min: int = 100,\n        dropout: float = 0.1,\n        clipping_value: int = 1,\n        batch_norm: bool = False,\n        early_stopping: bool = True,\n        residual: bool = False,\n        loss: Optional[Callable] = None,\n    ) -> None:\n        super(MLP, self).__init__()\n        activation = ACTIVATION_FUNCTIONS[activation] if activation in ACTIVATION_FUNCTIONS else None\n\n        if n_units_in < 0:\n            raise ValueError(\"n_units_in must be >= 0\")\n        if n_units_out < 0:\n            raise ValueError(\"n_units_out must be >= 0\")\n\n        if residual:\n            block = ResidualLayer\n        else:\n            block = LinearLayer\n\n        # network\n        layers = []\n\n        if n_layers_hidden > 0:\n            layers.append(\n                block(\n                    n_units_in,\n                    n_units_hidden,\n                    batch_norm=batch_norm,\n                    activation=activation,\n                )\n            )\n            n_units_hidden += int(residual) * n_units_in\n\n            # add required number of layers\n            for i in range(n_layers_hidden - 1):\n                layers.append(\n                    block(\n                        n_units_hidden,\n                        n_units_hidden,\n                        batch_norm=batch_norm,\n                        activation=activation,\n                        
dropout=dropout,\n                    )\n                )\n                n_units_hidden += int(residual) * n_units_hidden\n\n            # add final layers\n            layers.append(nn.Linear(n_units_hidden, n_units_out))\n        else:\n            layers = [nn.Linear(n_units_in, n_units_out)]\n\n        if activation_out is not None:\n            total_nonlin_len = 0\n            activations = []\n            for nonlin, nonlin_len in activation_out:\n                total_nonlin_len += nonlin_len\n                activations.append((ACTIVATION_FUNCTIONS[nonlin](), nonlin_len))\n\n            if total_nonlin_len != n_units_out:\n                raise RuntimeError(\n                    f\"Shape mismatch for the output layer. Expected length {n_units_out}, but got {activation_out} with length {total_nonlin_len}\"\n                )\n            layers.append(MultiActivationHead(activations))\n\n        self.model = nn.Sequential(*layers)\n\n        # optimizer\n        self.lr = lr\n        self.weight_decay = weight_decay\n        self.opt_betas = opt_betas\n        self.optimizer = torch.optim.Adam(\n            self.parameters(),\n            lr=self.lr,\n            weight_decay=self.weight_decay,\n            betas=self.opt_betas,\n        )\n\n        # training\n        self.n_iter = n_iter\n        self.n_iter_print = n_iter_print\n        self.n_iter_min = n_iter_min\n        self.batch_size = batch_size\n        self.patience = patience\n        self.clipping_value = clipping_value\n        self.early_stopping = early_stopping\n        if loss is not None:\n            self.loss = loss\n        else:\n            self.loss = nn.MSELoss()\n\n    def fit(self, X: np.ndarray, y: np.ndarray) -> \"MLP\":\n        Xt = self._check_tensor(X)\n        yt = self._check_tensor(y)\n\n        self._train(Xt, yt)\n\n        return self\n\n    def predict_proba(self, X: np.ndarray) -> np.ndarray:\n        if self.task_type != \"classification\":\n            raise ValueError(f\"Invalid task type for predict_proba {self.task_type}\")\n\n        with torch.no_grad():\n            Xt = self._check_tensor(X)\n\n            yt = self.forward(Xt)\n\n            return yt.cpu().numpy().squeeze()\n\n    def predict(self, X: np.ndarray) -> np.ndarray:\n        with torch.no_grad():\n            Xt = self._check_tensor(X)\n\n            yt = self.forward(Xt)\n\n            if self.task_type == \"classification\":\n                return np.argmax(yt.cpu().numpy().squeeze(), -1).squeeze()\n            else:\n                return yt.cpu().numpy().squeeze()\n\n    def score(self, X: np.ndarray, y: np.ndarray) -> float:\n        y_pred = self.predict(X)\n        if self.task_type == \"classification\":\n            return np.mean(y_pred == y)\n        else:\n            return np.mean(np.inner(y - y_pred, y - y_pred) / 2.0)\n\n    def forward(self, X: torch.Tensor) -> torch.Tensor:\n        return self.model(X.float())\n\n    def _train_epoch(self, loader: DataLoader) -> float:\n        train_loss = []\n\n        for batch_ndx, sample in enumerate(loader):\n            self.optimizer.zero_grad()\n\n            X_next, y_next = sample\n            if len(X_next) < 2:\n                continue\n\n            preds = self.forward(X_next).squeeze()\n\n            batch_loss = self.loss(preds, y_next)\n\n            batch_loss.backward()\n\n            if self.clipping_value > 0:\n                torch.nn.utils.clip_grad_norm_(self.parameters(), self.clipping_value)\n\n            self.optimizer.step()\n\n 
           train_loss.append(batch_loss.detach())\n\n        return torch.mean(torch.Tensor(train_loss))\n\n    def _train(self, X: torch.Tensor, y: torch.Tensor) -> \"MLP\":\n        X = self._check_tensor(X).float()\n        y = self._check_tensor(y).squeeze().float()\n        if self.task_type == \"classification\":\n            y = y.long()\n\n        # Load Dataset\n        dataset = TensorDataset(X, y)\n\n        train_size = int(0.8 * len(dataset))\n        test_size = len(dataset) - train_size\n        train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])\n        loader = DataLoader(train_dataset, batch_size=self.batch_size, pin_memory=False)\n\n        # Setup the network and optimizer\n        val_loss_best = 1e12\n        patience = 0\n\n        # do training\n        for i in range(self.n_iter):\n            self._train_epoch(loader)\n\n            if self.early_stopping or i % self.n_iter_print == 0:\n                with torch.no_grad():\n                    X_val, y_val = test_dataset.dataset.tensors\n\n                    preds = self.forward(X_val).squeeze()\n                    val_loss = self.loss(preds, y_val)\n\n                    if self.early_stopping:\n                        if val_loss_best > val_loss:\n                            val_loss_best = val_loss\n                            patience = 0\n                        else:\n                            patience += 1\n\n                        if patience > self.patience and i > self.n_iter_min:\n                            break\n\n        return self\n\n    def _check_tensor(self, X: torch.Tensor) -> torch.Tensor:\n        if isinstance(X, torch.Tensor):\n            return X\n        else:\n            return torch.from_numpy(np.asarray(X))\n\n    def __len__(self) -> int:\n        return len(self.model)\n
"},{"location":"reference/modules/model/common/mlp/#nhssynth.modules.model.common.mlp.MultiActivationHead","title":"MultiActivationHead","text":"

Bases: Module

Final layer with multiple activations. Useful for tabular data.

Source code in src/nhssynth/modules/model/common/mlp.py
class MultiActivationHead(nn.Module):\n    \"\"\"Final layer with multiple activations. Useful for tabular data.\"\"\"\n\n    def __init__(\n        self,\n        activations: list[tuple[nn.Module, int]],\n    ) -> None:\n        super(MultiActivationHead, self).__init__()\n        self.activations = []\n        self.activation_lengths = []\n\n        for activation, length in activations:\n            self.activations.append(activation)\n            self.activation_lengths.append(length)\n\n    def forward(self, X: torch.Tensor) -> torch.Tensor:\n        if X.shape[-1] != np.sum(self.activation_lengths):\n            raise RuntimeError(\n                f\"Shape mismatch for the activations: expected {np.sum(self.activation_lengths)}. Got shape {X.shape}.\"\n            )\n\n        split = 0\n        out = torch.zeros(X.shape)\n\n        for activation, step in zip(self.activations, self.activation_lengths):\n            out[..., split : split + step] = activation(X[..., split : split + step])\n            split += step\n\n        return out\n
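For example, different slices of the output can be given different activations, which is useful when the output concatenates heterogeneous (e.g. one-hot encoded) columns; the dimensions below are arbitrary.

import torch
import torch.nn as nn

# Sigmoid over the first 3 outputs, softmax over the remaining 4; the lengths (3 + 4)
# must sum to the final dimension of the input or forward() raises a RuntimeError.
head = MultiActivationHead([(nn.Sigmoid(), 3), (nn.Softmax(dim=-1), 4)])
out = head(torch.randn(8, 7))  # shape (8, 7)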
"},{"location":"reference/modules/model/common/mlp/#nhssynth.modules.model.common.mlp.SkipConnection","title":"SkipConnection(cls)","text":"

Wraps a model to add a skip connection from the input to the output.

Example:

>>> ResidualBlock = SkipConnection(MLP)
>>> res_block = ResidualBlock(n_units_in=10, n_units_out=3, n_units_hidden=64)
>>> res_block(torch.ones(10, 10)).shape
(10, 13)

Source code in src/nhssynth/modules/model/common/mlp.py
def SkipConnection(cls: Type[nn.Module]) -> Type[nn.Module]:\n    \"\"\"Wraps a model to add a skip connection from the input to the output.\n\n    Example:\n    >>> ResidualBlock = SkipConnection(MLP)\n    >>> res_block = ResidualBlock(n_units_in=10, n_units_out=3, n_units_hidden=64)\n    >>> res_block(torch.ones(10, 10)).shape\n    (10, 13)\n    \"\"\"\n\n    class Wrapper(cls):\n        pass\n\n    Wrapper._forward = cls.forward\n    Wrapper.forward = _forward_skip_connection\n    Wrapper.__name__ = f\"SkipConnection({cls.__name__})\"\n    Wrapper.__qualname__ = f\"SkipConnection({cls.__qualname__})\"\n    Wrapper.__doc__ = f\"\"\"(With skipped connection) {cls.__doc__}\"\"\"\n    return Wrapper\n
"},{"location":"reference/modules/model/common/model/","title":"model","text":""},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model","title":"Model","text":"

Bases: Module, ABC

Abstract base class for all NHSSynth models

Parameters:

    data (DataFrame, required): The data to train on
    metatransformer (MetaTransformer, required): A MetaTransformer to use for converting the generated data to match the original data
    batch_size (int, default 32): The batch size to use during training
    use_gpu (bool, default False): Flag to determine whether to use the GPU (if available)

Attributes:

    nrows: The number of rows in the data
    ncols: The number of columns in the data
    columns (Index): The names of the columns in the data
    metatransformer: The MetaTransformer (potentially) associated with the model
    multi_column_indices (list[list[int]]): A list of lists of column indices, where each sublist contains the indices for a one-hot encoded column
    single_column_indices (list[int]): Indices of all non-onehot columns
    data_loader (DataLoader): A PyTorch DataLoader for the data
    private: Whether the model is private, i.e. whether the DPMixin class has been inherited
    device: The device to use for training (CPU or GPU)

Raises:

    TypeError: If the Model class is directly instantiated (i.e. not inherited)
    AssertionError: If the number of columns in the data does not match the number of indices in multi_column_indices and single_column_indices
    UserWarning: If use_gpu is True but no GPU is available

Source code in src/nhssynth/modules/model/common/model.py
class Model(nn.Module, ABC):\n    \"\"\"\n    Abstract base class for all NHSSynth models\n\n    Args:\n        data: The data to train on\n        metatransformer: A `MetaTransformer` to use for converting the generated data to match the original data\n        batch_size: The batch size to use during training\n        use_gpu: Flag to determine whether to use the GPU (if available)\n\n    Attributes:\n        nrows: The number of rows in the `data`\n        ncols: The number of columns in the `data`\n        columns: The names of the columns in the `data`\n        metatransformer: The `MetaTransformer` (potentially) associated with the model\n        multi_column_indices: A list of lists of column indices, where each sublist containts the indices for a one-hot encoded column\n        single_column_indices: Indices of all non-onehot columns\n        data_loader: A PyTorch DataLoader for the `data`\n        private: Whether the model is private, i.e. whether the `DPMixin` class has been inherited\n        device: The device to use for training (CPU or GPU)\n\n    Raises:\n        TypeError: If the `Model` class is directly instantiated (i.e. not inherited)\n        AssertionError: If the number of columns in the `data` does not match the number of indices in `multi_column_indices` and `single_column_indices`\n        UserWarning: If `use_gpu` is True but no GPU is available\n    \"\"\"\n\n    def __init__(\n        self,\n        data: pd.DataFrame,\n        metatransformer: MetaTransformer,\n        cond: Optional[Union[pd.DataFrame, pd.Series, np.ndarray]] = None,\n        batch_size: int = 32,\n        use_gpu: bool = False,\n    ) -> None:\n        if type(self) is Model:\n            raise TypeError(\"Cannot directly instantiate the `Model` class\")\n        super().__init__()\n\n        self.nrows, self.ncols = data.shape\n        self.columns: pd.Index = data.columns\n\n        self.batch_size = batch_size\n\n        self.metatransformer = metatransformer\n        self.multi_column_indices: list[list[int]] = metatransformer.multi_column_indices\n        self.single_column_indices: list[int] = metatransformer.single_column_indices\n        assert len(self.single_column_indices) + sum([len(x) for x in self.multi_column_indices]) == self.ncols\n\n        tensor_data = torch.Tensor(data.to_numpy())\n        self.cond_encoder: Optional[OneHotEncoder] = None\n        if cond is not None:\n            cond = np.asarray(cond)\n            if len(cond.shape) == 1:\n                cond = cond.reshape(-1, 1)\n            self.cond_encoder = OneHotEncoder(handle_unknown=\"ignore\").fit(cond)\n            cond = self.cond_encoder.transform(cond).toarray()\n            self.n_units_conditional = cond.shape[-1]\n            dataset = TensorDataset(tensor_data, cond)\n        else:\n            self.n_units_conditional = 0\n            dataset = TensorDataset(tensor_data)\n\n        self.data_loader: DataLoader = DataLoader(\n            dataset,\n            pin_memory=True,\n            batch_size=self.batch_size,\n        )\n        self.setup_device(use_gpu)\n\n    def setup_device(self, use_gpu: bool) -> None:\n        \"\"\"Sets up the device to use for training (CPU or GPU) depending on `use_gpu` and device availability.\"\"\"\n        if use_gpu:\n            if torch.cuda.is_available():\n                self.device: torch.device = torch.device(\"cuda:0\")\n            else:\n                warnings.warn(\"`use_gpu` was provided but no GPU is available, using CPU\")\n        self.device: 
torch.device = torch.device(\"cpu\")\n\n    def save(self, filename: str) -> None:\n        \"\"\"Saves the model to `filename`.\"\"\"\n        torch.save(self.state_dict(), filename)\n\n    def load(self, path: str) -> None:\n        \"\"\"Loads the model from `path`.\"\"\"\n        self.load_state_dict(torch.load(path))\n\n    @classmethod\n    @abstractmethod\n    def get_args() -> list[str]:\n        \"\"\"Returns the list of arguments to look for in an `argparse.Namespace`, these must map to the arguments of the inheritor.\"\"\"\n        raise NotImplementedError\n\n    @classmethod\n    @abstractmethod\n    def get_metrics() -> list[str]:\n        \"\"\"Returns the list of metrics to track during training.\"\"\"\n        raise NotImplementedError\n\n    def _start_training(self, num_epochs: int, patience: int, displayed_metrics: list[str]) -> None:\n        \"\"\"\n        Initialises the training process.\n\n        Args:\n            num_epochs: The number of epochs to train for\n            patience: The number of epochs to wait before stopping training early if the loss does not improve\n            displayed_metrics: The metrics to display during training, this should be set to an empty list if running `train` in a notebook or the output may be messy\n\n        Attributes:\n            metrics: A dictionary of lists of tracked metrics, where each list contains the values for each batch\n            stats_bars: A dictionary of tqdm status bars for each tracked metric\n            max_length: The maximum length of the tracked metric names, used for formatting the tqdm status bars\n            start_time: The time at which training started\n            update_time: The time at which the tqdm status bars were last updated\n        \"\"\"\n        self.num_epochs = num_epochs\n        self.patience = patience\n        self.metrics = {metric: np.empty(0, dtype=float) for metric in self.get_metrics()}\n        displayed_metrics = displayed_metrics or self.get_metrics()\n        self.stats_bars = {\n            metric: tqdm(total=0, desc=\"\", position=i, bar_format=\"{desc}\", leave=True)\n            for i, metric in enumerate(displayed_metrics)\n        }\n        self.max_length = max([len(add_spaces_before_caps(s)) + 5 for s in displayed_metrics] + [20])\n        self.start_time = self.update_time = time.time()\n\n    def _generate_metric_str(self, key) -> str:\n        \"\"\"Generates a string to display the current value of the metric `key`.\"\"\"\n        return f\"{(add_spaces_before_caps(key) + ':').ljust(self.max_length)}  {np.mean(self.metrics[key][-len(self.data_loader) :]):.4f}\"\n\n    def _record_metrics(self, losses):\n        \"\"\"Records the metrics for the current batch to file and updates the tqdm status bars.\"\"\"\n        for key in self.metrics.keys():\n            if key in losses:\n                if losses[key]:\n                    self.metrics[key] = np.append(\n                        self.metrics[key], losses[key].item() if isinstance(losses[key], torch.Tensor) else losses[key]\n                    )\n        if time.time() - self.update_time > 0.5:\n            for key, stats_bar in self.stats_bars.items():\n                stats_bar.set_description_str(self._generate_metric_str(key))\n                self.update_time = time.time()\n\n    def _check_patience(self, epoch: int, metric: float) -> bool:\n        \"\"\"Maintains `_min_metric` and `_stop_counter` to determine whether to stop training early according to `patience`.\"\"\"\n        if epoch == 
0:\n            self._stop_counter = 0\n            self._min_metric = metric\n            self._patience_delta = self._min_metric / 1e4\n        if metric < (self._min_metric - self._patience_delta):\n            self._min_metric = metric\n            self._stop_counter = 0  # Set counter to zero\n        else:  # elbo has not improved\n            self._stop_counter += 1\n        return self._stop_counter == self.patience\n\n    def _finish_training(self, num_epochs: int) -> None:\n        \"\"\"Closes each of the tqdm status bars and prints the time taken to do `num_epochs`.\"\"\"\n        for stats_bar in self.stats_bars.values():\n            stats_bar.close()\n        tqdm.write(f\"Completed {num_epochs} epochs in {time.time() - self.start_time:.2f} seconds.\\033[0m\")\n
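Since Model cannot be instantiated directly, every concrete model subclasses it and supplies get_args and get_metrics alongside its own training logic. A hypothetical sketch of the required interface (ToyModel and hidden_dim are illustrative names, not part of the package):

import torch

from nhssynth.modules.model.common.model import Model

class ToyModel(Model):
    """Illustrative subclass showing the interface expected by the module."""

    def __init__(self, *args, hidden_dim: int = 16, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # `self.ncols` and `self.device` are provided by the base class constructor.
        self.net = torch.nn.Linear(self.ncols, hidden_dim).to(self.device)

    @classmethod
    def get_args(cls) -> list[str]:
        # Names looked up on the argparse.Namespace and forwarded to __init__.
        return ["hidden_dim"]

    @classmethod
    def get_metrics(cls) -> list[str]:
        return ["Loss"]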
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.get_args","title":"get_args() abstractmethod classmethod","text":"

Returns the list of arguments to look for in an argparse.Namespace; these must map to the arguments of the inheritor.

Source code in src/nhssynth/modules/model/common/model.py
@classmethod\n@abstractmethod\ndef get_args() -> list[str]:\n    \"\"\"Returns the list of arguments to look for in an `argparse.Namespace`, these must map to the arguments of the inheritor.\"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.get_metrics","title":"get_metrics() abstractmethod classmethod","text":"

Returns the list of metrics to track during training.

Source code in src/nhssynth/modules/model/common/model.py
@classmethod\n@abstractmethod\ndef get_metrics() -> list[str]:\n    \"\"\"Returns the list of metrics to track during training.\"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.load","title":"load(path)","text":"

Loads the model from path.

Source code in src/nhssynth/modules/model/common/model.py
def load(self, path: str) -> None:\n    \"\"\"Loads the model from `path`.\"\"\"\n    self.load_state_dict(torch.load(path))\n
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.save","title":"save(filename)","text":"

Saves the model to filename.

Source code in src/nhssynth/modules/model/common/model.py
def save(self, filename: str) -> None:\n    \"\"\"Saves the model to `filename`.\"\"\"\n    torch.save(self.state_dict(), filename)\n
"},{"location":"reference/modules/model/common/model/#nhssynth.modules.model.common.model.Model.setup_device","title":"setup_device(use_gpu)","text":"

Sets up the device to use for training (CPU or GPU) depending on use_gpu and device availability.

Source code in src/nhssynth/modules/model/common/model.py
def setup_device(self, use_gpu: bool) -> None:\n    \"\"\"Sets up the device to use for training (CPU or GPU) depending on `use_gpu` and device availability.\"\"\"\n    if use_gpu:\n        if torch.cuda.is_available():\n            self.device: torch.device = torch.device(\"cuda:0\")\n        else:\n            warnings.warn(\"`use_gpu` was provided but no GPU is available, using CPU\")\n    self.device: torch.device = torch.device(\"cpu\")\n
"},{"location":"reference/modules/model/models/","title":"models","text":""},{"location":"reference/modules/model/models/dpvae/","title":"dpvae","text":""},{"location":"reference/modules/model/models/dpvae/#nhssynth.modules.model.models.dpvae.DPVAE","title":"DPVAE","text":"

Bases: DPMixin, VAE

A differentially private VAE. Accepts VAE arguments as well as DPMixin arguments.

Source code in src/nhssynth/modules/model/models/dpvae.py
class DPVAE(DPMixin, VAE):\n    \"\"\"\n    A differentially private VAE. Accepts [`VAE`][nhssynth.modules.model.models.vae.VAE] arguments\n    as well as [`DPMixin`][nhssynth.modules.model.common.dp.DPMixin] arguments.\n    \"\"\"\n\n    def __init__(\n        self,\n        *args,\n        target_epsilon: float = 3.0,\n        target_delta: Optional[float] = None,\n        max_grad_norm: float = 5.0,\n        secure_mode: bool = False,\n        shared_optimizer: bool = False,\n        **kwargs,\n    ) -> None:\n        super(DPVAE, self).__init__(\n            *args,\n            target_epsilon=target_epsilon,\n            target_delta=target_delta,\n            max_grad_norm=max_grad_norm,\n            secure_mode=secure_mode,\n            # TODO fix shared_optimizer workflow for DP models\n            shared_optimizer=False,\n            **kwargs,\n        )\n\n    def make_private(self, num_epochs: int) -> GradSampleModule:\n        \"\"\"\n        Make the [`Decoder`][nhssynth.modules.model.models.vae.Decoder] differentially private\n        unless `shared_optimizer` is True, in which case the whole VAE will be privatised.\n\n        Args:\n            num_epochs: The number of epochs to train for\n        \"\"\"\n        if self.shared_optimizer:\n            super().make_private(num_epochs)\n        else:\n            self.decoder = super().make_private(num_epochs, self.decoder)\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        return VAE.get_args() + DPMixin.get_args()\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return VAE.get_metrics() + DPMixin.get_metrics()\n
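A hedged usage sketch, assuming transformed_data (a pd.DataFrame) and metatransformer (a fitted MetaTransformer) have already been produced by the dataloader module; both names are placeholders:

from nhssynth.modules.model.models.dpvae import DPVAE

model = DPVAE(
    transformed_data,        # placeholder: encoded training data
    metatransformer,         # placeholder: the associated MetaTransformer
    target_epsilon=3.0,      # privacy budget
    max_grad_norm=5.0,       # per-sample gradient clipping
)
num_epochs, metrics = model.train(num_epochs=50, patience=5)
synthetic = model.generate(1000)   # DataFrame mapped back to the original schema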
"},{"location":"reference/modules/model/models/dpvae/#nhssynth.modules.model.models.dpvae.DPVAE.make_private","title":"make_private(num_epochs)","text":"

Make the Decoder differentially private unless shared_optimizer is True, in which case the whole VAE will be privatised.

Parameters:

    num_epochs (int, required): The number of epochs to train for

Source code in src/nhssynth/modules/model/models/dpvae.py
def make_private(self, num_epochs: int) -> GradSampleModule:\n    \"\"\"\n    Make the [`Decoder`][nhssynth.modules.model.models.vae.Decoder] differentially private\n    unless `shared_optimizer` is True, in which case the whole VAE will be privatised.\n\n    Args:\n        num_epochs: The number of epochs to train for\n    \"\"\"\n    if self.shared_optimizer:\n        super().make_private(num_epochs)\n    else:\n        self.decoder = super().make_private(num_epochs, self.decoder)\n
"},{"location":"reference/modules/model/models/gan/","title":"gan","text":""},{"location":"reference/modules/model/models/gan/#nhssynth.modules.model.models.gan.GAN","title":"GAN","text":"

Bases: Model

Basic GAN implementation.

Parameters:

    n_units_conditional (int, default 0): Number of conditional units
    generator_n_layers_hidden (int, default 2): Number of hidden layers in the generator
    generator_n_units_hidden (int, default 250): Number of hidden units in each layer of the generator
    generator_activation (str, default 'leaky_relu'): Nonlinearity to use in the generator. Can be 'elu', 'relu', 'selu' or 'leaky_relu'.
    generator_n_iter (int, required): Maximum number of iterations in the generator.
    generator_batch_norm (bool, default False): Enable/disable batch norm for the generator
    generator_dropout (float, default 0): Dropout value. If 0, the dropout is not used.
    generator_residual (bool, default True): Use residuals for the generator
    generator_activation_out (Optional[List[Tuple[str, int]]], required): List of activations. Useful with the TabularEncoder
    generator_lr (float, default 2e-4): Generator learning rate, used by the Adam optimizer
    generator_weight_decay (float, required): Generator weight decay, used by the Adam optimizer
    generator_opt_betas (tuple, default (0.9, 0.999)): Generator initial decay rates, used by the Adam optimizer
    generator_extra_penalty_cbks (List[Callable], required): Additional loss callbacks for the generator. Used by the TabularGAN for the conditional loss
    discriminator_n_layers_hidden (int, default 3): Number of hidden layers in the discriminator
    discriminator_n_units_hidden (int, default 300): Number of hidden units in each layer of the discriminator
    discriminator_activation (str, default 'leaky_relu'): Nonlinearity to use in the discriminator. Can be 'elu', 'relu', 'selu' or 'leaky_relu'.
    discriminator_batch_norm (bool, default False): Enable/disable batch norm for the discriminator
    discriminator_dropout (float, default 0.1): Dropout value for the discriminator. If 0, the dropout is not used.
    discriminator_lr (float, default 2e-4): Discriminator learning rate, used by the Adam optimizer
    discriminator_weight_decay (float, required): Discriminator weight decay, used by the Adam optimizer
    discriminator_opt_betas (tuple, default (0.9, 0.999)): Discriminator initial decay rates, used by the Adam optimizer
    clipping_value (int, default 0): Gradient clipping value. Zero disables the feature
    lambda_gradient_penalty (float, default 10): Weight for the gradient penalty

Source code in src/nhssynth/modules/model/models/gan.py
class GAN(Model):\n    \"\"\"\n    Basic GAN implementation.\n\n    Args:\n        n_units_conditional: int\n            Number of conditional units\n        generator_n_layers_hidden: int\n            Number of hidden layers in the generator\n        generator_n_units_hidden: int\n            Number of hidden units in each layer of the Generator\n        generator_activation: string, default 'elu'\n            Nonlinearity to use in the generator. Can be 'elu', 'relu', 'selu' or 'leaky_relu'.\n        generator_n_iter: int\n            Maximum number of iterations in the Generator.\n        generator_batch_norm: bool\n            Enable/disable batch norm for the generator\n        generator_dropout: float\n            Dropout value. If 0, the dropout is not used.\n        generator_residual: bool\n            Use residuals for the generator\n        generator_activation_out: Optional[List[Tuple[str, int]]]\n            List of activations. Useful with the TabularEncoder\n        generator_lr: float = 2e-4\n            Generator learning rate, used by the Adam optimizer\n        generator_weight_decay: float = 1e-3\n            Generator weight decay, used by the Adam optimizer\n        generator_opt_betas: tuple = (0.9, 0.999)\n            Generator initial decay rates, used by the Adam Optimizer\n        generator_extra_penalty_cbks: List[Callable]\n            Additional loss callabacks for the generator. Used by the TabularGAN for the conditional loss\n        discriminator_n_layers_hidden: int\n            Number of hidden layers in the discriminator\n        discriminator_n_units_hidden: int\n            Number of hidden units in each layer of the discriminator\n        discriminator_activation: string, default 'relu'\n            Nonlinearity to use in the discriminator. Can be 'elu', 'relu', 'selu' or 'leaky_relu'.\n        discriminator_batch_norm: bool\n            Enable/disable batch norm for the discriminator\n        discriminator_dropout: float\n            Dropout value for the discriminator. If 0, the dropout is not used.\n        discriminator_lr: float\n            Discriminator learning rate, used by the Adam optimizer\n        discriminator_weight_decay: float\n            Discriminator weight decay, used by the Adam optimizer\n        discriminator_opt_betas: tuple\n            Initial weight decays for the Adam optimizer\n        clipping_value: int, default 0\n            Gradients clipping value. 
Zero disables the feature\n        lambda_gradient_penalty: float = 10\n            Weight for the gradient penalty\n    \"\"\"\n\n    def __init__(\n        self,\n        *args,\n        n_units_conditional: int = 0,\n        generator_n_layers_hidden: int = 2,\n        generator_n_units_hidden: int = 250,\n        generator_activation: str = \"leaky_relu\",\n        generator_batch_norm: bool = False,\n        generator_dropout: float = 0,\n        generator_lr: float = 2e-4,\n        generator_residual: bool = True,\n        generator_opt_betas: tuple = (0.9, 0.999),\n        discriminator_n_layers_hidden: int = 3,\n        discriminator_n_units_hidden: int = 300,\n        discriminator_activation: str = \"leaky_relu\",\n        discriminator_batch_norm: bool = False,\n        discriminator_dropout: float = 0.1,\n        discriminator_lr: float = 2e-4,\n        discriminator_opt_betas: tuple = (0.9, 0.999),\n        clipping_value: int = 0,\n        lambda_gradient_penalty: float = 10,\n        **kwargs,\n    ) -> None:\n        super(GAN, self).__init__(*args, **kwargs)\n\n        self.generator_n_units_hidden = generator_n_units_hidden\n        self.n_units_conditional = n_units_conditional\n\n        self.generator = MLP(\n            n_units_in=generator_n_units_hidden + n_units_conditional,\n            n_units_out=self.ncols,\n            n_layers_hidden=generator_n_layers_hidden,\n            n_units_hidden=generator_n_units_hidden,\n            activation=generator_activation,\n            # nonlin_out=generator_activation_out,\n            batch_norm=generator_batch_norm,\n            dropout=generator_dropout,\n            lr=generator_lr,\n            residual=generator_residual,\n            opt_betas=generator_opt_betas,\n        ).to(self.device)\n\n        self.discriminator = MLP(\n            n_units_in=self.ncols + n_units_conditional,\n            n_units_out=1,\n            n_layers_hidden=discriminator_n_layers_hidden,\n            n_units_hidden=discriminator_n_units_hidden,\n            activation=discriminator_activation,\n            activation_out=[(\"none\", 1)],\n            batch_norm=discriminator_batch_norm,\n            dropout=discriminator_dropout,\n            lr=discriminator_lr,\n            opt_betas=discriminator_opt_betas,\n        ).to(self.device)\n\n        self.clipping_value = clipping_value\n        self.lambda_gradient_penalty = lambda_gradient_penalty\n\n        def gen_fake_labels(X: torch.Tensor) -> torch.Tensor:\n            return torch.zeros((len(X),), device=self.device)\n\n        def gen_true_labels(X: torch.Tensor) -> torch.Tensor:\n            return torch.ones((len(X),), device=self.device)\n\n        self.fake_labels_generator = gen_fake_labels\n        self.true_labels_generator = gen_true_labels\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        return [\n            \"n_units_conditional\",\n            \"generator_n_layers_hidden\",\n            \"generator_n_units_hidden\",\n            \"generator_activation\",\n            \"generator_batch_norm\",\n            \"generator_dropout\",\n            \"generator_lr\",\n            \"generator_residual\",\n            \"generator_opt_betas\",\n            \"discriminator_n_layers_hidden\",\n            \"discriminator_n_units_hidden\",\n            \"discriminator_activation\",\n            \"discriminator_batch_norm\",\n            \"discriminator_dropout\",\n            \"discriminator_lr\",\n            \"discriminator_opt_betas\",\n            
\"clipping_value\",\n            \"lambda_gradient_penalty\",\n        ]\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return [\"GLoss\", \"DLoss\"]\n\n    def generate(self, N: int, cond: Optional[np.ndarray] = None) -> np.ndarray:\n        N = N or self.nrows\n        self.generator.eval()\n\n        condt: Optional[torch.Tensor] = None\n        if cond is not None:\n            condt = self._check_tensor(cond)\n        with torch.no_grad():\n            return self.metatransformer.inverse_apply(\n                pd.DataFrame(self(N, condt).detach().cpu().numpy(), columns=self.columns)\n            )\n\n    def forward(\n        self,\n        N: int,\n        cond: Optional[torch.Tensor] = None,\n    ) -> torch.Tensor:\n        if cond is None and self.n_units_conditional > 0:\n            # sample from the original conditional\n            if self._original_cond is None:\n                raise ValueError(\"Invalid original conditional. Provide a valid value.\")\n            cond_idxs = torch.randint(len(self._original_cond), (N,))\n            cond = self._original_cond[cond_idxs]\n\n        if cond is not None and len(cond.shape) == 1:\n            cond = cond.reshape(-1, 1)\n\n        if cond is not None and len(cond) != N:\n            raise ValueError(\"cond length must match N\")\n\n        fixed_noise = torch.randn(N, self.generator_n_units_hidden, device=self.device)\n        fixed_noise = self._append_optional_cond(fixed_noise, cond)\n\n        return self.generator(fixed_noise)\n\n    def _train_epoch_generator(\n        self,\n        X: torch.Tensor,\n        cond: Optional[torch.Tensor],\n    ) -> float:\n        # Update the G network\n        self.generator.train()\n        self.generator.optimizer.zero_grad()\n\n        real_X_raw = X.to(self.device)\n        real_X = self._append_optional_cond(real_X_raw, cond)\n        batch_size = len(real_X)\n\n        noise = torch.randn(batch_size, self.generator_n_units_hidden, device=self.device)\n        noise = self._append_optional_cond(noise, cond)\n\n        fake_raw = self.generator(noise)\n        fake = self._append_optional_cond(fake_raw, cond)\n\n        output = self.discriminator(fake).squeeze().float()\n        # Calculate G's loss based on this output\n        errG = -torch.mean(output)\n        if hasattr(self, \"generator_extra_penalty_cbks\"):\n            for extra_loss in self.generator_extra_penalty_cbks:\n                errG += extra_loss(\n                    real_X_raw,\n                    fake_raw,\n                    cond=cond,\n                )\n\n        # Calculate gradients for G\n        errG.backward()\n\n        # Update G\n        if self.clipping_value > 0:\n            torch.nn.utils.clip_grad_norm_(self.generator.parameters(), self.clipping_value)\n        self.generator.optimizer.step()\n\n        if torch.isnan(errG):\n            raise RuntimeError(\"NaNs detected in the generator loss\")\n\n        # Return loss\n        return errG.item()\n\n    def _train_epoch_discriminator(\n        self,\n        X: torch.Tensor,\n        cond: Optional[torch.Tensor],\n    ) -> float:\n        # Update the D network\n        self.discriminator.train()\n\n        errors = []\n\n        batch_size = min(self.batch_size, len(X))\n\n        # Train with all-real batch\n        real_X = X.to(self.device)\n        real_X = self._append_optional_cond(real_X, cond)\n\n        real_labels = self.true_labels_generator(X).to(self.device).squeeze()\n        real_output = 
self.discriminator(real_X).squeeze().float()\n\n        # Train with all-fake batch\n        noise = torch.randn(batch_size, self.generator_n_units_hidden, device=self.device)\n        noise = self._append_optional_cond(noise, cond)\n\n        fake_raw = self.generator(noise)\n        fake = self._append_optional_cond(fake_raw, cond)\n\n        fake_labels = self.fake_labels_generator(fake_raw).to(self.device).squeeze().float()\n        fake_output = self.discriminator(fake.detach()).squeeze()\n\n        # Compute errors. Some fake inputs might be marked as real for privacy guarantees.\n\n        real_real_output = real_output[(real_labels * real_output) != 0]\n        real_fake_output = fake_output[(fake_labels * fake_output) != 0]\n        errD_real = torch.mean(torch.concat((real_real_output, real_fake_output)))\n\n        fake_real_output = real_output[((1 - real_labels) * real_output) != 0]\n        fake_fake_output = fake_output[((1 - fake_labels) * fake_output) != 0]\n        errD_fake = torch.mean(torch.concat((fake_real_output, fake_fake_output)))\n\n        penalty = self._loss_gradient_penalty(\n            real_samples=real_X,\n            fake_samples=fake,\n            batch_size=batch_size,\n        )\n        errD = -errD_real + errD_fake\n\n        self.discriminator.optimizer.zero_grad()\n        if isinstance(self, DPMixin):\n            # Adversarial loss\n            # 1. split fwd-bkwd on fake and real images into two explicit blocks.\n            # 2. no need to compute per_sample_gardients on fake data, disable hooks.\n            # 3. re-enable hooks to obtain per_sample_gardients for real data.\n            # fake fwd-bkwd\n            self.discriminator.disable_hooks()\n            penalty.backward(retain_graph=True)\n            errD_fake.backward(retain_graph=True)\n\n            self.discriminator.enable_hooks()\n            errD_real.backward()  # HACK: calling bkwd without zero_grad() accumulates param gradients\n        else:\n            penalty.backward(retain_graph=True)\n            errD.backward()\n\n        # Update D\n        if self.clipping_value > 0:\n            torch.nn.utils.clip_grad_norm_(self.discriminator.parameters(), self.clipping_value)\n        self.discriminator.optimizer.step()\n\n        errors.append(errD.item())\n\n        if np.isnan(np.mean(errors)):\n            raise RuntimeError(\"NaNs detected in the discriminator loss\")\n\n        return np.mean(errors)\n\n    def _train_epoch(self) -> Tuple[float, float]:\n        for data in tqdm(self.data_loader, desc=\"Batches\", position=len(self.stats_bars) + 1, leave=False):\n            cond: Optional[torch.Tensor] = None\n            if self.n_units_conditional > 0:\n                X, cond = data\n            else:\n                X = data[0]\n\n            losses = {\n                \"DLoss\": self._train_epoch_discriminator(X, cond),\n                \"GLoss\": self._train_epoch_generator(X, cond),\n            }\n            self._record_metrics(losses)\n\n        return np.mean(self.metrics[\"GLoss\"][-len(self.data_loader) :]), np.mean(\n            self.metrics[\"DLoss\"][-len(self.data_loader) :]\n        )\n\n    def train(\n        self,\n        num_epochs: int = 100,\n        patience: int = 5,\n        displayed_metrics: list[str] = [\"GLoss\", \"DLoss\"],\n    ) -> tuple[int, dict[str, np.ndarray]]:\n        self._start_training(num_epochs, patience, displayed_metrics)\n\n        for epoch in tqdm(range(num_epochs), desc=\"Epochs\", position=len(self.stats_bars), 
leave=False):\n            losses = self._train_epoch()\n            if self._check_patience(epoch, losses[0]) and self._check_patience(epoch, losses[1]):\n                num_epochs = epoch + 1\n                break\n\n        self._finish_training(num_epochs)\n        return (num_epochs, self.metrics)\n\n    def _check_tensor(self, X: torch.Tensor) -> torch.Tensor:\n        if isinstance(X, torch.Tensor):\n            return X.to(self.device)\n        else:\n            return torch.from_numpy(np.asarray(X)).to(self.device)\n\n    def _loss_gradient_penalty(\n        self,\n        real_samples: torch.tensor,\n        fake_samples: torch.Tensor,\n        batch_size: int,\n    ) -> torch.Tensor:\n        \"\"\"Calculates the gradient penalty loss for WGAN GP\"\"\"\n        # Random weight term for interpolation between real and fake samples\n        alpha = torch.rand([batch_size, 1]).to(self.device)\n        # Get random interpolation between real and fake samples\n        interpolated = (alpha * real_samples + ((1 - alpha) * fake_samples)).requires_grad_(True)\n        d_interpolated = self.discriminator(interpolated).squeeze()\n        labels = torch.ones((len(interpolated),), device=self.device)\n\n        # Get gradient w.r.t. interpolates\n        gradients = torch.autograd.grad(\n            outputs=d_interpolated,\n            inputs=interpolated,\n            grad_outputs=labels,\n            create_graph=True,\n            retain_graph=True,\n            only_inputs=True,\n            allow_unused=True,\n        )[0]\n        gradients = gradients.view(gradients.size(0), -1)\n        gradient_penalty = ((gradients.norm(2, dim=-1) - 1) ** 2).mean()\n        return self.lambda_gradient_penalty * gradient_penalty\n\n    def _append_optional_cond(self, X: torch.Tensor, cond: Optional[torch.Tensor]) -> torch.Tensor:\n        if cond is None:\n            return X\n\n        return torch.cat([X, cond], dim=1)\n
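The discriminator is trained with a WGAN-GP style objective (critic loss -mean(D(real)) + mean(D(fake)) plus lambda_gradient_penalty times the gradient penalty), while the generator minimises -mean(D(G(z))). A hedged usage sketch, with transformed_data and metatransformer again standing in for the outputs of the dataloader module:

from nhssynth.modules.model.models.gan import GAN

model = GAN(
    transformed_data,              # placeholder: encoded training data
    metatransformer,               # placeholder: the associated MetaTransformer
    generator_n_layers_hidden=2,
    discriminator_n_layers_hidden=3,
    lambda_gradient_penalty=10,    # weight of the gradient penalty term
)
num_epochs, metrics = model.train(num_epochs=100, patience=5)   # tracks "GLoss" and "DLoss"
synthetic = model.generate(1000)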
"},{"location":"reference/modules/model/models/vae/","title":"vae","text":""},{"location":"reference/modules/model/models/vae/#nhssynth.modules.model.models.vae.Decoder","title":"Decoder","text":"

Bases: Module

Decoder, takes in z and outputs reconstruction

Source code in src/nhssynth/modules/model/models/vae.py
class Decoder(nn.Module):\n    \"\"\"Decoder, takes in z and outputs reconstruction\"\"\"\n\n    def __init__(\n        self,\n        output_dim: int,\n        latent_dim: int,\n        hidden_dim: int,\n        activation: str,\n        learning_rate: float,\n        shared_optimizer: bool,\n    ) -> None:\n        super().__init__()\n        activation = ACTIVATION_FUNCTIONS[activation]\n        self.net = nn.Sequential(\n            nn.Linear(latent_dim, hidden_dim),\n            activation(),\n            nn.Linear(hidden_dim, hidden_dim),\n            activation(),\n            nn.Linear(hidden_dim, output_dim),\n        )\n        if not shared_optimizer:\n            self.optim = torch.optim.Adam(self.parameters(), lr=learning_rate)\n\n    def forward(self, z):\n        return self.net(z)\n
"},{"location":"reference/modules/model/models/vae/#nhssynth.modules.model.models.vae.Encoder","title":"Encoder","text":"

Bases: Module

Encoder, takes in x and outputs mu_z, sigma_z (diagonal Gaussian variational posterior assumed)

Source code in src/nhssynth/modules/model/models/vae.py
class Encoder(nn.Module):\n    \"\"\"Encoder, takes in x and outputs mu_z, sigma_z (diagonal Gaussian variational posterior assumed)\"\"\"\n\n    def __init__(\n        self,\n        input_dim: int,\n        latent_dim: int,\n        hidden_dim: int,\n        activation: str,\n        learning_rate: float,\n        shared_optimizer: bool,\n    ) -> None:\n        super().__init__()\n        activation = ACTIVATION_FUNCTIONS[activation]\n        self.latent_dim = latent_dim\n        self.net = nn.Sequential(\n            nn.Linear(input_dim, hidden_dim),\n            activation(),\n            nn.Linear(hidden_dim, hidden_dim),\n            activation(),\n            nn.Linear(hidden_dim, 2 * latent_dim),\n        )\n        if not shared_optimizer:\n            self.optim = torch.optim.Adam(self.parameters(), lr=learning_rate)\n\n    def forward(self, x):\n        outs = self.net(x)\n        mu_z = outs[:, : self.latent_dim]\n        logsigma_z = outs[:, self.latent_dim :]\n        return mu_z, logsigma_z\n
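The encoder outputs mu_z and logsigma_z; latent samples are then drawn with the reparameterisation trick, exactly as in VAE.loss below. A minimal sketch of that step with placeholder tensors:

import torch

mu_z = torch.zeros(4, 8)        # placeholder: encoder mean, batch of 4, latent dim 8
logsigma_z = torch.zeros(4, 8)  # placeholder: encoder log standard deviation

eps = torch.randn_like(mu_z)               # eps ~ N(0, I)
z = mu_z + eps * torch.exp(logsigma_z)     # z ~ N(mu_z, sigma_z^2), differentiable w.r.t. both encoder outputs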
"},{"location":"reference/modules/model/models/vae/#nhssynth.modules.model.models.vae.VAE","title":"VAE","text":"

Bases: Model

A Variational Autoencoder (VAE) model. Accepts Model arguments as well as the following:

Parameters:

    encoder_latent_dim (int, default 256): The dimensionality of the latent space.
    encoder_hidden_dim (int, default 256): The dimensionality of the hidden layers in the encoder.
    encoder_activation (str, default 'leaky_relu'): The activation function to use in the encoder.
    encoder_learning_rate (float, default 0.001): The learning rate for the encoder.
    decoder_latent_dim (int, default 256): The dimensionality of the latent space fed into the decoder.
    decoder_hidden_dim (int, default 32): The dimensionality of the hidden layers in the decoder.
    decoder_activation (str, default 'leaky_relu'): The activation function to use in the decoder.
    decoder_learning_rate (float, default 0.001): The learning rate for the decoder.
    shared_optimizer (bool, default True): Whether to use a shared optimizer for the encoder and decoder.

Source code in src/nhssynth/modules/model/models/vae.py
class VAE(Model):\n    \"\"\"\n    A Variational Autoencoder (VAE) model. Accepts [`Model`][nhssynth.modules.model.common.model.Model] arguments as well as the following:\n\n    Args:\n        encoder_latent_dim: The dimensionality of the latent space.\n        encoder_hidden_dim: The dimensionality of the hidden layers in the encoder.\n        encoder_activation: The activation function to use in the encoder.\n        encoder_learning_rate: The learning rate for the encoder.\n        decoder_latent_dim: The dimensionality of the hidden layers in the decoder.\n        decoder_hidden_dim: The dimensionality of the hidden layers in the decoder.\n        decoder_activation: The activation function to use in the decoder.\n        decoder_learning_rate: The learning rate for the decoder.\n        shared_optimizer: Whether to use a shared optimizer for the encoder and decoder.\n    \"\"\"\n\n    def __init__(\n        self,\n        *args,\n        encoder_latent_dim: int = 256,\n        encoder_hidden_dim: int = 256,\n        encoder_activation: str = \"leaky_relu\",\n        encoder_learning_rate: float = 1e-3,\n        decoder_latent_dim: int = 256,\n        decoder_hidden_dim: int = 32,\n        decoder_activation: str = \"leaky_relu\",\n        decoder_learning_rate: float = 1e-3,\n        shared_optimizer: bool = True,\n        **kwargs,\n    ) -> None:\n        super(VAE, self).__init__(*args, **kwargs)\n\n        self.shared_optimizer = shared_optimizer\n        self.encoder = Encoder(\n            input_dim=self.ncols,\n            latent_dim=encoder_latent_dim,\n            hidden_dim=encoder_hidden_dim,\n            activation=encoder_activation,\n            learning_rate=encoder_learning_rate,\n            shared_optimizer=self.shared_optimizer,\n        ).to(self.device)\n        self.decoder = Decoder(\n            output_dim=self.ncols,\n            latent_dim=decoder_latent_dim,\n            hidden_dim=decoder_hidden_dim,\n            activation=decoder_activation,\n            learning_rate=decoder_learning_rate,\n            shared_optimizer=self.shared_optimizer,\n        ).to(self.device)\n        self.noiser = Noiser(\n            len(self.single_column_indices),\n        ).to(self.device)\n        if self.shared_optimizer:\n            assert (\n                encoder_learning_rate == decoder_learning_rate\n            ), \"If `shared_optimizer` is True, `encoder_learning_rate` must equal `decoder_learning_rate`\"\n            self.optim = torch.optim.Adam(\n                list(self.encoder.parameters()) + list(self.decoder.parameters()),\n                lr=encoder_learning_rate,\n            )\n            self.zero_grad = self.optim.zero_grad\n            self.step = self.optim.step\n        else:\n            self.zero_grad = lambda: (self.encoder.optim.zero_grad(), self.decoder.optim.zero_grad())\n            self.step = lambda: (self.encoder.optim.step(), self.decoder.optim.step())\n\n    @classmethod\n    def get_args(cls) -> list[str]:\n        return [\n            \"encoder_latent_dim\",\n            \"encoder_hidden_dim\",\n            \"encoder_activation\",\n            \"encoder_learning_rate\",\n            \"decoder_latent_dim\",\n            \"decoder_hidden_dim\",\n            \"decoder_activation\",\n            \"decoder_learning_rate\",\n            \"shared_optimizer\",\n        ]\n\n    @classmethod\n    def get_metrics(cls) -> list[str]:\n        return [\n            \"ELBO\",\n            \"KLD\",\n            \"ReconstructionLoss\",\n           
 \"CategoricalLoss\",\n            \"NumericalLoss\",\n        ]\n\n    def reconstruct(self, X):\n        mu_z, logsigma_z = self.encoder(X)\n        x_recon = self.decoder(mu_z)\n        return x_recon\n\n    def generate(self, N: Optional[int] = None) -> pd.DataFrame:\n        N = N or self.nrows\n        z_samples = torch.randn_like(torch.ones((N, self.encoder.latent_dim)), device=self.device)\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", message=\"Using a non-full backward hook\")\n            x_gen = self.decoder(z_samples)\n        x_gen_ = torch.ones_like(x_gen, device=self.device)\n\n        if self.multi_column_indices != [[]]:\n            for cat_idxs in self.multi_column_indices:\n                x_gen_[:, cat_idxs] = torch.distributions.one_hot_categorical.OneHotCategorical(\n                    logits=x_gen[:, cat_idxs]\n                ).sample()\n\n        x_gen_[:, self.single_column_indices] = x_gen[:, self.single_column_indices] + torch.exp(\n            self.noiser(x_gen[:, self.single_column_indices])\n        ) * torch.randn_like(x_gen[:, self.single_column_indices])\n        if torch.cuda.is_available():\n            x_gen_ = x_gen_.cpu()\n        return self.metatransformer.inverse_apply(pd.DataFrame(x_gen_.detach(), columns=self.columns))\n\n    def loss(self, X):\n        mu_z, logsigma_z = self.encoder(X)\n\n        p = Normal(torch.zeros_like(mu_z), torch.ones_like(mu_z))\n        q = Normal(mu_z, torch.exp(logsigma_z))\n\n        kld = torch.sum(torch.distributions.kl_divergence(q, p))\n\n        s = torch.randn_like(mu_z)\n        z_samples = mu_z + s * torch.exp(logsigma_z)\n\n        x_recon = self.decoder(z_samples)\n\n        categoric_loglik = 0\n\n        if self.multi_column_indices != [[]]:\n            for cat_idxs in self.multi_column_indices:\n                categoric_loglik += -torch.nn.functional.cross_entropy(\n                    x_recon[:, cat_idxs],\n                    torch.max(X[:, cat_idxs], 1)[1],\n                ).sum()\n\n        gauss_loglik = 0\n        if self.single_column_indices:\n            gauss_loglik = (\n                Normal(\n                    loc=x_recon[:, self.single_column_indices],\n                    scale=torch.exp(self.noiser(x_recon[:, self.single_column_indices])),\n                )\n                .log_prob(X[:, self.single_column_indices])\n                .sum()\n            )\n\n        reconstruction_loss = -(categoric_loglik + gauss_loglik)\n\n        elbo = kld + reconstruction_loss\n\n        return {\n            \"ELBO\": elbo / X.size()[0],\n            \"ReconstructionLoss\": reconstruction_loss / X.size()[0],\n            \"KLD\": kld / X.size()[0],\n            \"CategoricalLoss\": categoric_loglik / X.size()[0],\n            \"NumericalLoss\": gauss_loglik / X.size()[0],\n        }\n\n    def train(\n        self,\n        num_epochs: int = 100,\n        patience: int = 5,\n        displayed_metrics: list[str] = [\"ELBO\"],\n    ) -> tuple[int, dict[str, list[float]]]:\n        \"\"\"\n        Train the model.\n\n        Args:\n            num_epochs: Number of epochs to train for.\n            patience: Number of epochs to wait for improvement before early stopping.\n            displayed_metrics: List of metrics to display during training.\n\n        Returns:\n            The number of epochs trained for and a dictionary of the tracked metrics.\n        \"\"\"\n        self._start_training(num_epochs, patience, displayed_metrics)\n\n        
self.encoder.train()\n        self.decoder.train()\n        self.noiser.train()\n\n        for epoch in tqdm(range(num_epochs), desc=\"Epochs\", position=len(self.stats_bars), leave=False):\n            for (Y_subset,) in tqdm(self.data_loader, desc=\"Batches\", position=len(self.stats_bars) + 1, leave=False):\n                self.zero_grad()\n                with warnings.catch_warnings():\n                    warnings.filterwarnings(\"ignore\", message=\"Using a non-full backward hook\")\n                    losses = self.loss(Y_subset.to(self.device))\n                losses[\"ELBO\"].backward()\n                self.step()\n                self._record_metrics(losses)\n\n            elbo = np.mean(self.metrics[\"ELBO\"][-len(self.data_loader) :])\n            if self._check_patience(epoch, elbo):\n                num_epochs = epoch + 1\n                break\n\n        self._finish_training(num_epochs)\n        return (num_epochs, self.metrics)\n
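The dictionary returned by VAE.loss (and hence the tracked metrics) follows a simple per-row decomposition, which can serve as a sanity check; a sketch assuming model is an initialised VAE and batch a float tensor of encoded rows (both names are placeholders):

import torch

# ELBO == KLD + ReconstructionLoss, and ReconstructionLoss == -(CategoricalLoss + NumericalLoss)
losses = model.loss(batch)
assert torch.isclose(losses["ELBO"], losses["KLD"] + losses["ReconstructionLoss"])
assert torch.isclose(losses["ReconstructionLoss"], -(losses["CategoricalLoss"] + losses["NumericalLoss"]))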
"},{"location":"reference/modules/model/models/vae/#nhssynth.modules.model.models.vae.VAE.train","title":"train(num_epochs=100, patience=5, displayed_metrics=['ELBO'])","text":"

Train the model.

Parameters:

    num_epochs (int, default 100): Number of epochs to train for.
    patience (int, default 5): Number of epochs to wait for improvement before early stopping.
    displayed_metrics (list[str], default ['ELBO']): List of metrics to display during training.

Returns:

    tuple[int, dict[str, list[float]]]: The number of epochs trained for and a dictionary of the tracked metrics.

Source code in src/nhssynth/modules/model/models/vae.py
def train(\n    self,\n    num_epochs: int = 100,\n    patience: int = 5,\n    displayed_metrics: list[str] = [\"ELBO\"],\n) -> tuple[int, dict[str, list[float]]]:\n    \"\"\"\n    Train the model.\n\n    Args:\n        num_epochs: Number of epochs to train for.\n        patience: Number of epochs to wait for improvement before early stopping.\n        displayed_metrics: List of metrics to display during training.\n\n    Returns:\n        The number of epochs trained for and a dictionary of the tracked metrics.\n    \"\"\"\n    self._start_training(num_epochs, patience, displayed_metrics)\n\n    self.encoder.train()\n    self.decoder.train()\n    self.noiser.train()\n\n    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", position=len(self.stats_bars), leave=False):\n        for (Y_subset,) in tqdm(self.data_loader, desc=\"Batches\", position=len(self.stats_bars) + 1, leave=False):\n            self.zero_grad()\n            with warnings.catch_warnings():\n                warnings.filterwarnings(\"ignore\", message=\"Using a non-full backward hook\")\n                losses = self.loss(Y_subset.to(self.device))\n            losses[\"ELBO\"].backward()\n            self.step()\n            self._record_metrics(losses)\n\n        elbo = np.mean(self.metrics[\"ELBO\"][-len(self.data_loader) :])\n        if self._check_patience(epoch, elbo):\n            num_epochs = epoch + 1\n            break\n\n    self._finish_training(num_epochs)\n    return (num_epochs, self.metrics)\n
"},{"location":"reference/modules/plotting/","title":"plotting","text":""},{"location":"reference/modules/plotting/io/","title":"io","text":""},{"location":"reference/modules/plotting/io/#nhssynth.modules.plotting.io.check_input_paths","title":"check_input_paths(fn_dataset, fn_typed, fn_evaluations, dir_experiment)","text":"

Sets up the input and output paths for the model files.

Parameters:

    fn_dataset (str, required): The base name of the dataset.
    fn_typed (str, required): The name of the typed data file.
    fn_evaluations (str, required): The name of the file containing the evaluation bundle.
    dir_experiment (Path, required): The path to the experiment directory.

Returns:

    tuple[str, str]: The (potentially adjusted) names of the dataset, typed data and evaluation bundle files.

Source code in src/nhssynth/modules/plotting/io.py
def check_input_paths(fn_dataset: str, fn_typed: str, fn_evaluations: str, dir_experiment: Path) -> tuple[str, str]:\n    \"\"\"\n    Sets up the input and output paths for the model files.\n\n    Args:\n        fn_dataset: The base name of the dataset.\n        fn_typed: The name of the typed data file.\n        fn_evaluations: The name of the file containing the evaluation bundle.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The paths to the data, metadata and metatransformer files.\n    \"\"\"\n    fn_dataset, fn_typed, fn_evaluations = io.consistent_endings([fn_dataset, fn_typed, fn_evaluations])\n    fn_typed, fn_evaluations = io.potential_suffixes([fn_typed, fn_evaluations], fn_dataset)\n    io.warn_if_path_supplied([fn_dataset, fn_typed, fn_evaluations], dir_experiment)\n    io.check_exists([fn_typed], dir_experiment)\n    return fn_dataset, fn_typed, fn_evaluations\n
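A hedged usage sketch with hypothetical file names; the io helpers normalise file endings and resolve dataset-specific names before the existence check runs against dir_experiment:

from pathlib import Path

from nhssynth.modules.plotting.io import check_input_paths

fn_dataset, fn_typed, fn_evaluations = check_input_paths(
    "support",                   # hypothetical dataset name
    "typed",                     # hypothetical typed-data file name
    "evaluations",               # hypothetical evaluation bundle file name
    Path("experiments/my-run"),  # hypothetical experiment directory
)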
"},{"location":"reference/modules/plotting/io/#nhssynth.modules.plotting.io.load_required_data","title":"load_required_data(args, dir_experiment)","text":"

Loads the data from args or from disk when the dataloader has not been run previously.

Parameters:

    args (Namespace, required): The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.
    dir_experiment (Path, required): The path to the experiment directory.

Returns:

    tuple[str, DataFrame, DataFrame, dict[str, dict[str, Any]]]: The dataset name, the typed real data and the evaluation bundle.

Source code in src/nhssynth/modules/plotting/io.py
def load_required_data(\n    args: argparse.Namespace, dir_experiment: Path\n) -> tuple[str, pd.DataFrame, pd.DataFrame, dict[str, dict[str, Any]]]:\n    \"\"\"\n    Loads the data from `args` or from disk when the dataloader has not be run previously.\n\n    Args:\n        args: The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.\n        dir_experiment: The path to the experiment directory.\n\n    Returns:\n        The data, metadata and metatransformer.\n    \"\"\"\n    if all(x in args.module_handover for x in [\"dataset\", \"typed\", \"evaluations\"]):\n        return (\n            args.module_handover[\"dataset\"],\n            args.module_handover[\"typed\"],\n            args.module_handover[\"evaluations\"],\n        )\n    else:\n        fn_dataset, fn_typed, fn_evaluations = check_input_paths(\n            args.dataset, args.typed, args.evaluations, dir_experiment\n        )\n\n        with open(dir_experiment / fn_typed, \"rb\") as f:\n            real_data = pickle.load(f)\n        with open(dir_experiment / fn_evaluations, \"rb\") as f:\n            evaluations = pickle.load(f)\n\n        return fn_dataset, real_data, evaluations\n
"},{"location":"reference/modules/plotting/plots/","title":"plots","text":""},{"location":"reference/modules/plotting/plots/#nhssynth.modules.plotting.plots.factorize_all_categoricals","title":"factorize_all_categoricals(df)","text":"

Factorize all categorical columns in a dataframe.

Source code in src/nhssynth/modules/plotting/plots.py
def factorize_all_categoricals(\n    df: pd.DataFrame,\n) -> pd.DataFrame:\n    \"\"\"Factorize all categorical columns in a dataframe.\"\"\"\n    for col in df.columns:\n        if df[col].dtype == \"object\":\n            df[col] = pd.factorize(df[col])[0]\n        elif df[col].dtype == \"datetime64[ns]\":\n            df[col] = pd.to_numeric(df[col])\n        min_val = df[col].min()\n        max_val = df[col].max()\n        df[col] = (df[col] - min_val) / (max_val - min_val)\n\n    return df\n
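A small worked example (hypothetical data): object columns are integer-coded, datetimes are converted to numeric, and every column is then min-max scaled to [0, 1]; note that the input frame is modified in place and also returned.

import pandas as pd

from nhssynth.modules.plotting.plots import factorize_all_categoricals

df = pd.DataFrame(
    {
        "sex": ["F", "M", "F", "M"],
        "admitted": pd.to_datetime(["2020-01-01", "2020-06-01", "2021-01-01", "2021-06-01"]),
        "age": [30, 50, 70, 90],
    }
)

scaled = factorize_all_categoricals(df)
# e.g. scaled["age"] is now [0.0, 1/3, 2/3, 1.0]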
"},{"location":"reference/modules/plotting/run/","title":"run","text":""},{"location":"reference/modules/structure/","title":"structure","text":""},{"location":"reference/modules/structure/run/","title":"run","text":""}]} \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 12e3f836cc9c4a742b1aa4ad5e74556d8a853764..d07dee109de290162faaf4754a738ba4a2e0f996 100644 GIT binary patch delta 12 Tcmb=gXOr*d;1JWF$W{pe6zT&M delta 12 Tcmb=gXOr*d;3&|W$W{pe7N`Sl