From 7f4648808708d60d97bc3c29c566e04a1f63f7f1 Mon Sep 17 00:00:00 2001
From: JosePizarro3
Date: Thu, 10 Oct 2024 10:34:54 +0200
Subject: [PATCH] Improved schema with new workflow base sections

Deleted unused methods
Improved docstrings
---
 pyproject.toml                                  |   2 +-
 .../workflow/base_workflows.py                  |  29 +++--
 .../schema_packages/workflow/dft_plus_tb.py     | 105 ++++++------------
 .../schema_packages/workflow/single_point.py    |  30 ++---
 4 files changed, 67 insertions(+), 99 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index da497aba..1834e4a1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ maintainers = [
 ]
 license = { file = "LICENSE" }
 dependencies = [
-    "nomad-lab>=1.3.0",
+    "nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@6b7149a71b2999abbb2225fcb67a5acafc811806",
     "matid>=2.0.0.dev2",
 ]
 
diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py
index 47ccfd51..7c26cd5c 100644
--- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py
+++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py
@@ -1,33 +1,31 @@
 from functools import wraps
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from nomad.datamodel.datamodel import EntryArchive
     from structlog.stdlib import BoundLogger
 
 from nomad.datamodel.data import ArchiveSection
-from nomad.datamodel.metainfo.workflow import TaskReference, Workflow
+from nomad.datamodel.metainfo.workflow_new import BaseTask
+from nomad.datamodel.metainfo.workflow_new import Workflow2 as Workflow
 from nomad.metainfo import SubSection
 
 from nomad_simulations.schema_packages.model_method import BaseModelMethod
 from nomad_simulations.schema_packages.outputs import Outputs
 
 
-def check_n_tasks(n_tasks: Optional[int] = None):
+def check_n_tasks(n_tasks: int = 1):
     """
-    Check if the `tasks` of a workflow exist. If the `n_tasks` input specified, it checks whether `tasks`
-    is of the same length as `n_tasks`.
+    Check that the `tasks` of a workflow exist and that their length matches `n_tasks`.
 
     Args:
-        n_tasks (Optional[int], optional): The length of the `tasks` needs to be checked if set to an integer. Defaults to None.
+        n_tasks (int): The expected length of `tasks`. Defaults to 1.
     """
 
     def decorator(func):
         @wraps(func)
         def wrapper(self, *args, **kwargs):
-            if not self.tasks:
-                return None
-            if n_tasks is not None and len(self.tasks) != n_tasks:
+            if not self.tasks or len(self.tasks) != n_tasks:
                 return None
 
             return func(self, *args, **kwargs)
@@ -39,14 +37,14 @@ def wrapper(self, *args, **kwargs):
 
 class SimulationWorkflow(Workflow):
     """
-    A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The
+    A base section used to define the workflows of a simulation with specific `tasks`, `inputs`, and `outputs`. The
     normalize function checks the definition of these sections and sets the name of the workflow.
 
     A `SimulationWorkflow` will be composed of:
         - a `method` section containing methodological parameters used specifically during the workflow,
         - a list of `inputs` with references to the `ModelSystem` and, optionally, `ModelMethod` input sections,
         - a list of `outputs` with references to the `Outputs` section,
-        - a list of `tasks` containing references to the activity `Simulation` used in the workflow,
+        - a list of `tasks` containing references to, or the section information of, each `task` used in the workflow,
     """
 
     method = SubSection(
@@ -66,7 +64,7 @@ class BeyondDFTMethod(ArchiveSection):
     """
     An abstract section used to store references to the `ModelMethod` sections of each of the archives defining
     the `tasks` and used to build the standard `BeyondDFT` workflow. This section needs to be
-    inherit and the method references need to be defined for each specific case (see, e.g., dft_plus_tb.py module).
+    inherited, and the method references need to be defined for each specific case (see, e.g., the `dft_plus_tb.py` module).
     """
 
     pass
@@ -104,16 +102,15 @@ def resolve_all_outputs(self) -> list[Outputs]:
                 all_outputs.append(task.outputs[-1])
         return all_outputs
 
-    @check_n_tasks()
     def resolve_method_refs(
-        self, tasks: list[TaskReference], tasks_names: list[str]
+        self, tasks: list[BaseTask], tasks_names: list[str]
     ) -> list[BaseModelMethod]:
         """
         Resolve the references to the `BaseModelMethod` sections in the list of `tasks`. This is useful
         when defining the `method` section of the `BeyondDFT` workflow.
 
         Args:
-            tasks (list[TaskReference]): The list of tasks from which resolve the `BaseModelMethod` sections.
+            tasks (list[BaseTask]): The list of tasks from which to resolve the `BaseModelMethod` sections.
            tasks_names (list[str]): The list of names for each of the tasks forming the BeyondDFT workflow.
 
         Returns:
@@ -132,7 +129,7 @@ def resolve_method_refs(
             if not task.m_xpath('task.inputs'):
                 continue
 
-            # Resolve the method of each task.inputs
+            # Resolve the method of each `tasks[*].task.inputs`
             for input in task.task.inputs:
                 if isinstance(input.section, BaseModelMethod):
                     method_refs.append(input.section)
diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py
index 651c988f..4f94a5eb 100644
--- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py
+++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py
@@ -4,8 +4,9 @@
     from nomad.datamodel.datamodel import EntryArchive
     from structlog.stdlib import BoundLogger
 
-from nomad.datamodel.metainfo.workflow import Link, TaskReference
-from nomad.metainfo import Quantity, Reference
+from nomad.datamodel.metainfo.workflow_new import LinkReference
+from nomad.metainfo import Quantity
+from nomad.utils import extract_section
 
 from nomad_simulations.schema_packages.model_method import DFT, TB
 from nomad_simulations.schema_packages.workflow import BeyondDFT, BeyondDFTMethod
@@ -21,13 +22,13 @@ class DFTPlusTBMethod(BeyondDFTMethod):
     """
     dft_method_ref = Quantity(
-        type=Reference(DFT),
+        type=DFT,
         description="""
         Reference to the DFT `ModelMethod` section in the DFT task.
         """,
     )
     tb_method_ref = Quantity(
-        type=Reference(TB),
+        type=TB,
         description="""
         Reference to the TB `ModelMethod` section in the TB task.
         """,
     )
@@ -40,12 +41,10 @@ class DFTPlusTB(BeyondDFT):
     two tasks: the initial DFT calculation + the final TB projection. The section only needs to be
     populated with (everything else is handled by the `normalize` function):
-        i. 
The `tasks` as `TaskReference` sections, adding `task` to the specific archive.workflow2 sections. - ii. The `inputs` and `outputs` as `Link` sections pointing to the specific archives. + i. The `tasks` as `TaskReference` sections, adding `task` to the specific `archive.workflow2` sections. Note 1: the `inputs[0]` of the `DFTPlusTB` coincides with the `inputs[0]` of the DFT task (`ModelSystem` section). Note 2: the `outputs[-1]` of the `DFTPlusTB` coincides with the `outputs[-1]` of the TB task (`Outputs` section). - Note 3: the `outputs[-1]` of the DFT task is used as `inputs[0]` of the TB task. The archive.workflow2 section is: - name = 'DFT+TB' @@ -54,68 +53,39 @@ class DFTPlusTB(BeyondDFT): tb_method_ref=tb_archive.data.model_method[-1], ) - inputs = [ - Link(name='Input Model System', section=dft_archive.data.model_system[0]), + LinkReference(name='Input Model System', section=dft_archive.data.model_system[0]), ] - outputs = [ - Link(name='Output TB Data', section=tb_archive.data.outputs[-1]), + LinkReference(name='Output TB Data', section=tb_archive.data.outputs[-1]), ] - tasks = [ - TaskReference( - name='DFT SinglePoint Task', - task=dft_archive.workflow2 - inputs=[ - Link(name='Input Model System', section=dft_archive.data.model_system[0]), - ], - outputs=[ - Link(name='Output DFT Data', section=dft_archive.data.outputs[-1]), - ] - ), - TaskReference( - name='TB SinglePoint Task', - task=tb_archive.workflow2, - inputs=[ - Link(name='Output DFT Data', section=dft_archive.data.outputs[-1]), - ], - outputs=[ - Link(name='Output tb Data', section=tb_archive.data.outputs[-1]), - ] - ), + TaskReference(task=dft_archive.workflow2), + TaskReference(task=tb_archive.workflow2), ] """ @check_n_tasks(n_tasks=2) - def link_task_inputs_outputs( - self, tasks: list[TaskReference], logger: 'BoundLogger' - ) -> None: - if not self.inputs or not self.outputs: - logger.warning( - 'The `DFTPlusTB` workflow needs to have `inputs` and `outputs` defined in order to link with the `tasks`.' - ) + def resolve_inputs_outputs(self) -> None: + """ + Resolve the `inputs` and `outputs` of the `DFTPlusTB` workflow. 
+        """
+        input = extract_section(self.tasks[0], ['task', 'inputs[0]', 'section'])
+        if not input:
             return None
+        self.inputs = [LinkReference(name='Input Model System', section=input)]
 
-        dft_task = tasks[0]
-        tb_task = tasks[1]
-
-        # Initial check
-        if not dft_task.m_xpath('task.outputs'):
+        output = extract_section(self.tasks[1], ['task', 'outputs[-1]', 'section'])
+        if not output:
             return None
-
-        # Input of DFT Task is the ModelSystem
-        dft_task.inputs = [
-            Link(name='Input Model System', section=self.inputs[0]),
-        ]
-        # Output of DFT Task is the output section of the DFT entry
-        dft_task.outputs = [
-            Link(name='Output DFT Data', section=dft_task.task.outputs[-1]),
-        ]
-        # Input of TB Task is the output of the DFT task
-        tb_task.inputs = [
-            Link(name='Output DFT Data', section=dft_task.task.outputs[-1]),
-        ]
-        # Output of TB Task is the output section of the TB entry
-        tb_task.outputs = [
-            Link(name='Output TB Data', section=self.outputs[-1]),
-        ]
+        self.outputs = [LinkReference(name='Output TB Data', section=output)]
 
     # TODO check if implementing overwritting the FermiLevel.value in the TB entry from the DFT entry
 
@@ -144,14 +114,11 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
             tasks=self.tasks,
             tasks_names=['DFT SinglePoint Task', 'TB SinglePoint Task'],
         )
-        if method_refs is not None:
-            method_workflow = DFTPlusTBMethod()
-            for method in method_refs:
-                if isinstance(method, DFT):
-                    method_workflow.dft_method_ref = method
-                elif isinstance(method, TB):
-                    method_workflow.tb_method_ref = method
-            self.method = method_workflow
-
-        # Resolve `tasks[*].inputs` and `tasks[*].outputs`
-        self.link_task_inputs_outputs(tasks=self.tasks, logger=logger)
+        if method_refs is not None and len(method_refs) == 2:
+            self.method = DFTPlusTBMethod(
+                dft_method_ref=method_refs[0], tb_method_ref=method_refs[1]
+            )
+
+        # Resolve `inputs` and `outputs` from the `tasks`
+        self.resolve_inputs_outputs()
diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py
index e2b9d669..4e4aca73 100644
--- a/src/nomad_simulations/schema_packages/workflow/single_point.py
+++ b/src/nomad_simulations/schema_packages/workflow/single_point.py
@@ -6,11 +6,11 @@
     from nomad.datamodel.datamodel import EntryArchive
     from structlog.stdlib import BoundLogger
 
-from nomad.datamodel.metainfo.workflow import Link
+from nomad.datamodel.metainfo.workflow_new import LinkReference
 from nomad.metainfo import Quantity
+from nomad.utils import extract_section
 
 from nomad_simulations.schema_packages.outputs import SCFOutputs
-from nomad_simulations.schema_packages.utils import extract_all_simulation_subsections
 from nomad_simulations.schema_packages.workflow import SimulationWorkflow
 
 
@@ -26,11 +26,11 @@ class SinglePoint(SimulationWorkflow):
     The archive.workflow2 section is:
         - name = 'SinglePoint'
         - inputs = [
-            Link(name='Input Model System', section=archive.data.model_system[0]),
-            Link(name='Input Model Method', section=archive.data.model_method[-1]),
+            LinkReference(name='Input Model System', section=archive.data.model_system[0]),
+            LinkReference(name='Input Model Method', section=archive.data.model_method[-1]),
         ]
         - outputs = [
-            Link(name='Output Data', section=archive.data.outputs[-1]),
+            LinkReference(name='Output Data', section=archive.data.outputs[-1]),
         ]
         - tasks = []
     """
@@ -53,19 +53,23 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
         self.name = 'SinglePoint'
 
         # Define `inputs` and `outputs`
-        input_model_system, input_model_method, output = (
-            extract_all_simulation_subsections(archive=archive)
-        )
-        if not input_model_system or not input_model_method or not output:
+        input_model_system = extract_section(archive, ['data', 'model_system'])
+        output = extract_section(archive, ['data', 'outputs'])
+        if not input_model_system or not output:
             logger.warning(
-                'Could not find the ModelSystem, ModelMethod, or Outputs section in the archive.data section of the SinglePoint entry.'
+                'Could not find the `ModelSystem` or `Outputs` section in the archive.data section of the SinglePoint entry.'
             )
             return
         self.inputs = [
-            Link(name='Input Model System', section=input_model_system),
-            Link(name='Input Model Method', section=input_model_method),
+            LinkReference(name='Input Model System', section=input_model_system),
         ]
-        self.outputs = [Link(name='Output Data', section=output)]
+        self.outputs = [LinkReference(name='Output Data', section=output)]
+        # `ModelMethod` is optional when defining workflows like `SinglePoint`
+        input_model_method = extract_section(archive, ['data', 'model_method'])
+        if input_model_method is not None:
+            self.inputs.append(
+                LinkReference(name='Input Model Method', section=input_model_method)
+            )
 
         # Resolve the `n_scf_steps` if the output is of `SCFOutputs` type
         if isinstance(output, SCFOutputs):
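
A minimal usage sketch of the populate-then-normalize flow that the `DFTPlusTB` docstring above describes: only the `tasks` are set by hand, and `normalize` resolves the name, `method`, `inputs`, and `outputs`. This assumes `TaskReference` is importable from the same `workflow_new` module as `LinkReference` and `BaseTask`, and that `dft_archive`, `tb_archive`, `workflow_archive`, and `logger` are illustrative, already-available objects.

    # Hypothetical wiring of a DFT+TB overarching entry (sketch, not part of the patch).
    from nomad.datamodel.metainfo.workflow_new import TaskReference  # assumed import path

    from nomad_simulations.schema_packages.workflow.dft_plus_tb import DFTPlusTB

    # Only `tasks` needs to be populated; `normalize` fills in everything else.
    workflow_archive.workflow2 = DFTPlusTB(
        tasks=[
            TaskReference(task=dft_archive.workflow2),  # DFT SinglePoint entry
            TaskReference(task=tb_archive.workflow2),   # TB SinglePoint entry
        ]
    )
    workflow_archive.workflow2.normalize(archive=workflow_archive, logger=logger)

Because of the `@check_n_tasks(n_tasks=2)` guard, `resolve_inputs_outputs` is a no-op unless exactly two tasks are present, so an incompletely populated entry simply keeps empty `inputs` and `outputs`.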