From 118a8b57af1a433603deb98aa06a9c70265a1743 Mon Sep 17 00:00:00 2001 From: "Jose M. Pizarro" <112697669+JosePizarro3@users.noreply.github.com> Date: Tue, 2 Apr 2024 15:28:11 +0200 Subject: [PATCH] Add `Outputs` base class (#29) * Added Outputs and SCFOutputs sections * Added unit testing for Outputs and SCFOutputs * Added todo for times * Added Reference ELN for refs * Improved description of Simulation * Added ? in is_scf_converged * Fix pipeline * Clean duplicated copyright comments in modules --- pyproject.toml | 3 +- src/nomad_simulations/__init__.py | 1 + src/nomad_simulations/atoms_state.py | 18 --- src/nomad_simulations/common.py | 18 --- src/nomad_simulations/general.py | 40 +++--- src/nomad_simulations/model_system.py | 18 --- src/nomad_simulations/outputs.py | 199 +++++++++++++++++++++++--- tests/__init__.py | 22 +++ tests/test_outputs.py | 74 ++++++++++ 9 files changed, 292 insertions(+), 101 deletions(-) create mode 100644 tests/test_outputs.py diff --git a/pyproject.toml b/pyproject.toml index 931c3ecf..4e356c0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.9" dependencies = [ "nomad-lab>=1.2.0", - 'matid>=2.0.0.dev2' + "matid>=2.0.0.dev2", ] [project.urls] @@ -35,6 +35,7 @@ dev = [ 'pytest-cov==2.7.1', 'ruff==0.1.8', "structlog==22.3.0", + "lxml_html_clean>=0.1.0", ] [tool.ruff] diff --git a/src/nomad_simulations/__init__.py b/src/nomad_simulations/__init__.py index 3851b452..606c722c 100644 --- a/src/nomad_simulations/__init__.py +++ b/src/nomad_simulations/__init__.py @@ -16,4 +16,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# + from .general import Program, Simulation diff --git a/src/nomad_simulations/atoms_state.py b/src/nomad_simulations/atoms_state.py index b57182d4..b15f3b8d 100644 --- a/src/nomad_simulations/atoms_state.py +++ b/src/nomad_simulations/atoms_state.py @@ -15,24 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# import numpy as np import ase diff --git a/src/nomad_simulations/common.py b/src/nomad_simulations/common.py index 2752bf88..90c58425 100644 --- a/src/nomad_simulations/common.py +++ b/src/nomad_simulations/common.py @@ -15,24 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# # import typing # from structlog.stdlib import BoundLogger diff --git a/src/nomad_simulations/general.py b/src/nomad_simulations/general.py index f216102f..eaec8d3a 100644 --- a/src/nomad_simulations/general.py +++ b/src/nomad_simulations/general.py @@ -15,24 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# import numpy as np @@ -104,11 +86,11 @@ def normalize(self, archive, logger) -> None: class BaseSimulation(Activity): """ A computational simulation that produces output data from a given input model system - and methodological parameters. + and input methodological parameters. Synonyms: - - computation - - calculation + - computation + - calculation """ m_def = Section( @@ -166,9 +148,21 @@ def normalize(self, archive, logger) -> None: class Simulation(BaseSimulation, EntryData): - """ """ + """ + A `Simulation` is a computational calculation that produces output data from a given input model system + and input (model) methodological parameters. The output properties obtained from the simulation are stored + in a list under `outputs`. 
+ + Each sub-section of `Simulation` is defined in their corresponding modules: `model_system.py`, `model_method.py`, + and `outputs.py`. + + The basic entry data for a `Simulation`, known as `SinglePoint` workflow, contains all the self-consistent (SCF) steps + performed to converge the calculation, i.e., we do not split each SCF step in its own entry but rather group them in a general one. - # m_def = Section(extends_base_section=True) + Synonyms: + - calculation + - computation + """ model_system = SubSection(sub_section=ModelSystem.m_def, repeats=True) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index c505eef2..e7a99f2a 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -15,24 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# import re import numpy as np diff --git a/src/nomad_simulations/outputs.py b/src/nomad_simulations/outputs.py index 6b1dfbd1..3667b968 100644 --- a/src/nomad_simulations/outputs.py +++ b/src/nomad_simulations/outputs.py @@ -15,38 +15,191 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. 
-# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import re import numpy as np +from typing import Optional +from structlog.stdlib import BoundLogger -from nomad.units import ureg from nomad.datamodel.data import ArchiveSection -from nomad.metainfo import Quantity, SubSection, SectionProxy, MEnum +from nomad.datamodel.metainfo.annotations import ELNAnnotation +from nomad.metainfo import Quantity, SubSection, SectionProxy, Reference + +from .atoms_state import AtomsState, OrbitalsState +from .model_system import ModelSystem +from .numerical_settings import SelfConsistency class Outputs(ArchiveSection): - """ """ + """ + Output properties of a simulation. This base class can be used for inheritance in any of the output properties + defined in this schema. + + It contains references to the specific sections used to obtain the output properties, as well as + information if the output `is_derived` from another output section or directly parsed from the simulation output files. + """ + + # TODO add time quantities normalizer_level = 2 + name = Quantity( + type=str, + description=""" + Name of the output property. This is used for easier identification of the property and is connected + with the class name of each output property class, e.g., `'ElectronicBandGap'`, `'ElectronicBandStructure'`, etc. 
+ """, + a_eln=ELNAnnotation(component='StringEditQuantity'), + ) + + orbitals_state_ref = Quantity( + type=OrbitalsState, + description=""" + Reference to the `OrbitalsState` section to which the output property references to and on + on which the simulation is performed. + """, + a_eln=ELNAnnotation(component='ReferenceEditQuantity'), + ) + + atoms_state_ref = Quantity( + type=AtomsState, + description=""" + Reference to the `AtomsState` section to which the output property references to and on + on which the simulation is performed. + """, + a_eln=ELNAnnotation(component='ReferenceEditQuantity'), + ) + + model_system_ref = Quantity( + type=ModelSystem, + description=""" + Reference to the `ModelSystem` section to which the output property references to and on + on which the simulation is performed. + """, + a_eln=ELNAnnotation(component='ReferenceEditQuantity'), + ) + + is_derived = Quantity( + type=bool, + default=False, + description=""" + Flag indicating whether the output property is derived from other output properties. We make + the distinction between directly parsed and derived output properties: + - Directly parsed: the output property is directly parsed from the simulation output files. + - Derived: the output property is derived from other output properties. No extra numerical settings + are required to calculate the output property. + """, + ) + + outputs_ref = Quantity( + type=Reference(SectionProxy('Outputs')), + description=""" + Reference to the `Outputs` section from which the output property was derived. This is only + relevant if `is_derived` is set to True. + """, + a_eln=ELNAnnotation(component='ReferenceEditQuantity'), + ) + + def check_is_derived(self, is_derived: bool, outputs_ref) -> Optional[bool]: + """ + Check if the output property is derived or not. + + Args: + is_derived (bool): The flag indicating whether the output property is derived or not. 
+ outputs_ref (Optional[Outputs]): The reference to the `Outputs` section from which the output property was derived. + + Returns: + Optional[bool]: The flag indicating whether the output property is derived or not, or None if a required reference is missing. + """ + if not is_derived: + if outputs_ref is not None: + return True + return False + elif is_derived and outputs_ref is not None: + return True + return None + def normalize(self, archive, logger) -> None: super().normalize(archive, logger) - self.logger = logger + + # Check if the output property `is_derived` or not, or if there are missing references. + check_derived = self.check_is_derived(self.is_derived, self.outputs_ref) + if check_derived is not None: + self.is_derived = check_derived + else: + logger.error( + 'A derived output property must have a reference to another `Outputs` section.' + ) + return + + +class SCFOutputs(Outputs): + """ + This section contains the self-consistent (SCF) steps performed to converge an output property, + as well as the information if the output property `is_scf_converged` or not, depending on the + settings in the `SelfConsistency` base class defined in `numerical_settings.py`. + + For simplicity, we contain the SCF steps of a simulation as part of the minimal workflow defined in NOMAD, + the `SinglePoint`, i.e., we do not split each SCF step in its own entry. Thus, each `SinglePoint` + `Simulation` entry in NOMAD contains the final output properties and all the SCF steps. + """ + + n_scf_steps = Quantity( + type=np.int32, + description=""" + Number of self-consistent steps to converge the output property. Note that the SCF steps belong to + the same minimal `Simulation` workflow entry which is known as `SinglePoint`. + """, + ) + + scf_step = SubSection( + sub_section=Outputs.m_def, + repeats=True, + description=""" + Self-consistent (SCF) steps performed for converging a given output property.
Note that the SCF steps belong to + the same minimal `Simulation` workflow entry which is known as `SinglePoint`. + """, + ) + + is_scf_converged = Quantity( + type=bool, + description=""" + Flag indicating whether the output property is converged or not after a SCF process. This quantity is connected + with `SelfConsistency` defined in the `numerical_settings.py` module. + """, + ) + + self_consistency_ref = Quantity( + type=SelfConsistency, + description=""" + Reference to the `SelfConsistency` section that defines the numerical settings to converge the + output property. + """, + ) + + # TODO add more functionality to automatically check convergence from `self_consistency_ref` and the last `scf_step[-1]` + def check_is_scf_converged( + self, is_scf_converged: bool, logger: BoundLogger + ) -> bool: + """ + Check if the output property is converged or not. + + Args: + is_scf_converged (bool): The flag indicating whether the output property is converged or not. + logger (BoundLogger): The logger to log messages. + + Returns: + (bool): The flag indicating whether the output property is converged or not. + """ + if not is_scf_converged: + # ? It will be nice if some of these logger messages can be checked or be used when querying data + logger.info('The output property is not converged after the SCF process.') + return False + return True + + def normalize(self, archive, logger) -> None: + super().normalize(archive, logger) + + # Set if the output property `is_scf_converged` or not. + self.is_scf_converged = self.check_is_scf_converged( + self.is_scf_converged, logger + ) diff --git a/tests/__init__.py b/tests/__init__.py index e69de29b..8176ffc0 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,22 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from nomad import utils + + +logger = utils.get_logger(__name__) diff --git a/tests/test_outputs.py b/tests/test_outputs.py new file mode 100644 index 00000000..97ef94be --- /dev/null +++ b/tests/test_outputs.py @@ -0,0 +1,74 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest + +from . import logger + +from nomad_simulations.outputs import Outputs, SCFOutputs + + +class TestBaseOutputs: + """ + Test the `Outputs` class defined in `outputs.py`. + """ + + @pytest.mark.parametrize( + 'is_derived, outputs_ref, result', + [ + (False, Outputs(), True), + (False, None, False), + (True, Outputs(), True), + (True, None, None), + ], + ) + def test_normalize(self, is_derived, outputs_ref, result): + """ + Test the `normalize` and `check_is_derived` methods.
+ """ + outputs = Outputs() + assert outputs.check_is_derived(is_derived, outputs_ref) == result + outputs.is_derived = is_derived + outputs.outputs_ref = outputs_ref + outputs.normalize(None, logger) + if result is not None: + assert outputs.is_derived == result + + +class TestOutputs: + """ + Test the `SCFOutputs` class defined in `outputs.py`. + """ + + @pytest.mark.parametrize( + 'is_scf_converged, result', + [ + (False, False), + (True, True), + ], + ) + def test_normalize(self, is_scf_converged, result): + """ + Test the `normalize` method. + """ + scf_outputs = SCFOutputs() + # ! This testing is repetitive, but `check_is_scf_converged` should eventually contain more complex logic and be separated in its own testing method. + assert scf_outputs.check_is_scf_converged(is_scf_converged, logger) == result + scf_outputs.is_scf_converged = is_scf_converged + scf_outputs.normalize(None, logger) + assert scf_outputs.is_scf_converged == result