diff --git a/src/nomad_simulations/general.py b/src/nomad_simulations/general.py index eaec8d3a..bbceac87 100644 --- a/src/nomad_simulations/general.py +++ b/src/nomad_simulations/general.py @@ -17,6 +17,8 @@ # import numpy as np +from typing import List +from structlog.stdlib import BoundLogger from nomad.units import ureg from nomad.metainfo import SubSection, Quantity, MEnum, Section, Datetime @@ -27,6 +29,7 @@ from .model_system import ModelSystem from .model_method import ModelMethod from .outputs import Outputs +from .utils import is_not_representative, get_composition class Program(Entity): @@ -177,6 +180,70 @@ def _set_system_branch_depth( system_child.branch_depth = branch_depth + 1 self._set_system_branch_depth(system_child, branch_depth + 1) + def resolve_composition_formula(self, system_parent: ModelSystem) -> None: + """Determine and set the composition formula for `system_parent` and all of its + descendants. + + Args: + system_parent (ModelSystem): The upper-most level of the system hierarchy to consider. + """ + + def set_composition_formula( + system: ModelSystem, subsystems: List[ModelSystem], atom_labels: List[str] + ) -> None: + """Determine the composition formula for `system` based on its `subsystems`. + If `system` has no children, the atom_labels are used to determine the formula. + + Args: + system (ModelSystem): The system under consideration. + subsystems (List[ModelSystem]): The children of system. + atom_labels (List[str]): The global list of atom labels corresponding + to the atom indices stored in system. + """ + if not subsystems: + atom_indices = ( + system.atom_indices if system.atom_indices is not None else [] + ) + subsystem_labels = ( + [np.array(atom_labels)[atom_indices]] + if atom_labels + else ['Unknown' for atom in range(len(atom_indices))] + ) + else: + subsystem_labels = [ + subsystem.branch_label + if subsystem.branch_label is not None + else 'Unknown' + for subsystem in subsystems + ] + if system.composition_formula is None: + system.composition_formula = get_composition(subsystem_labels) + + def get_composition_recurs(system: ModelSystem, atom_labels: List[str]) -> None: + """Traverse the system hierarchy downward and set the branch composition for + all (sub)systems at each level. + + Args: + system (ModelSystem): The system to traverse downward. + atom_labels (List[str]): The global list of atom labels corresponding + to the atom indices stored in system. + """ + subsystems = system.model_system + set_composition_formula(system, subsystems, atom_labels) + if subsystems: + for subsystem in subsystems: + get_composition_recurs(subsystem, atom_labels) + + atoms_state = ( + system_parent.cell[0].atoms_state if system_parent.cell is not None else [] + ) + atom_labels = ( + [atom.chemical_symbol for atom in atoms_state] + if atoms_state is not None + else [] + ) + get_composition_recurs(system_parent, atom_labels) + def normalize(self, archive, logger) -> None: super(EntryData, self).normalize(archive, logger) @@ -192,8 +259,12 @@ def normalize(self, archive, logger) -> None: self.m_cache['system_ref'] = system_ref # Setting up the `branch_depth` in the parent-child tree - for system_parents in self.model_system: - system_parents.branch_depth = 0 - if len(system_parents.model_system) == 0: + for system_parent in self.model_system: + system_parent.branch_depth = 0 + if len(system_parent.model_system) == 0: + continue + self._set_system_branch_depth(system_parent) + + if is_not_representative(system_parent, logger): continue - self._set_system_branch_depth(system_parents) + self.resolve_composition_formula(system_parent) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 7303b1bb..1b6f0842 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -19,7 +19,7 @@ import re import numpy as np import ase -from typing import Tuple, Optional +from typing import Tuple, Optional, List from structlog.stdlib import BoundLogger from matid import SymmetryAnalyzer, Classifier # pylint: disable=import-error @@ -35,7 +35,12 @@ from nomad import config from nomad.units import ureg -from nomad.atomutils import Formula, get_normalized_wyckoff, search_aflow_prototype +from nomad.atomutils import ( + Formula, + get_normalized_wyckoff, + search_aflow_prototype, + get_composition, +) from nomad.metainfo import Quantity, SubSection, SectionProxy, MEnum, Section, Context from nomad.datamodel.data import ArchiveSection @@ -901,6 +906,30 @@ class ModelSystem(System): """, ) + composition_formula = Quantity( + type=str, + description=""" + The overall composition of the system with respect to its subsystems. + The syntax for a system composed of X and Y with x and y components of each, + respectively, is X(x)Y(y). At the deepest branch in the hierarchy, the + composition_formula is expressed in terms of the atomic labels. + + Example: A system composed of 3 water molecules with the following hierarchy + + TotalSystem + | + group_H2O + | | | + H2O H2O H2O + + has the following compositional formulas at each branch: + + branch 0, index 0: "Total_System" composition_formula = group_H2O(1) + branch 1, index 0: "group_H2O" composition_formula = H2O(3) + branch 2, index 0: "H2O" composition_formula = H(1)O(2) + """, + ) + model_system = SubSection(sub_section=SectionProxy('ModelSystem'), repeats=True) def resolve_system_type_and_dimensionality( diff --git a/src/nomad_simulations/utils/__init__.py b/src/nomad_simulations/utils/__init__.py index 0c96821f..47f68bf9 100644 --- a/src/nomad_simulations/utils/__init__.py +++ b/src/nomad_simulations/utils/__init__.py @@ -21,4 +21,5 @@ RussellSaundersState, is_not_representative, get_variables, + get_composition, ) diff --git a/src/nomad_simulations/utils/utils.py b/src/nomad_simulations/utils/utils.py index 1048d921..5055c250 100644 --- a/src/nomad_simulations/utils/utils.py +++ b/src/nomad_simulations/utils/utils.py @@ -17,6 +17,7 @@ # limitations under the License. # +import numpy as np from math import factorial from typing import Optional, List from structlog.stdlib import BoundLogger @@ -126,7 +127,6 @@ def is_not_representative(model_system, logger: BoundLogger): logger.warning('The `ModelSystem` is empty.') return None if not model_system.is_representative: - logger.warning('The `ModelSystem` was not found to be representative.') return True return False @@ -152,3 +152,14 @@ def get_variables( if isinstance(var, variable_cls): result.append(var) return result + + +# TODO remove function in nomad.atomutils +def get_composition(children_names: List[str]) -> str: + """ + Generates a generalized "chemical formula" based on the provided list `children_names`, + with the format X(m)Y(n) for children_names X and Y of quantities m and n, respectively. + """ + children_count_tup = np.unique(children_names, return_counts=True) + formula = ''.join([f'{name}({count})' for name, count in zip(*children_count_tup)]) + return formula if formula else None diff --git a/tests/test_model_system.py b/tests/test_model_system.py index 6f3a02e6..34caf9c3 100644 --- a/tests/test_model_system.py +++ b/tests/test_model_system.py @@ -29,7 +29,10 @@ Symmetry, ChemicalFormula, ModelSystem, + AtomicCell, + AtomsState, ) +from nomad_simulations.general import Simulation class TestAtomicCell: @@ -439,3 +442,187 @@ def test_normalize(self): assert np.isclose(model_system.elemental_composition[0].atomic_fraction, 2 / 3) assert model_system.elemental_composition[1].element == 'O' assert np.isclose(model_system.elemental_composition[1].atomic_fraction, 1 / 3) + + @pytest.mark.parametrize( + 'is_representative, has_atom_indices, mol_label_list, n_mol_list, atom_labels_list, composition_formula_list, custom_formulas', + [ + ( + True, + True, + ['H20'], + [3], + [['H', 'O', 'O']], + ['group_H20(1)', 'H20(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'], + [None, None, None, None, None], + ), # pure system + ( + False, + True, + ['H20'], + [3], + [['H', 'O', 'O']], + [None, None, None, None, None], + [None, None, None, None, None], + ), # non-representative system + ( + True, + True, + [None], + [3], + [['H', 'O', 'O']], + ['Unknown(1)', 'Unknown(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'], + [None, None, None, None, None], + ), # missing branch labels + ( + True, + True, + ['H20'], + [3], + [[None, None, None]], + ['group_H20(1)', 'H20(3)', 'Unknown(3)', 'Unknown(3)', 'Unknown(3)'], + [None, None, None, None, None], + ), # missing atom labels + ( + True, + False, + ['H20'], + [3], + [['H', 'O', 'O']], + ['group_H20(1)', 'H20(3)', None, None, None], + [None, None, None, None, None], + ), # missing atom indices + ( + True, + True, + ['H20'], + [3], + [['H', 'O', 'O']], + ['waters(1)', 'water_molecules(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'], + ['waters(1)', 'water_molecules(3)', None, None, None], + ), # custom formulas + ( + True, + True, + ['H20', 'Methane'], + [5, 2], + [['H', 'O', 'O'], ['C', 'H', 'H', 'H', 'H']], + [ + 'group_H20(1)group_Methane(1)', + 'H20(5)', + 'H(1)O(2)', + 'H(1)O(2)', + 'H(1)O(2)', + 'H(1)O(2)', + 'H(1)O(2)', + 'Methane(2)', + 'C(1)H(4)', + 'C(1)H(4)', + ], + [None, None, None, None, None, None, None, None, None, None], + ), # binary mixture + ], + ) + def test_system_hierarchy_for_molecules( + self, + is_representative: bool, + has_atom_indices: bool, + mol_label_list: List[str], + n_mol_list: List[int], + atom_labels_list: List[str], + composition_formula_list: List[str], + custom_formulas: List[str], + ): + """Test the `ModelSystem` normalization of 'composition_formula' for atoms and molecules. + + Args: + is_representative (bool): Specifies if branch_depth = 0 is representative or not. + If not representative, the composition formulas should not be generated. + has_atom_indices (bool): Specifies if the atom_indices should be populated during parsing. + Without atom_indices, the composition formulas for the deepest level of the hierarchy + should not be populated. + mol_label_list (List[str]): Molecule types for generating the hierarchy. + n_mol_list (List[int]): Number of molecules for each molecule type. Should be same + length as mol_label_list. + atom_labels_list (List[str]): Atom labels for each molecule type. Should be same length as + mol_label_list, with each entry being a list of corresponding atom labels. + composition_formula_list (List[str]): Resulting composition formulas after normalization. The + ordering is dictated by the recursive traversing of the hierarchy in get_system_recurs(), + which follows each branch to its deepest level before moving to the next branch, i.e., + [model_system.composition_formula, + model_system.model_system[0].composition_formula], + model_system.model_system[0].model_system[0].composition_formula, + model_system.model_system[0].model_system[1].composition_formula, ..., + model_system.model_system[1].composition_formula, ...] + custom_formulas (List[str]): Custom composition formulas that can be set in the generation + of the hierarchy, which will cause the normalize to ignore (i.e., not overwrite) these formula entries. + The ordering is as described above. + + Returns: + None + """ + + ### Generate the system hierarchy ### + simulation = Simulation() + model_system = ModelSystem(is_representative=True) + simulation.model_system.append(model_system) + model_system.branch_label = 'Total System' + model_system.is_representative = is_representative + model_system.composition_formula = custom_formulas[0] + ctr_comp = 1 + atomic_cell = AtomicCell() + model_system.cell.append(atomic_cell) + if has_atom_indices: + model_system.atom_indices = [] + for mol_label, n_mol, atom_labels in zip( + mol_label_list, n_mol_list, atom_labels_list + ): + # Create a branch in the hierarchy for this molecule type + model_system_mol_group = ModelSystem() + if has_atom_indices: + model_system_mol_group.atom_indices = [] + model_system_mol_group.branch_label = ( + f'group_{mol_label}' if mol_label is not None else None + ) + model_system_mol_group.composition_formula = custom_formulas[ctr_comp] + ctr_comp += 1 + model_system.model_system.append(model_system_mol_group) + for _ in range(n_mol): + # Create a branch in the hierarchy for this molecule + model_system_mol = ModelSystem(branch_label=mol_label) + model_system_mol.branch_label = mol_label + model_system_mol.composition_formula = custom_formulas[ctr_comp] + ctr_comp += 1 + model_system_mol_group.model_system.append(model_system_mol) + # add the corresponding atoms to the global atom list + for atom_label in atom_labels: + if atom_label is not None: + atomic_cell.atoms_state.append( + AtomsState(chemical_symbol=atom_label) + ) + n_atoms = len(atomic_cell.atoms_state) + atom_indices = np.arange(n_atoms - len(atom_labels), n_atoms) + if has_atom_indices: + model_system_mol.atom_indices = atom_indices + model_system_mol_group.atom_indices = np.append( + model_system_mol_group.atom_indices, atom_indices + ) + model_system.atom_indices = np.append( + model_system.atom_indices, atom_indices + ) + + simulation.normalize(EntryArchive(), logger) + + ### Traverse the hierarchy recursively and check the results ### + assert model_system.composition_formula == composition_formula_list[0] + ctr_comp = 1 + + def get_system_recurs(sec_system, ctr_comp): + for sys in sec_system: + assert sys.composition_formula == composition_formula_list[ctr_comp] + ctr_comp += 1 + sec_subsystem = sys.model_system + if sec_subsystem: + ctr_comp = get_system_recurs(sec_subsystem, ctr_comp) + return ctr_comp + + get_system_recurs(model_system.model_system, ctr_comp)