Skip to content

Commit

Permalink
added branch composition to the modelsystem normalizer (#76)
Browse files Browse the repository at this point in the history
* added branch composition to the modelsystem normalizer

* fixed attribute name mistake

* added test for composition formula

* resolve composition formula outside of normalize within ModelSystem

* moved resolve composition formula up to Simulations()

* save point

* finalized movement of resolve formulas and expanded tests

* added doc strings

* removed logger and unused typing

* some typos

* applied ruff

* ruff formatting

---------

Co-authored-by: jrudz <[email protected]>
  • Loading branch information
JFRudzinski and jrudz authored Jun 4, 2024
1 parent cdad337 commit e032021
Show file tree
Hide file tree
Showing 5 changed files with 306 additions and 7 deletions.
79 changes: 75 additions & 4 deletions src/nomad_simulations/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#

import numpy as np
from typing import List
from structlog.stdlib import BoundLogger

from nomad.units import ureg
from nomad.metainfo import SubSection, Quantity, MEnum, Section, Datetime
Expand All @@ -27,6 +29,7 @@
from .model_system import ModelSystem
from .model_method import ModelMethod
from .outputs import Outputs
from .utils import is_not_representative, get_composition


class Program(Entity):
Expand Down Expand Up @@ -177,6 +180,70 @@ def _set_system_branch_depth(
system_child.branch_depth = branch_depth + 1
self._set_system_branch_depth(system_child, branch_depth + 1)

def resolve_composition_formula(self, system_parent: ModelSystem) -> None:
"""Determine and set the composition formula for `system_parent` and all of its
descendants.
Args:
system_parent (ModelSystem): The upper-most level of the system hierarchy to consider.
"""

def set_composition_formula(
system: ModelSystem, subsystems: List[ModelSystem], atom_labels: List[str]
) -> None:
"""Determine the composition formula for `system` based on its `subsystems`.
If `system` has no children, the atom_labels are used to determine the formula.
Args:
system (ModelSystem): The system under consideration.
subsystems (List[ModelSystem]): The children of system.
atom_labels (List[str]): The global list of atom labels corresponding
to the atom indices stored in system.
"""
if not subsystems:
atom_indices = (
system.atom_indices if system.atom_indices is not None else []
)
subsystem_labels = (
[np.array(atom_labels)[atom_indices]]
if atom_labels
else ['Unknown' for atom in range(len(atom_indices))]
)
else:
subsystem_labels = [
subsystem.branch_label
if subsystem.branch_label is not None
else 'Unknown'
for subsystem in subsystems
]
if system.composition_formula is None:
system.composition_formula = get_composition(subsystem_labels)

def get_composition_recurs(system: ModelSystem, atom_labels: List[str]) -> None:
"""Traverse the system hierarchy downward and set the branch composition for
all (sub)systems at each level.
Args:
system (ModelSystem): The system to traverse downward.
atom_labels (List[str]): The global list of atom labels corresponding
to the atom indices stored in system.
"""
subsystems = system.model_system
set_composition_formula(system, subsystems, atom_labels)
if subsystems:
for subsystem in subsystems:
get_composition_recurs(subsystem, atom_labels)

atoms_state = (
system_parent.cell[0].atoms_state if system_parent.cell is not None else []
)
atom_labels = (
[atom.chemical_symbol for atom in atoms_state]
if atoms_state is not None
else []
)
get_composition_recurs(system_parent, atom_labels)

def normalize(self, archive, logger) -> None:
super(EntryData, self).normalize(archive, logger)

Expand All @@ -192,8 +259,12 @@ def normalize(self, archive, logger) -> None:
self.m_cache['system_ref'] = system_ref

# Setting up the `branch_depth` in the parent-child tree
for system_parents in self.model_system:
system_parents.branch_depth = 0
if len(system_parents.model_system) == 0:
for system_parent in self.model_system:
system_parent.branch_depth = 0
if len(system_parent.model_system) == 0:
continue
self._set_system_branch_depth(system_parent)

if is_not_representative(system_parent, logger):
continue
self._set_system_branch_depth(system_parents)
self.resolve_composition_formula(system_parent)
33 changes: 31 additions & 2 deletions src/nomad_simulations/model_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import re
import numpy as np
import ase
from typing import Tuple, Optional
from typing import Tuple, Optional, List
from structlog.stdlib import BoundLogger

from matid import SymmetryAnalyzer, Classifier # pylint: disable=import-error
Expand All @@ -35,7 +35,12 @@

from nomad import config
from nomad.units import ureg
from nomad.atomutils import Formula, get_normalized_wyckoff, search_aflow_prototype
from nomad.atomutils import (
Formula,
get_normalized_wyckoff,
search_aflow_prototype,
get_composition,
)

from nomad.metainfo import Quantity, SubSection, SectionProxy, MEnum, Section, Context
from nomad.datamodel.data import ArchiveSection
Expand Down Expand Up @@ -901,6 +906,30 @@ class ModelSystem(System):
""",
)

composition_formula = Quantity(
type=str,
description="""
The overall composition of the system with respect to its subsystems.
The syntax for a system composed of X and Y with x and y components of each,
respectively, is X(x)Y(y). At the deepest branch in the hierarchy, the
composition_formula is expressed in terms of the atomic labels.
Example: A system composed of 3 water molecules with the following hierarchy
TotalSystem
|
group_H2O
| | |
H2O H2O H2O
has the following compositional formulas at each branch:
branch 0, index 0: "Total_System" composition_formula = group_H2O(1)
branch 1, index 0: "group_H2O" composition_formula = H2O(3)
branch 2, index 0: "H2O" composition_formula = H(1)O(2)
""",
)

model_system = SubSection(sub_section=SectionProxy('ModelSystem'), repeats=True)

def resolve_system_type_and_dimensionality(
Expand Down
1 change: 1 addition & 0 deletions src/nomad_simulations/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@
RussellSaundersState,
is_not_representative,
get_variables,
get_composition,
)
13 changes: 12 additions & 1 deletion src/nomad_simulations/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# limitations under the License.
#

import numpy as np
from math import factorial
from typing import Optional, List
from structlog.stdlib import BoundLogger
Expand Down Expand Up @@ -126,7 +127,6 @@ def is_not_representative(model_system, logger: BoundLogger):
logger.warning('The `ModelSystem` is empty.')
return None
if not model_system.is_representative:
logger.warning('The `ModelSystem` was not found to be representative.')
return True
return False

Expand All @@ -152,3 +152,14 @@ def get_variables(
if isinstance(var, variable_cls):
result.append(var)
return result


# TODO remove function in nomad.atomutils
def get_composition(children_names: List[str]) -> str:
"""
Generates a generalized "chemical formula" based on the provided list `children_names`,
with the format X(m)Y(n) for children_names X and Y of quantities m and n, respectively.
"""
children_count_tup = np.unique(children_names, return_counts=True)
formula = ''.join([f'{name}({count})' for name, count in zip(*children_count_tup)])
return formula if formula else None
187 changes: 187 additions & 0 deletions tests/test_model_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@
Symmetry,
ChemicalFormula,
ModelSystem,
AtomicCell,
AtomsState,
)
from nomad_simulations.general import Simulation


class TestAtomicCell:
Expand Down Expand Up @@ -439,3 +442,187 @@ def test_normalize(self):
assert np.isclose(model_system.elemental_composition[0].atomic_fraction, 2 / 3)
assert model_system.elemental_composition[1].element == 'O'
assert np.isclose(model_system.elemental_composition[1].atomic_fraction, 1 / 3)

@pytest.mark.parametrize(
'is_representative, has_atom_indices, mol_label_list, n_mol_list, atom_labels_list, composition_formula_list, custom_formulas',
[
(
True,
True,
['H20'],
[3],
[['H', 'O', 'O']],
['group_H20(1)', 'H20(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'],
[None, None, None, None, None],
), # pure system
(
False,
True,
['H20'],
[3],
[['H', 'O', 'O']],
[None, None, None, None, None],
[None, None, None, None, None],
), # non-representative system
(
True,
True,
[None],
[3],
[['H', 'O', 'O']],
['Unknown(1)', 'Unknown(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'],
[None, None, None, None, None],
), # missing branch labels
(
True,
True,
['H20'],
[3],
[[None, None, None]],
['group_H20(1)', 'H20(3)', 'Unknown(3)', 'Unknown(3)', 'Unknown(3)'],
[None, None, None, None, None],
), # missing atom labels
(
True,
False,
['H20'],
[3],
[['H', 'O', 'O']],
['group_H20(1)', 'H20(3)', None, None, None],
[None, None, None, None, None],
), # missing atom indices
(
True,
True,
['H20'],
[3],
[['H', 'O', 'O']],
['waters(1)', 'water_molecules(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'],
['waters(1)', 'water_molecules(3)', None, None, None],
), # custom formulas
(
True,
True,
['H20', 'Methane'],
[5, 2],
[['H', 'O', 'O'], ['C', 'H', 'H', 'H', 'H']],
[
'group_H20(1)group_Methane(1)',
'H20(5)',
'H(1)O(2)',
'H(1)O(2)',
'H(1)O(2)',
'H(1)O(2)',
'H(1)O(2)',
'Methane(2)',
'C(1)H(4)',
'C(1)H(4)',
],
[None, None, None, None, None, None, None, None, None, None],
), # binary mixture
],
)
def test_system_hierarchy_for_molecules(
self,
is_representative: bool,
has_atom_indices: bool,
mol_label_list: List[str],
n_mol_list: List[int],
atom_labels_list: List[str],
composition_formula_list: List[str],
custom_formulas: List[str],
):
"""Test the `ModelSystem` normalization of 'composition_formula' for atoms and molecules.
Args:
is_representative (bool): Specifies if branch_depth = 0 is representative or not.
If not representative, the composition formulas should not be generated.
has_atom_indices (bool): Specifies if the atom_indices should be populated during parsing.
Without atom_indices, the composition formulas for the deepest level of the hierarchy
should not be populated.
mol_label_list (List[str]): Molecule types for generating the hierarchy.
n_mol_list (List[int]): Number of molecules for each molecule type. Should be same
length as mol_label_list.
atom_labels_list (List[str]): Atom labels for each molecule type. Should be same length as
mol_label_list, with each entry being a list of corresponding atom labels.
composition_formula_list (List[str]): Resulting composition formulas after normalization. The
ordering is dictated by the recursive traversing of the hierarchy in get_system_recurs(),
which follows each branch to its deepest level before moving to the next branch, i.e.,
[model_system.composition_formula,
model_system.model_system[0].composition_formula],
model_system.model_system[0].model_system[0].composition_formula,
model_system.model_system[0].model_system[1].composition_formula, ...,
model_system.model_system[1].composition_formula, ...]
custom_formulas (List[str]): Custom composition formulas that can be set in the generation
of the hierarchy, which will cause the normalize to ignore (i.e., not overwrite) these formula entries.
The ordering is as described above.
Returns:
None
"""

### Generate the system hierarchy ###
simulation = Simulation()
model_system = ModelSystem(is_representative=True)
simulation.model_system.append(model_system)
model_system.branch_label = 'Total System'
model_system.is_representative = is_representative
model_system.composition_formula = custom_formulas[0]
ctr_comp = 1
atomic_cell = AtomicCell()
model_system.cell.append(atomic_cell)
if has_atom_indices:
model_system.atom_indices = []
for mol_label, n_mol, atom_labels in zip(
mol_label_list, n_mol_list, atom_labels_list
):
# Create a branch in the hierarchy for this molecule type
model_system_mol_group = ModelSystem()
if has_atom_indices:
model_system_mol_group.atom_indices = []
model_system_mol_group.branch_label = (
f'group_{mol_label}' if mol_label is not None else None
)
model_system_mol_group.composition_formula = custom_formulas[ctr_comp]
ctr_comp += 1
model_system.model_system.append(model_system_mol_group)
for _ in range(n_mol):
# Create a branch in the hierarchy for this molecule
model_system_mol = ModelSystem(branch_label=mol_label)
model_system_mol.branch_label = mol_label
model_system_mol.composition_formula = custom_formulas[ctr_comp]
ctr_comp += 1
model_system_mol_group.model_system.append(model_system_mol)
# add the corresponding atoms to the global atom list
for atom_label in atom_labels:
if atom_label is not None:
atomic_cell.atoms_state.append(
AtomsState(chemical_symbol=atom_label)
)
n_atoms = len(atomic_cell.atoms_state)
atom_indices = np.arange(n_atoms - len(atom_labels), n_atoms)
if has_atom_indices:
model_system_mol.atom_indices = atom_indices
model_system_mol_group.atom_indices = np.append(
model_system_mol_group.atom_indices, atom_indices
)
model_system.atom_indices = np.append(
model_system.atom_indices, atom_indices
)

simulation.normalize(EntryArchive(), logger)

### Traverse the hierarchy recursively and check the results ###
assert model_system.composition_formula == composition_formula_list[0]
ctr_comp = 1

def get_system_recurs(sec_system, ctr_comp):
for sys in sec_system:
assert sys.composition_formula == composition_formula_list[ctr_comp]
ctr_comp += 1
sec_subsystem = sys.model_system
if sec_subsystem:
ctr_comp = get_system_recurs(sec_subsystem, ctr_comp)
return ctr_comp

get_system_recurs(model_system.model_system, ctr_comp)

0 comments on commit e032021

Please sign in to comment.