nomad-coe · EBB2675 · Sep 24, 2024 · Sep 24, 2024 · Sep 25, 2024 · Sep 25, 2024
diff --git a/src/nomad_simulations/schema_packages/basis_set.py b/src/nomad_simulations/schema_packages/basis_set.py
@@ -13,7 +13,7 @@
 from nomad import utils
 from nomad.datamodel.data import ArchiveSection
 from nomad.datamodel.metainfo.annotations import ELNAnnotation
-from nomad.metainfo import MEnum, Quantity, SubSection
+from nomad.metainfo import JSON, MEnum, Quantity, SubSection
 from nomad.units import ureg
 
 from nomad_simulations.schema_packages.atoms_state import AtomsState
@@ -190,25 +190,118 @@ class AtomCenteredFunction(ArchiveSection):
     Specifies a single function (term) in an atom-centered basis set.
     """
 
-    pass
+    basis_type = Quantity(
+        type=MEnum(
+            'spherical',
+            'cartesian',
+        ),
+        default='spherical',
+        description="""
+        Specifies whether the basis functions are spherical-harmonic or cartesian functions.
+        """,
+    )
+
+    function_type = Quantity(
+        type=MEnum('s', 'p', 'd', 'f', 'g', 'h', 'i', 'j', 'k', 'l'),
+        description="""
+        L=a+b+c
+        The angular momentum of GTO to be added.
+        """,
+    )
+
+    n_primitive = Quantity(
+        type=np.int32,
+        description="""
+        Number of primitives.
+        Linear combinations of the primitive Gaussians are formed to approximate the radial extent of an STO.
+        """,
+    )
+
+    exponents = Quantity(
+        type=np.float32,
+        shape=['n_primitive'],
+        description="""
+        List of exponents for the basis function.
+        """,
+    )
+
+    contraction_coefficients = Quantity(
+        type=np.float32,
+        shape=['n_primitive'],
+        description="""
+        List of contraction coefficients corresponding to the exponents.
+        """,
+    )
+
+    point_charge = Quantity(
+        type=np.float32,
+        description="""
+        the value of the point charge.
+        """,
+    )
 
-    # TODO: design system for writing basis functions like gaussian or slater orbitals
+    def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
+        super().normalize(archive, logger)
+
+        # Validation: Check that n_primitive matches the lengths of exponents and contraction coefficients
+        if self.n_primitive is not None:
+            if self.exponents is not None and len(self.exponents) != self.n_primitive:
+                raise ValueError(
+                    f'Mismatch in number of exponents: expected {self.n_primitive}, found {len(self.exponents)}.'
+                )
+            if (
+                self.contraction_coefficients is not None
+                and len(self.contraction_coefficients) != self.n_primitive
+            ):
+                raise ValueError(
+                    f'Mismatch in number of contraction coefficients: expected {self.n_primitive}, found {len(self.contraction_coefficients)}.'
+                )
 
 
 class AtomCenteredBasisSet(BasisSetComponent):
     """
     Defines an atom-centered basis set.
     """
 
+    basis_set = Quantity(
+        type=str,
+        description="""
+        name of the basis set.
+        """,
+    )
+
+    type = Quantity(
+        type=MEnum(
+            'STO',  # Slater-type orbitals
+            'GTO',  # Gaussian-type orbitals
+            'NAO',  # Numerical atomic orbitals
+            'cECP',  # Capped effective core potentials
+            'PC',  # Point charges
+        ),
+        description="""
+        Type of the basis set, e.g. STO or GTO.
+        """,
+    )
+
+    role = Quantity(
+        type=MEnum(
+            'orbital',
+            'auxiliary_scf',
+            'auxiliary_post_hf',
+            'cabs',  # complementary auxiliary basis set
+        ),
+        description="""
+        The role of the basis set.
+        """,
+    )
+
     functional_composition = SubSection(
         sub_section=AtomCenteredFunction.m_def, repeats=True
-    )  # TODO change name
+    )
 
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
         super().normalize(archive, logger)
         # self.name = self.m_def.name
-        # TODO: set name based on basis functions
-        # ? use basis set names from Basis Set Exchange
 
 
 class APWBaseOrbital(ArchiveSection):

diff --git a/src/nomad_simulations/schema_packages/numerical_settings.py b/src/nomad_simulations/schema_packages/numerical_settings.py
@@ -52,66 +52,52 @@ class Smearing(NumericalSettings):
 
 class Mesh(ArchiveSection):
     """
-    A base section used to specify the settings of a sampling mesh.
-    It supports uniformly-spaced meshes and symmetry-reduced representations.
+    A base section used to define the mesh or space partitioning over which a discrete numerical integration is performed.
     """
 
-    spacing = Quantity(
-        type=MEnum('Equidistant', 'Logarithmic', 'Tan'),
-        shape=['dimensionality'],
+    dimensionality = Quantity(
+        type=np.int32,
+        default=3,
         description="""
-        Identifier for the spacing of the Mesh. Defaults to 'Equidistant' if not defined. It can take the values:
-
-        | Name      | Description                      |
-        | --------- | -------------------------------- |
-        | `'Equidistant'`  | Equidistant grid (also known as 'Newton-Cotes') |
-        | `'Logarithmic'`  | log distance grid |
-        | `'Tan'`  | Non-uniform tan mesh for grids. More dense at low abs values of the points, while less dense for higher values |
+        Dimensionality of the mesh: 1, 2, or 3. Defaults to 3.
         """,
     )
 
-    quadrature = Quantity(
-        type=MEnum(
-            'Gauss-Legendre',
-            'Gauss-Laguerre',
-            'Clenshaw-Curtis',
-            'Newton-Cotes',
-            'Gauss-Hermite',
-        ),
+    mesh_type = Quantity(
+        type=MEnum('equidistant', 'logarithmic', 'tan'),
+        shape=['dimensionality'],
         description="""
-        Quadrature rule used for integration of the Mesh. This quantity is relevant for 1D meshes:
+        Kind of mesh identifying the spacing in each of the dimensions specified by `dimensionality`. It can take the values:
 
         | Name      | Description                      |
         | --------- | -------------------------------- |
-        | `'Gauss-Legendre'` | Quadrature rule for integration using Legendre polynomials |
-        | `'Gauss-Laguerre'` | Quadrature rule for integration using Laguerre polynomials |
-        | `'Clenshaw-Curtis'`  | Quadrature rule for integration using Chebyshev polynomials using discrete cosine transformations |
-        | `'Gauss-Hermite'`  | Quadrature rule for integration using Hermite polynomials |
+        | `'equidistant'`  | Equidistant grid (also known as 'Newton-Cotes') |
+        | `'logarithmic'`  | log distance grid |
+        | `'Tan'`  | Non-uniform tan mesh for grids. More dense at low abs values of the points, while less dense for higher values |
         """,
-    )  # ! @JosePizarro3 I think that this is separate from the spacing
+    )
 
-    n_points = Quantity(
+    grid = Quantity(
         type=np.int32,
+        shape=['dimensionality'],
         description="""
-        Number of points in the mesh.
+        Number of points sampled along each axis of the mesh.
         """,
     )
 
-    dimensionality = Quantity(
+    n_points = Quantity(
         type=np.int32,
-        default=3,
         description="""
-        Dimensionality of the mesh: 1, 2, or 3. Defaults to 3.
+        Total number of points in the mesh.
         """,
     )
 
-    grid = Quantity(
-        type=np.int32,
+    spacing = Quantity(
+        type=np.float64,
         shape=['dimensionality'],
-        description="""
-        Amount of mesh point sampling along each axis. See `type` for the axes definition.
+        description="""Grid spacing for equidistant meshes. Ignored for other kinds.
         """,
-    )  # ? @JosePizzaro3: should the mesh also contain its boundary information
+    )
 
     points = Quantity(
         type=np.complex128,
@@ -126,25 +112,102 @@ class Mesh(ArchiveSection):
         shape=['n_points'],
         description="""
         The amount of times the same point reappears. A value larger than 1, typically indicates
-        a symmetry operation that was applied to the `Mesh`. This quantity is equivalent to `weights`:
+        a symmetry operation that was applied to the `Mesh`.
+        """,
+    )
+
+    pruning = Quantity(
+        type=MEnum('fixed', 'adaptive'),
+        description="""
+        Pruning method applied for reducing the amount of points in the Mesh. This is typically
+        used for numerical integration near the core levels in atoms.
+        In the fixed grid methods, the number of angular grid points is predetermined for
+        ranges of radial grid points, while in the adaptive methods, the angular grid is adjusted
+        on-the-fly for each radial point according to some accuracy criterion.
+        """,
+    )
+
+    def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
+        super().normalize(archive, logger)
+
+        if self.dimensionality not in [1, 2, 3]:
+            logger.error(
+                '`dimensionality` meshes different than 1, 2, or 3 are not supported.'
+            )
 
-            multiplicities = n_points * weights
+
+class NumericalIntegration(NumericalSettings):
+    """
+    Numerical integration settings used to resolve the following type of integrals by discrete
+    numerical integration:
+
+    ```math
+    \int_{\vec{r}_a}^{\vec{r}_b} d^3 \vec{r} F(\vec{r}) \approx \sum_{n=a}^{b} w(\vec{r}_n) F(\vec{r}_n)
+    ```
+
+    Here, $F$ can be any type of function which would define the type of rules that can be applied
+    to solve such integral (e.g., 1D Gaussian quadrature rule or multi-dimensional `angular` rules like the
+    Lebedev quadrature rule).
+
+    These multidimensional integral has a `Mesh` defined over which the integration is performed, i.e., the
+    $\vec{r}_n$ points.
+    """
+
+    mesh = SubSection(sub_section=Mesh.m_def)
+
+    coordinate = Quantity(
+        type=MEnum('full', 'radial', 'angular'),
+        description="""
+        Coordinate over which the integration is performed. `full` means the integration is performed in
+        entire space. `radial` and `angular` describe cases where the integration is performed for
+        functions which can be splitted into radial and angular distributions (e.g., orbital wavefunctions).
         """,
     )
 
-    weights = Quantity(
+    integration_rule = Quantity(
+        type=str,  # ? extend to MEnum?
+        description="""
+        Integration rule used. This can be any 1D Gaussian quadrature rule or multi-dimensional `angular` rules,
+        e.g., Lebedev quadrature rule (see e.g., Becke, Chem. Phys. 88, 2547 (1988)).
+        """,
+    )
+
+    integration_thresh = Quantity(
         type=np.float64,
-        shape=['n_points'],
         description="""
-        Weight of each point. A value smaller than 1, typically indicates a symmetry operation that was
-        applied to the mesh. This quantity is equivalent to `multiplicities`:
+        Accuracy threshold for integration grid.
+        GRIDTHR in Molpro.
+        BFCut in ORCA.
+        """,
+    )
 
-            weights = multiplicities / n_points
+    weight_approximation = Quantity(
+        type=str,
+        description="""
+        Approximation applied to the weight when doing the numerical integration.
+        See e.g., C. W. Murray, N. C. Handy
+        and G. J. Laming, Mol. Phys. 78, 997 (1993).
+        """,
+    )
+
+    weight_cutoff = Quantity(
+        type=np.float64,
+        description="""
+        Threshold for discarding small weights during integration.
+        Grid points very close to the nucleus can have very small grid weights.
+        WEIGHT_CUT in Molpro.
+        Wcut in ORCA.
         """,
     )
 
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
         super().normalize(archive, logger)
+        valid_coordinates = ['full', 'radial', 'angular', None]
+        if self.coordinate not in valid_coordinates:
+            logger.warning(
+                f'Invalid coordinate value: {self.coordinate}. Resetting to None.'
+            )
+            self.coordinate = None
 
 
 class KSpaceFunctionalities:
@@ -887,3 +950,25 @@ def __init__(self, m_def: 'Section' = None, m_context: 'Context' = None, **kwarg
 
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
         super().normalize(archive, logger)
+
+
+class GTOIntegralDecomposition(NumericalSettings):
+    """
+    A general class for integral decomposition techniques for Coulomb and exchange integrals.
+    Examples:
+    Resolution of identity (RI-approximation):
+    RI
+    RIJK
+    ....
+    Chain-of-spheres (COSX) algorithm for exchange: doi:10.1016/j.chemphys.2008.10.036
+    """
+
+    approximation_type = Quantity(
+        type=str,
+        description="""
+        RIJ, RIK, RIJK,
+        """,
+    )
+
+    def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
+        super().normalize(archive, logger)