Use Latin Hypercube Sampling
dafeda committed Nov 8, 2024
1 parent e15975c commit 0d0e012
Showing 5 changed files with 5,647 additions and 5,636 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -41,7 +41,7 @@ dependencies = [
     "openpyxl>=2.6",
     "pandas>=0.21",
     "pyyaml>=5.3",
-    "scipy>=1.2",
+    "scipy>=1.7",
     "xlrd>=1.2",
     "xtgeo>=2.15",
 ]
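
The raised floor matters because the new sampling code imports scipy.stats.qmc, and the qmc module (with LatinHypercube) first shipped in SciPy 1.7.0. A minimal environment check against the new pin could look like this (illustrative sketch, not part of the commit):

    import scipy

    print("SciPy version:", scipy.__version__)

    try:
        # scipy.stats.qmc (and qmc.LatinHypercube) exists only in SciPy >= 1.7
        from scipy.stats import qmc  # noqa: F401
        print("scipy.stats.qmc found; Latin Hypercube Sampling is available.")
    except ImportError:
        print("SciPy is older than 1.7; upgrade to satisfy the new 'scipy>=1.7' pin.")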
63 changes: 59 additions & 4 deletions src/fmu/tools/sensitivities/create_design.py
@@ -7,12 +7,53 @@
 from pathlib import Path
 
 import numpy as np
+import numpy.linalg as la
 import pandas as pd
+import scipy
+from scipy.stats import qmc
 
 import fmu.tools
 from fmu.tools.sensitivities import design_distributions as design_dist
 
 
+def _is_positive_definite(b_mat):
+    """Returns true when input is positive-definite, via Cholesky"""
+    try:
+        _ = la.cholesky(b_mat)
+        return True
+    except la.LinAlgError:
+        return False
+
+
+def _nearest_positive_definite(a_mat):
+    """Implementation taken from:
+    https://stackoverflow.com/questions/43238173/
+    python-convert-matrix-to-positive-semi-definite/43244194#43244194
+    """
+
+    b_mat = (a_mat + a_mat.T) / 2
+    _, s_mat, v_mat = la.svd(b_mat)
+
+    h_mat = np.dot(v_mat.T, np.dot(np.diag(s_mat), v_mat))
+
+    a2_mat = (b_mat + h_mat) / 2
+
+    a3_mat = (a2_mat + a2_mat.T) / 2
+
+    if _is_positive_definite(a3_mat):
+        return a3_mat
+
+    spacing = np.spacing(la.norm(a_mat))
+    identity = np.eye(a_mat.shape[0])
+    kiter = 1
+    while not _is_positive_definite(a3_mat):
+        mineig = np.min(np.real(la.eigvals(a3_mat)))
+        a3_mat += identity * (-mineig * kiter**2 + spacing)
+        kiter += 1
+
+    return a3_mat
+
+
 class DesignMatrix:
     """Class for design matrix in FMU. Can contain a onebyone design
     or a full montecarlo design.
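
The two new helpers implement the nearest-positive-definite repair from the linked Stack Overflow answer: symmetrize the matrix, project it onto the positive semi-definite cone via its SVD-based polar factor, and, if Cholesky still fails, nudge the smallest eigenvalue upward until it succeeds. A small sketch of the intended behaviour, assuming _is_positive_definite and _nearest_positive_definite are in scope and using a made-up, internally inconsistent correlation matrix:

    import numpy as np
    import numpy.linalg as la

    # Made-up example: X1 correlates strongly and positively with both X2 and X3,
    # while X2 and X3 are strongly negatively correlated, so no valid (positive
    # definite) correlation matrix can satisfy all three entries at once.
    bad_corr = np.array(
        [
            [1.0, 0.9, 0.9],
            [0.9, 1.0, -0.9],
            [0.9, -0.9, 1.0],
        ]
    )

    print(_is_positive_definite(bad_corr))  # False: Cholesky fails
    repaired = _nearest_positive_definite(bad_corr)
    print(_is_positive_definite(repaired))  # True: Cholesky now succeeds
    # The smallest eigenvalue has been lifted to a tiny positive value.
    print(np.min(np.real(la.eigvals(repaired))))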
@@ -709,11 +750,25 @@ def generate(self, realnums, parameters, seedvalues, corrdict, rng):
                     _printwarning(correl)
                 df_correlations = design_dist.read_correlations(corrdict, correl)
                 multivariate_parameters = df_correlations.index.values
-                cov_matrix = design_dist.make_covariance_matrix(df_correlations)
-                normalscoremeans = len(multivariate_parameters) * [0]
-                normalscoresamples = rng.multivariate_normal(
-                    normalscoremeans, cov_matrix, size=numreals
-                )
+
+                # Generate LHS samples for multiple correlated parameters
+                sampler = qmc.LatinHypercube(
+                    d=len(multivariate_parameters), seed=rng
+                )
+                uniform_samples = sampler.random(n=numreals)
+
+                # Transform to normal scores
+                normal_scores = scipy.stats.norm.ppf(uniform_samples)
+
+                # Apply correlation structure
+                # (using Cholesky decomposition of correlation matrix)
+                correlations = df_correlations.values
+                # Make correlation matrix symmetric by filling upper triangle
+                # (which contains NaN by default) with values from lower triangle
+                correlations = np.triu(correlations.T, k=1) + np.tril(correlations)
+                L = np.linalg.cholesky(_nearest_positive_definite(correlations))
+                normalscoresamples = np.dot(normal_scores, L.T)
+
                 normalscoresamples_df = pd.DataFrame(
                     data=normalscoresamples, columns=multivariate_parameters
                 )
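
In summary, the new sampling path replaces the previous rng.multivariate_normal draw with three steps: draw a Latin Hypercube sample on the unit hypercube, map it to standard-normal scores with the inverse normal CDF, and induce the target correlations by multiplying with the Cholesky factor of the (repaired) correlation matrix. A standalone sketch of the same recipe, outside the DesignMatrix machinery and with made-up correlation values:

    import numpy as np
    from scipy.stats import norm, qmc

    rng = np.random.default_rng(12345)  # seed chosen arbitrarily for the example
    n_reals, n_params = 1000, 3

    # Made-up target correlation matrix (symmetric and positive definite)
    target_corr = np.array(
        [
            [1.0, 0.6, 0.3],
            [0.6, 1.0, 0.2],
            [0.3, 0.2, 1.0],
        ]
    )

    # 1) Latin Hypercube sample on the unit hypercube
    sampler = qmc.LatinHypercube(d=n_params, seed=rng)
    uniform_samples = sampler.random(n=n_reals)

    # 2) Transform to standard-normal scores
    normal_scores = norm.ppf(uniform_samples)

    # 3) Induce the target correlation via its Cholesky factor
    chol = np.linalg.cholesky(target_corr)
    correlated = normal_scores @ chol.T

    print(np.corrcoef(correlated, rowvar=False).round(2))  # close to target_corr

Note that the Cholesky step mixes the columns, so the strict one-point-per-stratum property of the raw Latin Hypercube sample holds exactly for the uncorrelated normal scores and only approximately for the correlated samples.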
