Use Latin Hypercube Sampling
dafeda committed Nov 8, 2024
1 parent e15975c commit 0d0e012
Showing 5 changed files with 5,647 additions and 5,636 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -41,7 +41,7 @@ dependencies = [
     "openpyxl>=2.6",
     "pandas>=0.21",
     "pyyaml>=5.3",
-    "scipy>=1.2",
+    "scipy>=1.7",
     "xlrd>=1.2",
     "xtgeo>=2.15",
 ]
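
The raised floor matters because the new sampling code imports scipy.stats.qmc, and the qmc module (with LatinHypercube) first shipped in SciPy 1.7.0. A minimal environment check against the new pin could look like this (illustrative sketch, not part of the commit):

    import scipy

    print("SciPy version:", scipy.__version__)

    try:
        # scipy.stats.qmc (and qmc.LatinHypercube) exists only in SciPy >= 1.7
        from scipy.stats import qmc  # noqa: F401
        print("scipy.stats.qmc found; Latin Hypercube Sampling is available.")
    except ImportError:
        print("SciPy is older than 1.7; upgrade to satisfy the new 'scipy>=1.7' pin.")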
63 changes: 59 additions & 4 deletions src/fmu/tools/sensitivities/create_design.py
@@ -7,12 +7,53 @@
 from pathlib import Path
 
 import numpy as np
+import numpy.linalg as la
 import pandas as pd
+import scipy
+from scipy.stats import qmc
 
 import fmu.tools
 from fmu.tools.sensitivities import design_distributions as design_dist
 
 
+def _is_positive_definite(b_mat):
+    """Returns true when input is positive-definite, via Cholesky"""
+    try:
+        _ = la.cholesky(b_mat)
+        return True
+    except la.LinAlgError:
+        return False
+
+
+def _nearest_positive_definite(a_mat):
+    """Implementation taken from:
+    https://stackoverflow.com/questions/43238173/
+    python-convert-matrix-to-positive-semi-definite/43244194#43244194
+    """
+
+    b_mat = (a_mat + a_mat.T) / 2
+    _, s_mat, v_mat = la.svd(b_mat)
+
+    h_mat = np.dot(v_mat.T, np.dot(np.diag(s_mat), v_mat))
+
+    a2_mat = (b_mat + h_mat) / 2
+
+    a3_mat = (a2_mat + a2_mat.T) / 2
+
+    if _is_positive_definite(a3_mat):
+        return a3_mat
+
+    spacing = np.spacing(la.norm(a_mat))
+    identity = np.eye(a_mat.shape[0])
+    kiter = 1
+    while not _is_positive_definite(a3_mat):
+        mineig = np.min(np.real(la.eigvals(a3_mat)))
+        a3_mat += identity * (-mineig * kiter**2 + spacing)
+        kiter += 1
+
+    return a3_mat
+
+
 class DesignMatrix:
     """Class for design matrix in FMU. Can contain a onebyone design
     or a full montecarlo design.
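
The two new helpers implement the nearest-positive-definite repair from the linked Stack Overflow answer: symmetrize the matrix, project it onto the positive semi-definite cone via its SVD-based polar factor, and, if Cholesky still fails, nudge the smallest eigenvalue upward until it succeeds. A small sketch of the intended behaviour, assuming _is_positive_definite and _nearest_positive_definite are in scope and using a made-up, internally inconsistent correlation matrix:

    import numpy as np
    import numpy.linalg as la

    # Made-up example: X1 correlates strongly and positively with both X2 and X3,
    # while X2 and X3 are strongly negatively correlated, so no valid (positive
    # definite) correlation matrix can satisfy all three entries at once.
    bad_corr = np.array(
        [
            [1.0, 0.9, 0.9],
            [0.9, 1.0, -0.9],
            [0.9, -0.9, 1.0],
        ]
    )

    print(_is_positive_definite(bad_corr))  # False: Cholesky fails
    repaired = _nearest_positive_definite(bad_corr)
    print(_is_positive_definite(repaired))  # True: Cholesky now succeeds
    # The smallest eigenvalue has been lifted to a tiny positive value.
    print(np.min(np.real(la.eigvals(repaired))))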
@@ -709,11 +750,25 @@ def generate(self, realnums, parameters, seedvalues, corrdict, rng):
                     _printwarning(correl)
                 df_correlations = design_dist.read_correlations(corrdict, correl)
                 multivariate_parameters = df_correlations.index.values
-                cov_matrix = design_dist.make_covariance_matrix(df_correlations)
-                normalscoremeans = len(multivariate_parameters) * [0]
-                normalscoresamples = rng.multivariate_normal(
-                    normalscoremeans, cov_matrix, size=numreals
-                )
+
+                # Generate LHS samples for multiple correlated parameters
+                sampler = qmc.LatinHypercube(
+                    d=len(multivariate_parameters), seed=rng
+                )
+                uniform_samples = sampler.random(n=numreals)
+
+                # Transform to normal scores
+                normal_scores = scipy.stats.norm.ppf(uniform_samples)
+
+                # Apply correlation structure
+                # (using Cholesky decomposition of correlation matrix)
+                correlations = df_correlations.values
+                # Make correlation matrix symmetric by filling upper triangle
+                # (which contains NaN by default) with values from lower triangle
+                correlations = np.triu(correlations.T, k=1) + np.tril(correlations)
+                L = np.linalg.cholesky(_nearest_positive_definite(correlations))
+                normalscoresamples = np.dot(normal_scores, L.T)
+
                 normalscoresamples_df = pd.DataFrame(
                     data=normalscoresamples, columns=multivariate_parameters
                 )
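
In summary, the new sampling path replaces the previous rng.multivariate_normal draw with three steps: draw a Latin Hypercube sample on the unit hypercube, map it to standard-normal scores with the inverse normal CDF, and induce the target correlations by multiplying with the Cholesky factor of the (repaired) correlation matrix. A standalone sketch of the same recipe, outside the DesignMatrix machinery and with made-up correlation values:

    import numpy as np
    from scipy.stats import norm, qmc

    rng = np.random.default_rng(12345)  # seed chosen arbitrarily for the example
    n_reals, n_params = 1000, 3

    # Made-up target correlation matrix (symmetric and positive definite)
    target_corr = np.array(
        [
            [1.0, 0.6, 0.3],
            [0.6, 1.0, 0.2],
            [0.3, 0.2, 1.0],
        ]
    )

    # 1) Latin Hypercube sample on the unit hypercube
    sampler = qmc.LatinHypercube(d=n_params, seed=rng)
    uniform_samples = sampler.random(n=n_reals)

    # 2) Transform to standard-normal scores
    normal_scores = norm.ppf(uniform_samples)

    # 3) Induce the target correlation via its Cholesky factor
    chol = np.linalg.cholesky(target_corr)
    correlated = normal_scores @ chol.T

    print(np.corrcoef(correlated, rowvar=False).round(2))  # close to target_corr

Note that the Cholesky step mixes the columns, so the strict one-point-per-stratum property of the raw Latin Hypercube sample holds exactly for the uncorrelated normal scores and only approximately for the correlated samples.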
