Skip to content

Commit

Permalink
Do not sample from multivariate_normal in test
Browse files Browse the repository at this point in the history
Instead, add correlated PARAM5 ~ TruncatedNormal(3, 1, 1, 5) and PARAM6 ~ Uniform(0, 1) to design_input_background.xlsx.
We don't want tests that call rng.multivariate_normal directly as that will change when we do latin hypercube sampling instead.
Add check that lower bound must be less than upper bound in truncated normal
  • Loading branch information
dafeda authored Nov 8, 2024
1 parent c8e260f commit 4fc530a
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 95 deletions.
64 changes: 36 additions & 28 deletions src/fmu/tools/sensitivities/design_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ def _check_dist_params_normal(dist_params):
elif float(dist_params[1]) < 0:
status = False
msg = "Stddev for normal distribution must be >= 0. "
elif len(dist_params) == 4 and float(dist_params[2]) >= float(dist_params[3]):
status = False
msg = (
"For truncated normal distribution, "
"lower bound must be less than upper bound, "
f"but got [{dist_params[2]}, {dist_params[3]}]. "
)
else:
status = True
msg = ""
Expand Down Expand Up @@ -141,7 +148,7 @@ def _check_dist_params_logunif(dist_params):


def draw_values_normal(dist_parameters, numreals, rng, normalscoresamples=None):
"""Draws values from normal distribution.
"""Draws values from normal or truncated normal.
Args:
dist_parameters(list): [mean, std dev, min, max],
min/max defining truncated normal
Expand All @@ -152,39 +159,40 @@ def draw_values_normal(dist_parameters, numreals, rng, normalscoresamples=None):
list of values
"""
status, msg = _check_dist_params_normal(dist_parameters)

if not status:
raise ValueError(msg)
if len(dist_parameters) == 2: # normal
dist_mean = float(dist_parameters[0])
dist_stddev = float(dist_parameters[1])
if normalscoresamples is not None:
values = scipy.stats.norm.ppf(
scipy.stats.norm.cdf(normalscoresamples),
loc=dist_mean,
scale=dist_stddev,
)
else:
distribution = scipy.stats.norm(dist_mean, dist_stddev)
values = distribution.rvs(size=numreals, random_state=rng)
else: # truncated normal or invalid
mean = float(dist_parameters[0])
sigma = float(dist_parameters[1])
clip1 = float(dist_parameters[2])
clip2 = float(dist_parameters[3])
low = (clip1 - mean) / sigma
high = (clip2 - mean) / sigma

mean = float(dist_parameters[0])
stddev = float(dist_parameters[1])

if len(dist_parameters) == 2:
if normalscoresamples is not None:
values = scipy.stats.truncnorm.ppf(
return scipy.stats.norm.ppf(
scipy.stats.norm.cdf(normalscoresamples),
low,
high,
loc=mean,
scale=sigma,
scale=stddev,
)
else:
distribution = scipy.stats.truncnorm(low, high, loc=mean, scale=sigma)
values = distribution.rvs(size=numreals, random_state=rng)
return values
distribution = scipy.stats.norm(mean, stddev)
return distribution.rvs(size=numreals, random_state=rng)

# Handle truncated normal case
clip1 = float(dist_parameters[2])
clip2 = float(dist_parameters[3])
low = (clip1 - mean) / stddev
high = (clip2 - mean) / stddev

if normalscoresamples is not None:
return scipy.stats.truncnorm.ppf(
scipy.stats.norm.cdf(normalscoresamples),
low,
high,
loc=mean,
scale=stddev,
)

distribution = scipy.stats.truncnorm(low, high, loc=mean, scale=stddev)
return distribution.rvs(size=numreals, random_state=rng)


def draw_values_lognormal(dist_parameters, numreals, rng, normalscoresamples=None):
Expand Down
Binary file modified tests/sensitivities/data/config/design_input_background.xlsx
Binary file not shown.
8 changes: 8 additions & 0 deletions tests/sensitivities/test_create_design.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,14 @@ def test_generate_background(tmpdir):

assert (faults_vals.values == contacts_vals.values).all()

sens6 = design.designvalues[design.designvalues["SENSNAME"] == "sens6"]
# PARAM5 ~ TruncatedNormal(3, 1, 1, 5)
# PARAM6 ~ Uniform(0, 1)
assert np.isclose(
stats.spearmanr(sens6["PARAM5"], sens6["PARAM6"])[0],
0.8,
atol=0.1,
)
sens7 = design.designvalues[design.designvalues["SENSNAME"] == "sens7"]

# PARAM9 and PARAM10 have a target correlation of 0.9 in the design config.
Expand Down
75 changes: 8 additions & 67 deletions tests/sensitivities/test_design_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,34 +117,6 @@ def test_check_dist_params_logunif():
assert dists._check_dist_params_logunif([1, 1])[0]


@pytest.mark.parametrize(
"dist_params",
[
[10.0, 2.0], # normal distribution params [mean, std_dev]
[10.0, 2.0, 5.0, 15.0], # truncated normal params [mean, std_dev, min, max]
],
)
def test_draw_values_normal_correlation(dist_params):
# Create two sets of correlated normal scores
rng = np.random.RandomState()
correlation = 0.8
n_samples = 1000

# Generate correlated standard normal variables
cov_matrix = [[1.0, correlation], [correlation, 1.0]]
normal_scores = rng.multivariate_normal([0, 0], cov_matrix, size=n_samples)

# Draw values using the correlated normal scores
values1 = dists.draw_values_normal(dist_params, n_samples, rng, normal_scores[:, 0])
values2 = dists.draw_values_normal(dist_params, n_samples, rng, normal_scores[:, 1])

# Calculate the correlation between the transformed variables
actual_correlation = np.corrcoef(values1, values2)[0, 1]

# Test if the correlation is close to the expected value
assert abs(actual_correlation - correlation) < 0.1


def test_draw_values_normal():
rng = np.random.RandomState()
values = dists.draw_values_normal([0, 1], 10, rng)
Expand All @@ -170,49 +142,18 @@ def test_draw_values_normal():
):
values = dists.draw_values_normal([0, -1], 10, rng)

values = dists.draw_values_normal([0, 10, -1, 2], 50, rng)
assert all(-1 <= value <= 2 for value in values)


def test_draw_values_lognormal():
rng = np.random.RandomState()
assert not dists.draw_values_lognormal([100, 10], 0, rng).size
values = dists.draw_values_lognormal([100, 10], 10, rng)
assert len(values) == 10

assert all(isinstance(value, numbers.Number) for value in values)
assert all(value > 0 for value in values)

with pytest.raises(
ValueError,
match="Lognormal distribution must have 2 parameters, but had 3 parameters.",
):
values = dists.draw_values_lognormal([10, 50, 100], 10, rng)

with pytest.raises(
ValueError, match="Parameters for lognormal distribution must be numbers."
):
values = dists.draw_values_lognormal(["a", 10], 10, rng)

with pytest.raises(
ValueError, match="Lognormal distribution must have stddev >= 0."
match=(
"For truncated normal distribution, "
"lower bound must be less than upper bound, "
r"but got \[2, -1\]."
),
):
values = dists.draw_values_lognormal([0, -1], 10, rng)
values = dists.draw_values_normal([0, 1, 2, -1], 10, rng)


def test_draw_uniform_with_correlation():
rng = np.random.RandomState()
correlation = 0.6
n_samples = 1000

cov_matrix = [[1.0, correlation], [correlation, 1.0]]
normal_scores = rng.multivariate_normal([0, 0], cov_matrix, size=n_samples)

values1 = dists.draw_values_uniform([0, 100], n_samples, rng, normal_scores[:, 0])
values2 = dists.draw_values_uniform([10, 50], n_samples, rng, normal_scores[:, 1])

actual_correlation = np.corrcoef(values1, values2)[0, 1]
assert (actual_correlation - correlation) < 0.1
values = dists.draw_values_normal([0, 10, -1, 2], 50, rng)
assert all(-1 <= value <= 2 for value in values)


def test_draw_values_uniform():
Expand Down

0 comments on commit 4fc530a

Please sign in to comment.