
Commit

clean up and more docs
tommyod committed Nov 18, 2024
1 parent fe97ab7 commit 48776a4
Showing 1 changed file with 32 additions and 16 deletions.
48 changes: 32 additions & 16 deletions src/fmu/tools/sensitivities/iman_conover.py
@@ -11,13 +11,14 @@
 """

-import numpy as np
-import scipy as sp
-import pandas as pd
-import requests
-import io
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import pytest
+import requests
+import scipy as sp


 class ImanConover:
@@ -87,12 +88,14 @@ def __call__(self, X):
         Parameters
         ----------
         X : ndarray
-            Input matrix of shape (N, K).
+            Input matrix of shape (N, K). This is the data set that we want to
+            induce correlation structure on.

         Returns
         -------
         ndarray
-            Input matrix of shape (N, K).
+            Output matrix of shape (N, K). This data set will have a
+            correlation structure that is more similar to `correlation_matrix`.
         """

         if not isinstance(X, np.ndarray):
@@ -102,36 +105,49 @@ def __call__(self, X):

         N, K = X.shape

-        if not K == self.P.shape[0]:
+        if self.P.shape[0] != K:
             msg = f"Shape of `X` ({X.shape}) does not match shape of "
             msg += f"correlation matrix ({self.P.shape})"
             raise ValueError(msg)

-        # Step one - use van der Waerden scores to transform data into new data
-        # that are approximately multivariate normal.
+        # STEP ONE - Use van der Waerden scores to transform data to
+        # approximately multivariate normal (but with correlations).
+        # The new data has the same rank correlation as the original data.
         ranks = sp.stats.rankdata(X, method="average", axis=0) / (N + 1)
         normal_scores = sp.stats.norm.ppf(ranks)

         # TODO: Remove these sanity checks eventually
         spearman_before = sp.stats.spearmanr(X).statistic
         spearman_after = sp.stats.spearmanr(normal_scores).statistic
-        msg = "Spearman correlation before and after ranking should be equal"
+        msg = "Spearman correlation before and after transforming should be equal"
         assert np.allclose(spearman_before, spearman_after), msg

-        # Step two - Remove correlations from the input data set
+        # STEP TWO - Remove correlations from the transformed data
         I = np.corrcoef(normal_scores, rowvar=False)
-        Q = np.linalg.cholesky(I)
-        decorrelated_scores = np.linalg.solve(Q, normal_scores.T).T
+        Q = np.linalg.cholesky(I)  # QQ' = I
+        # Equivalent to: decorrelated_scores = normal_scores @ np.linalg.inv(Q).T
+        # We exploit the fact that Q is lower-triangular and avoid the inverse.
+        # X = N @ inv(Q)^T => X @ Q^T = N => (Q @ X^T)^T = N
+        decorrelated_scores = sp.linalg.solve_triangular(
+            Q, normal_scores.T, lower=True
+        ).T

         # TODO: Remove these sanity checks eventually
         assert np.allclose(np.corrcoef(decorrelated_scores, rowvar=False), np.eye(K))

-        # Step three - Induce correlations
+        # STEP THREE - Induce correlations in transformed space
         correlated_scores = decorrelated_scores @ self.P.T

         # TODO: Remove these sanity checks eventually (why does this fail??)
         # correlations = np.corrcoef(correlated_scores, rowvar=False)
         # assert np.allclose(correlations, self.C)

-        # Step four - Restore marginal distributions using ranks
+        # STEP FOUR - Map back to original space using ranks, ensuring
+        # that marginal distributions are preserved
         result = np.empty_like(X)
         for k in range(K):
+            # If row j is the k'th largest in `correlated_scores`, then
+            # we map the k'th largest entry in X to row j.
             ranks = sp.stats.rankdata(correlated_scores[:, k]).astype(int) - 1
             result[:, k] = np.sort(X[:, k])[ranks]
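The triangular solve introduced in STEP TWO can be checked in isolation. Below is a minimal standalone sketch, not part of this commit, verifying that solving the lower-triangular system matches multiplying by np.linalg.inv(Q).T and that the resulting scores are empirically uncorrelated; the toy data and shapes are illustrative assumptions only.

import numpy as np
import scipy as sp

rng = np.random.default_rng(0)
X = rng.lognormal(size=(500, 3))  # any continuous data works here

# Van der Waerden scores, as in STEP ONE
ranks = sp.stats.rankdata(X, method="average", axis=0) / (X.shape[0] + 1)
normal_scores = sp.stats.norm.ppf(ranks)

Q = np.linalg.cholesky(np.corrcoef(normal_scores, rowvar=False))  # QQ' = empirical correlation
via_inverse = normal_scores @ np.linalg.inv(Q).T
via_solve = sp.linalg.solve_triangular(Q, normal_scores.T, lower=True).T

# The two computations agree, without forming the inverse explicitly
assert np.allclose(via_inverse, via_solve)
# The decorrelated scores now have (numerically) identity correlation
assert np.allclose(np.corrcoef(via_solve, rowvar=False), np.eye(3))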

@@ -143,7 +159,7 @@ class TestImanConover:
     def test_marginals_and_correlation_distance(self, seed):
         rng = np.random.default_rng(42)

-        n_variables = rng.integers(2, 100)
+        n_variables = rng.integers(2, 10)
         n_observations = 10 * n_variables
         rng = np.random.default_rng(42)

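For context, a hypothetical usage sketch of the class this file defines (not taken from the repository): the import path is inferred from the src/ layout, and the constructor is assumed to take the target correlation matrix, as the docstring's reference to `correlation_matrix` suggests.

import numpy as np
import scipy as sp

from fmu.tools.sensitivities.iman_conover import ImanConover  # path inferred, not verified

rng = np.random.default_rng(42)

# Target correlation structure for three variables (assumed constructor argument)
correlation_matrix = np.array(
    [
        [1.0, 0.7, 0.2],
        [0.7, 1.0, 0.4],
        [0.2, 0.4, 1.0],
    ]
)

# Independent samples with very different marginals
X = np.column_stack(
    [
        rng.lognormal(size=1000),
        rng.uniform(size=1000),
        rng.normal(size=1000),
    ]
)

transform = ImanConover(correlation_matrix)
X_correlated = transform(X)

# Marginals are preserved exactly: each output column is a permutation of its input column
assert np.allclose(np.sort(X_correlated, axis=0), np.sort(X, axis=0))

# The rank (Spearman) correlation of the output should be close to the target
print(sp.stats.spearmanr(X_correlated).statistic.round(2))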
