diff --git a/src/resample/bootstrap.py b/src/resample/bootstrap.py index 7552cfc..58d833c 100644 --- a/src/resample/bootstrap.py +++ b/src/resample/bootstrap.py @@ -295,6 +295,8 @@ def variance( """ Calculate bootstrap estimate of variance. + If the function returns a vector, the variance is computed elementwise. + Parameters ---------- fn : callable @@ -327,6 +329,51 @@ def variance( return np.var(thetas, ddof=1, axis=0) +def covariance( + fn: Callable[..., np.ndarray], + sample: "ArrayLike", + *args: "ArrayLike", + **kwargs: Any, +) -> np.ndarray: + """ + Calculate bootstrap estimate of covariance. + + Parameters + ---------- + fn : callable + Estimator. Can be any mapping ℝⁿ → ℝᵏ, where n is the sample size + and k is the length of the output array. + sample : array-like + Original sample. + *args : array-like + Optional additional arrays of the same length to resample. + **kwargs + Keyword arguments forwarded to :func:`resample`. + + Returns + ------- + ndarray + Bootstrap estimate of covariance. In general, this is a matrix, but if the + function maps to a scalar, it is scalar as well. + + Examples + -------- + Compute variance of arithmetic mean. + + >>> from resample.bootstrap import variance + >>> import numpy as np + >>> x = np.arange(10) + >>> def fn(x): + ... return np.mean(x), np.var(x) + >>> np.round(covariance(fn, x, size=10000, random_state=1), 1) + array([[0.8, 0. ], + [0. , 5.5]]) + + """ + thetas = bootstrap(fn, sample, *args, **kwargs) + return np.cov(thetas, rowvar=False, ddof=1) + + def confidence_interval( fn: Callable[..., np.ndarray], sample: "ArrayLike", diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py index 4e5f882..b61974f 100644 --- a/tests/test_bootstrap.py +++ b/tests/test_bootstrap.py @@ -10,6 +10,7 @@ confidence_interval, resample, variance, + covariance, ) PARAMETRIC_CONTINUOUS = { @@ -295,6 +296,17 @@ def test_variance(method, rng): assert r == pytest.approx(v, rel=0.05) +@pytest.mark.parametrize("method", NON_PARAMETRIC) +def test_covariance(method, rng): + cov = np.array([[1.0, 0.1], [0.1, 2.0]]) + data = rng.multivariate_normal([0.1, 0.2], cov, size=1000) + + r = covariance( + lambda x: np.mean(x, axis=0), data, size=1000, method=method, random_state=rng + ) + assert_allclose(r, cov / len(data), atol=1e-3) + + def test_resample_deprecation(rng): data = [1, 2, 3] from numpy import VisibleDeprecationWarning