diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a4d922613..ae619b740 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,14 @@ Changelog ========= +v0.55.0 (unreleased) +-------------------- +Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`) + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* New function ``ensemble.partition.general_partition`` (:pull:`2035`) + v0.54.0 (2024-12-16) -------------------- Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Éric Dupuis (:user:`coxipi`), Sascha Hofmann (:user:`saschahofmann`). diff --git a/environment.yml b/environment.yml index cd02063aa..f00d3c72e 100644 --- a/environment.yml +++ b/environment.yml @@ -81,3 +81,4 @@ dependencies: - xdoctest >=1.1.5 - yamllint >=1.35.1 - pip >=24.2.0 + - pygments <2.19 #FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153 diff --git a/pyproject.toml b/pyproject.toml index f665e2cde..f56c801b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,8 @@ docs = [ "sphinx-copybutton", "sphinx-mdinclude", "sphinxcontrib-bibtex", - "sphinxcontrib-svg2pdfconverter[Cairosvg]" + "sphinxcontrib-svg2pdfconverter[Cairosvg]", + "pygments <2.19" # FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153 ] extras = ["fastnanquantile >=0.0.2", "flox >=0.9", "POT >=0.9.4"] all = ["xclim[dev]", "xclim[docs]", "xclim[extras]"] diff --git a/src/xclim/ensembles/__init__.py b/src/xclim/ensembles/__init__.py index 8c503b842..f769f7e88 100644 --- a/src/xclim/ensembles/__init__.py +++ b/src/xclim/ensembles/__init__.py @@ -16,6 +16,7 @@ ) from xclim.ensembles._partitioning import ( fractional_uncertainty, + general_partition, hawkins_sutton, lafferty_sriver, ) diff --git a/src/xclim/ensembles/_partitioning.py b/src/xclim/ensembles/_partitioning.py index 4e9ed51b7..e759a6899 100644 --- a/src/xclim/ensembles/_partitioning.py +++ 
def general_partition(
    da: xr.DataArray,
    sm: xr.DataArray | str = "poly",
    var_first: list | None = None,
    mean_first: list | None = None,
    weights: list | None = None,
) -> tuple[xr.DataArray, xr.DataArray]:
    """
    Return the mean and partitioned variance of an ensemble.

    This is a general function that can be used to implement methods from different papers.
    The defaults are set to match the Lavoie et al. (2025, in preparation) method.

    Parameters
    ----------
    da : xr.DataArray
        Time series with dimensions 'time', 'mean_first', and 'var_first'.
    sm : xr.DataArray or {"poly"}
        Smoothed time series over time, with the same dimensions as `da`.
        If 'poly', this is estimated using a 4th-order polynomial.
        It is also possible to pass a precomputed smoothed time series.
    var_first : list of str
        List of dimensions where the variance is computed first of the dimension,
        followed by the mean over the other dimensions.
    mean_first : list of str
        List of dimensions where the mean over the other dimensions is computed first,
        followed by the variance over the dimension.
    weights : list of str
        List of dimensions where the first operation is weighted.

    Returns
    -------
    xr.DataArray, xr.DataArray
        The mean relative to the baseline, and the components of variance of the
        ensemble. These components are coordinates along the `uncertainty` dimension:
        elements of var_first, elements of mean_first, `variability` and `total`.

    Raises
    ------
    ValueError
        If the time series is not annual, if `da` is missing a required dimension,
        or if `sm` is neither 'poly' nor a DataArray.

    Notes
    -----
    To prepare input data, make sure `da` has dimensions list in both var_first and
    mean_first, as well as time.
    e.g. `da.rename({"experiment": "scenario"})`.

    To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the
    output.
    """
    # Defaults match the Lavoie et al. (2025, in preparation) method.
    var_first = var_first or ["model", "reference", "adjustment"]
    mean_first = mean_first or ["scenario"]
    weights = weights or ["model", "reference", "adjustment"]

    all_types = mean_first + var_first

    if xr.infer_freq(da.time)[0] not in ["A", "Y"]:
        raise ValueError("This algorithm expects annual time series.")

    if not ({"time"} | set(all_types)).issubset(da.dims):
        error_msg = f"DataArray dimensions should include {all_types} and time."
        raise ValueError(error_msg)

    # Record how the smoothed series was obtained *before* `sm` is replaced by the
    # fitted DataArray below; otherwise `partition_fit` would always be "unknown".
    sm_fit = sm if isinstance(sm, str) else "unknown"

    if isinstance(sm, str):
        if sm != "poly":
            raise ValueError("sm should be 'poly' or a DataArray.")
        # Fit a 4th order polynomial as the forced response.
        fit = da.polyfit(dim="time", deg=4, skipna=True)
        sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(
            da.notnull()
        )
    elif not isinstance(sm, xr.DataArray):
        raise ValueError("sm should be 'poly' or a DataArray.")

    # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference
    # between the extracted forced response and the raw outputs, averaged over all outputs."
    # same as lafferty_sriver()
    nv_u = (da - sm).rolling(time=11, center=True).var().mean(dim=all_types)

    all_u = []
    total = nv_u.copy()
    # Mean-first components: variance across `t` of the mean over all other dimensions.
    for t in mean_first:
        all_but_t = [x for x in all_types if x != t]
        if t in weights:
            # Weight by the number of valid members along `t`.
            tw = sm.count(t)
            t_u = sm.mean(dim=all_but_t).weighted(tw).var(dim=t)
        else:
            t_u = sm.mean(dim=all_but_t).var(dim=t)
        all_u.append(t_u)
        total += t_u

    # Var-first components: mean over all other dimensions of the variance across `t`.
    for t in var_first:
        all_but_t = [x for x in all_types if x != t]
        if t in weights:
            tw = sm.count(t)
            t_u = sm.var(dim=t).weighted(tw).mean(dim=all_but_t)
        else:
            t_u = sm.var(dim=t).mean(dim=all_but_t)
        all_u.append(t_u)
        total += t_u

    # Create output array with the uncertainty components
    u = pd.Index([*all_types, "variability", "total"], name="uncertainty")
    uncertainty = xr.concat([*all_u, nv_u, total], dim=u)

    uncertainty.attrs["indicator_long_name"] = da.attrs.get("long_name", "unknown")
    uncertainty.attrs["indicator_description"] = da.attrs.get("description", "unknown")
    uncertainty.attrs["indicator_units"] = da.attrs.get("units", "unknown")
    uncertainty.attrs["partition_fit"] = sm_fit
    # Keep a trace of the elements for each uncertainty component
    for t in all_types:
        uncertainty.attrs[t] = da[t].values

    # Mean projection:
    # This is not part of the original algorithm,
    # but we want all partition algos to have similar outputs.
    # NOTE: the sequential means are kept (rather than a single joint mean) because
    # the two differ when NaNs are present.
    g = sm.mean(dim=all_types[0])
    for dim in all_types[1:]:
        g = g.mean(dim=dim)

    return g, uncertainty
""" - uncertainty = u / u.sel(uncertainty="total") * 100 - uncertainty.attrs.update(u.attrs) - uncertainty.attrs["long_name"] = "Fraction of total variance" - uncertainty.attrs["units"] = "%" - return uncertainty + with xr.set_options(keep_attrs=True): + uncertainty = u / u.sel(uncertainty="total") * 100 + uncertainty.attrs.update(u.attrs) + uncertainty.attrs["long_name"] = "Fraction of total variance" + uncertainty.attrs["units"] = "%" + return uncertainty diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index f34691985..21ffe9995 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -3,7 +3,12 @@ import numpy as np import xarray as xr -from xclim.ensembles import fractional_uncertainty, hawkins_sutton, lafferty_sriver +from xclim.ensembles import ( + fractional_uncertainty, + general_partition, + hawkins_sutton, + lafferty_sriver, +) from xclim.ensembles._filters import _concat_hist, _model_in_all_scens, _single_member @@ -156,3 +161,22 @@ def graph(): plt.show() # graph() + + +def test_general_partition(lafferty_sriver_ds): + """Reproduce Lafferty & Sriver's results using general_partition.""" + g1, u1 = lafferty_sriver(lafferty_sriver_ds.tas) + g2, u2 = general_partition( + lafferty_sriver_ds.tas, + var_first=["model", "downscaling"], + mean_first=["scenario"], + weights=["model", "downscaling"], + sm="poly", + ) + # fix order + u2 = u2.sel( + uncertainty=["model", "scenario", "downscaling", "variability", "total"] + ) + + assert u1.equals(u2) + np.testing.assert_allclose(g1.values, g2.values, atol=0.1)