From 0830edc7fcf825ecf644926dd63d6b38e7b98b00 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Thu, 23 May 2024 15:47:00 -0400 Subject: [PATCH 01/29] add general_partition --- xclim/ensembles/_partitioning.py | 105 +++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index ce957d672..6b01960b5 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -13,6 +13,8 @@ import pandas as pd import xarray as xr +from .sdba import loess + """ Implemented partitioning algorithms: @@ -297,6 +299,109 @@ def lafferty_sriver( return g, uncertainty +def general_partition( + da: xr.DataArray, + sm: xr.DataArray | str = "loess", + var_first: list = ["model", "reference", "method"], + mean_first: list = ["scenario"], + weights: list = ["model", "reference", "method"], +) -> tuple[xr.DataArray, xr.DataArray]: + """Return the mean and partitioned variance of an ensemble. + + This is a general function that can be used to implemented methods from different papers. + The defaults are set to match the Lavoie et al. (2025, in preparation) method. + + Parameters + ---------- + da : xr.DataArray + Time series with dimensions 'time', mean_first and var_first. + sm : xr.DataArray or str + Smoothed time series over time, with the same dimensions as `da`. + If 'poly', this is estimated using a 4th-order polynomial. + If 'loess', this is estimated using a LOESS curve. + It is also possible to pass a precomputed smoothed time series. + var_first: list of strings + List of dimensions where the variance is computed first of the dimension, followed by the mean over the other dimensions. + mean_first : list of strings + List of dimensions where the mean over the other dimensions is computed first, followed by the variance over the dimension. + weights: list of strings + List of dimensions where the first operation is weighted. + + Returns + ------- + xr.DataArray, xr.DataArray + The mean relative to the baseline, and the components of variance of the ensemble. These components are + coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, `downscaling` and `total`. + + Notes + ----- + To prepare input data, make sure `da` has dimensions list in both var_first and mean_first, as well as time. + e.g. `da.rename({"experiment": "scenario"})`. + + To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the + output. + """ + all_types = mean_first + var_first + + if xr.infer_freq(da.time)[0] not in ["A", "Y"]: + raise ValueError("This algorithm expects annual time series.") + + if not ({"time"} | set(all_types)).issubset(da.dims): + raise ValueError(f"DataArray dimensions should include {all_types} and time.") + + if sm == "poly": + # Fit a 4th order polynomial + fit = da.polyfit(dim="time", deg=4, skipna=True) + sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where( + da.notnull() + ) + elif sm == "loess": + sm = loess.loess_smoothing(da) + + # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference + # between the extracted forced response and the raw outputs, averaged over all outputs." + # same as lafferty_sriver() + nv_u = (da - sm).rolling(time=11, center=True).var().mean(dim=all_types) + + all_u = [] + total = nv_u.copy() + for t in mean_first: + all_but_t = [x for x in all_types if x != t] + if t in weights: + tw = sm.count(t) + t_u = sm.mean(dim=all_but_t).weighted(tw).var(dim=t) + + else: + t_u = sm.mean(dim=all_but_t).var(dim=t) + all_u.append(t_u) + total += t_u + + for t in var_first: + all_but_t = [x for x in all_types if x != t] + if t in weights: + tw = sm.count(t) + t_u = sm.var(dim=t).weighted(tw).mean(dim=all_but_t) + + else: + t_u = sm.var(dim=t).mean(dim=all_but_t) + all_u.append(t_u) + total += t_u + + # Create output array with the uncertainty components + u = pd.Index(all_types + ["variability", "total"], name="uncertainty") + uncertainty = xr.concat(all_u + [nv_u, total], dim=u) + + # Keep a trace of the elements for each uncertainty component + for t in all_types: + uncertainty.attrs[t] = da[t].values + + # Mean projection: + # This is not part of the original algorithm, but we want all partition algos to have similar outputs. + g = sm.mean(dim=all_types) + + return g, uncertainty + + def fractional_uncertainty(u: xr.DataArray) -> xr.DataArray: """Return the fractional uncertainty. From 90f3b7d50f4b12cc4038ffad59b685d405d46ea0 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Fri, 24 May 2024 09:00:15 -0400 Subject: [PATCH 02/29] import better --- xclim/ensembles/_partitioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 6b01960b5..205986fc8 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -13,7 +13,7 @@ import pandas as pd import xarray as xr -from .sdba import loess +from xclim.sdba import loess """ Implemented partitioning algorithms: From 7c5463a8b9d5a52ab78535de69494fe54727d4fe Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Fri, 24 May 2024 09:13:31 -0400 Subject: [PATCH 03/29] init --- xclim/ensembles/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xclim/ensembles/__init__.py b/xclim/ensembles/__init__.py index 8f6451495..23a6f039e 100644 --- a/xclim/ensembles/__init__.py +++ b/xclim/ensembles/__init__.py @@ -11,7 +11,12 @@ from __future__ import annotations from ._base import create_ensemble, ensemble_mean_std_max_min, ensemble_percentiles -from ._partitioning import fractional_uncertainty, hawkins_sutton, lafferty_sriver +from ._partitioning import ( + fractional_uncertainty, + general_partition, + hawkins_sutton, + lafferty_sriver, +) from ._reduce import ( kkz_reduce_ensemble, kmeans_reduce_ensemble, From b0c5ddd806fb63e83d1ecc2ca5dca7d3cd898aa9 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 10 Jun 2024 09:46:30 -0400 Subject: [PATCH 04/29] attrs --- xclim/ensembles/_partitioning.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 205986fc8..121dd2f08 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -321,21 +321,25 @@ def general_partition( If 'loess', this is estimated using a LOESS curve. It is also possible to pass a precomputed smoothed time series. var_first: list of strings - List of dimensions where the variance is computed first of the dimension, followed by the mean over the other dimensions. + List of dimensions where the variance is computed first of the dimension, + followed by the mean over the other dimensions. mean_first : list of strings - List of dimensions where the mean over the other dimensions is computed first, followed by the variance over the dimension. + List of dimensions where the mean over the other dimensions is computed first, + followed by the variance over the dimension. weights: list of strings List of dimensions where the first operation is weighted. Returns ------- xr.DataArray, xr.DataArray - The mean relative to the baseline, and the components of variance of the ensemble. These components are - coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, `downscaling` and `total`. + The mean relative to the baseline, and the components of variance of the + ensemble. These components are coordinates along the `uncertainty` dimension: + element of var_first, elements of mean_first and `total`. Notes ----- - To prepare input data, make sure `da` has dimensions list in both var_first and mean_first, as well as time. + To prepare input data, make sure `da` has dimensions list in both var_first and + mean_first, as well as time. e.g. `da.rename({"experiment": "scenario"})`. To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the @@ -391,12 +395,15 @@ def general_partition( u = pd.Index(all_types + ["variability", "total"], name="uncertainty") uncertainty = xr.concat(all_u + [nv_u, total], dim=u) + uncertainty.attrs["indicator_long_name"] = da.attrs.get("long_name", "unknown") + uncertainty.attrs["indicator_description"] = da.attrs.get("description", "unknown") # Keep a trace of the elements for each uncertainty component for t in all_types: uncertainty.attrs[t] = da[t].values # Mean projection: - # This is not part of the original algorithm, but we want all partition algos to have similar outputs. + # This is not part of the original algorithm, + # but we want all partition algos to have similar outputs. g = sm.mean(dim=all_types) return g, uncertainty From a81ba85495fe86bfeb1f8b654ce998d342b76ad3 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 10 Jun 2024 10:35:02 -0400 Subject: [PATCH 05/29] use adjustment --- xclim/ensembles/_partitioning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 121dd2f08..d9d504a5b 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -302,9 +302,9 @@ def lafferty_sriver( def general_partition( da: xr.DataArray, sm: xr.DataArray | str = "loess", - var_first: list = ["model", "reference", "method"], + var_first: list = ["model", "reference", "adjustment"], mean_first: list = ["scenario"], - weights: list = ["model", "reference", "method"], + weights: list = ["model", "reference", "adjustment"], ) -> tuple[xr.DataArray, xr.DataArray]: """Return the mean and partitioned variance of an ensemble. From 132df88d0a843138d5e54b61d43e0b6fa376d2e0 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 12 Jun 2024 16:07:08 -0400 Subject: [PATCH 06/29] try to fix loess --- xclim/sdba/loess.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xclim/sdba/loess.py b/xclim/sdba/loess.py index 2a527d993..10ca0ffe4 100644 --- a/xclim/sdba/loess.py +++ b/xclim/sdba/loess.py @@ -165,6 +165,7 @@ def _loess_nb( residuals = y - yest s = np.median(np.abs(residuals)) xres = residuals / (6.0 * s) + xres[np.isnan(xres)] = 0 # TEST delta = (1 - xres**2) ** 2 delta[np.abs(xres) >= 1] = 0 From 9b6509810a0a1270e4a59466130b2cf348795138 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Thu, 13 Jun 2024 14:33:27 -0400 Subject: [PATCH 07/29] poly if not loess --- xclim/ensembles/_partitioning.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index d9d504a5b..6563db0c5 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -360,7 +360,13 @@ def general_partition( da.notnull() ) elif sm == "loess": - sm = loess.loess_smoothing(da) + try: + sm = loess.loess_smoothing(da) + except np.linalg.LinAlgError: + fit = da.polyfit(dim="time", deg=4, skipna=True) + sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where( + da.notnull() + ) # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference # between the extracted forced response and the raw outputs, averaged over all outputs." From 34a96730985c93679e0867908ad4e43c10a0bf4e Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Thu, 13 Jun 2024 14:34:13 -0400 Subject: [PATCH 08/29] warning --- xclim/ensembles/_partitioning.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 6563db0c5..e2b32e902 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -9,6 +9,8 @@ from __future__ import annotations +import warnings + import numpy as np import pandas as pd import xarray as xr @@ -363,6 +365,7 @@ def general_partition( try: sm = loess.loess_smoothing(da) except np.linalg.LinAlgError: + warnings.warn("LOESS smoothing failed, falling back to polynomial.") fit = da.polyfit(dim="time", deg=4, skipna=True) sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where( da.notnull() From 842793930c08c02c34966e552b0c484c74a96f12 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Thu, 13 Jun 2024 14:58:05 -0400 Subject: [PATCH 09/29] test --- xclim/ensembles/_partitioning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index e2b32e902..b1ef8cc6a 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -363,6 +363,7 @@ def general_partition( ) elif sm == "loess": try: + print("doing loess") sm = loess.loess_smoothing(da) except np.linalg.LinAlgError: warnings.warn("LOESS smoothing failed, falling back to polynomial.") From f7c7f4eb3f8e24d70b99e9cb76c913dfae3b4570 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Thu, 13 Jun 2024 15:20:39 -0400 Subject: [PATCH 10/29] remove try --- xclim/ensembles/_partitioning.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index b1ef8cc6a..d9d504a5b 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -9,8 +9,6 @@ from __future__ import annotations -import warnings - import numpy as np import pandas as pd import xarray as xr @@ -362,15 +360,7 @@ def general_partition( da.notnull() ) elif sm == "loess": - try: - print("doing loess") - sm = loess.loess_smoothing(da) - except np.linalg.LinAlgError: - warnings.warn("LOESS smoothing failed, falling back to polynomial.") - fit = da.polyfit(dim="time", deg=4, skipna=True) - sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where( - da.notnull() - ) + sm = loess.loess_smoothing(da) # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference # between the extracted forced response and the raw outputs, averaged over all outputs." From bfefde6a283b0511e548a1221c4a1399cce318f3 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Fri, 14 Jun 2024 11:06:51 -0400 Subject: [PATCH 11/29] try handling 0s --- xclim/sdba/loess.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/xclim/sdba/loess.py b/xclim/sdba/loess.py index 10ca0ffe4..7b64fbcc0 100644 --- a/xclim/sdba/loess.py +++ b/xclim/sdba/loess.py @@ -42,7 +42,11 @@ def _constant_regression(xi, x, y, w): # pragma: no cover def _linear_regression(xi, x, y, w): # pragma: no cover b = np.array([np.sum(w * y), np.sum(w * y * x)]) A = np.array([[np.sum(w), np.sum(w * x)], [np.sum(w * x), np.sum(w * x * x)]]) - beta = np.linalg.solve(A, b) + # if both matrix are only 0s + if not np.any(A) and not np.any(b): + beta = np.array([0, 0]) + else: + beta = np.linalg.solve(A, b) return beta[0] + beta[1] * xi @@ -165,9 +169,8 @@ def _loess_nb( residuals = y - yest s = np.median(np.abs(residuals)) xres = residuals / (6.0 * s) - xres[np.isnan(xres)] = 0 # TEST delta = (1 - xres**2) ** 2 - delta[np.abs(xres) >= 1] = 0 + delta[(np.abs(xres) >= 1) | np.isnan(xres)] = 0 if skipna: out[~nan] = yest From e2c8782cfd34724a417f72e32ddf0b42022b00ee Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Fri, 14 Jun 2024 11:37:45 -0400 Subject: [PATCH 12/29] try change with delta --- xclim/sdba/loess.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/xclim/sdba/loess.py b/xclim/sdba/loess.py index 7b64fbcc0..5dd9e8806 100644 --- a/xclim/sdba/loess.py +++ b/xclim/sdba/loess.py @@ -43,10 +43,10 @@ def _linear_regression(xi, x, y, w): # pragma: no cover b = np.array([np.sum(w * y), np.sum(w * y * x)]) A = np.array([[np.sum(w), np.sum(w * x)], [np.sum(w * x), np.sum(w * x * x)]]) # if both matrix are only 0s - if not np.any(A) and not np.any(b): - beta = np.array([0, 0]) - else: - beta = np.linalg.solve(A, b) + # if not np.any(A) and not np.any(b): + # beta = np.array([0, 0]) + # else: + beta = np.linalg.solve(A, b) return beta[0] + beta[1] * xi @@ -168,9 +168,16 @@ def _loess_nb( if iteration < niter - 1: residuals = y - yest s = np.median(np.abs(residuals)) - xres = residuals / (6.0 * s) - delta = (1 - xres**2) ** 2 - delta[(np.abs(xres) >= 1) | np.isnan(xres)] = 0 + # xres = residuals / (6.0 * s) + # delta = (1 - xres**2) ** 2 + # delta[(np.abs(xres) >= 1) | np.isnan(xres)] = 0 + + if s == 0: # don't reject any points for the next iteration + delta = np.ones_like(residuals) + else: + xres = residuals / (6.0 * s) + delta = (1 - xres**2) ** 2 + delta[(np.abs(xres) >= 1)] = 0 if skipna: out[~nan] = yest From 456ff76ce8e7dab6c66092294d104f6b61645bb2 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Tue, 18 Jun 2024 14:29:51 -0400 Subject: [PATCH 13/29] keep attrs --- xclim/ensembles/_partitioning.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index d9d504a5b..7d5a58b5d 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -361,6 +361,10 @@ def general_partition( ) elif sm == "loess": sm = loess.loess_smoothing(da) + elif isinstance(sm, xr.DataArray): + sm = sm + else: + raise ValueError("sm should be 'poly', 'loess' or a DataArray.") # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference # between the extracted forced response and the raw outputs, averaged over all outputs." @@ -397,6 +401,7 @@ def general_partition( uncertainty.attrs["indicator_long_name"] = da.attrs.get("long_name", "unknown") uncertainty.attrs["indicator_description"] = da.attrs.get("description", "unknown") + uncertainty.attrs["partition_fit"] = sm if isinstance(sm, str) else "unknown" # Keep a trace of the elements for each uncertainty component for t in all_types: uncertainty.attrs[t] = da[t].values @@ -422,8 +427,9 @@ def fractional_uncertainty(u: xr.DataArray) -> xr.DataArray: xr.DataArray Fractional, or relative uncertainty with respect to the total uncertainty. """ - uncertainty = u / u.sel(uncertainty="total") * 100 - uncertainty.attrs.update(u.attrs) - uncertainty.attrs["long_name"] = "Fraction of total variance" - uncertainty.attrs["units"] = "%" - return uncertainty + with xr.set_options(keep_attrs=True): + uncertainty = u / u.sel(uncertainty="total") * 100 + uncertainty.attrs.update(u.attrs) + uncertainty.attrs["long_name"] = "Fraction of total variance" + uncertainty.attrs["units"] = "%" + return uncertainty From 80ceff57b4e0324f5b3f384cee8cee90bcbfec5b Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 28 Aug 2024 13:11:21 -0400 Subject: [PATCH 14/29] units --- xclim/ensembles/_partitioning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xclim/ensembles/_partitioning.py b/xclim/ensembles/_partitioning.py index 7d5a58b5d..7866980cb 100644 --- a/xclim/ensembles/_partitioning.py +++ b/xclim/ensembles/_partitioning.py @@ -401,6 +401,7 @@ def general_partition( uncertainty.attrs["indicator_long_name"] = da.attrs.get("long_name", "unknown") uncertainty.attrs["indicator_description"] = da.attrs.get("description", "unknown") + uncertainty.attrs["indicator_units"] = da.attrs.get("units", "unknown") uncertainty.attrs["partition_fit"] = sm if isinstance(sm, str) else "unknown" # Keep a trace of the elements for each uncertainty component for t in all_types: From 564fcc69025a7c6bae8e25c7349a0746e9ff314a Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 15:48:10 -0500 Subject: [PATCH 15/29] change log --- CHANGELOG.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a4d922613..f3f0807e0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,15 @@ Changelog ========= +v0.55.0 (unreleased) +-------------------- +Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`) + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* New function ``ensemble.partition.general_partition` + + v0.54.0 (2024-12-16) -------------------- Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Éric Dupuis (:user:`coxipi`), Sascha Hofmann (:user:`saschahofmann`). From b43eff0d43ff75cb28f734bbdcbce989d580a160 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 15:52:18 -0500 Subject: [PATCH 16/29] fix test --- tests/test_partitioning.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 780b9f49c..100cef5c6 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -164,9 +164,14 @@ def graph(): def test_general_partition(lafferty_sriver_ds): - """Defaults should give the same thing as lafferty_sriver""" + """Reproduce Lafferty & Sriver's results using general_partition.""" g1, u1 = lafferty_sriver(lafferty_sriver_ds.tas) - g2, u2 = general_partition(lafferty_sriver_ds.tas) + g2, u2 = general_partition( + lafferty_sriver_ds.tas, + var_first=["model", "downscaling"], + mean_first=["scenario"], + weights=["model", "downscaling"], + ) assert u1.equals(u2) assert g1.equals(g2) From b049ee35b4613ddaccd711664395a3668ad63fb3 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 15:56:09 -0500 Subject: [PATCH 17/29] cleanup --- src/xclim/sdba/loess.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/xclim/sdba/loess.py b/src/xclim/sdba/loess.py index f96682bf8..44a6f67cf 100644 --- a/src/xclim/sdba/loess.py +++ b/src/xclim/sdba/loess.py @@ -42,10 +42,6 @@ def _constant_regression(xi, x, y, w): # pragma: no cover def _linear_regression(xi, x, y, w): # pragma: no cover b = np.array([np.sum(w * y), np.sum(w * y * x)]) A = np.array([[np.sum(w), np.sum(w * x)], [np.sum(w * x), np.sum(w * x * x)]]) - # if both matrix are only 0s - # if not np.any(A) and not np.any(b): - # beta = np.array([0, 0]) - # else: beta = np.linalg.solve(A, b) return beta[0] + beta[1] * xi @@ -169,16 +165,9 @@ def _loess_nb( if iteration < niter - 1: residuals = y - yest s = np.median(np.abs(residuals)) - # xres = residuals / (6.0 * s) - # delta = (1 - xres**2) ** 2 - # delta[(np.abs(xres) >= 1) | np.isnan(xres)] = 0 - - if s == 0: # don't reject any points for the next iteration - delta = np.ones_like(residuals) - else: - xres = residuals / (6.0 * s) - delta = (1 - xres**2) ** 2 - delta[(np.abs(xres) >= 1)] = 0 + xres = residuals / (6.0 * s) + delta = (1 - xres**2) ** 2 + delta[(np.abs(xres) >= 1) | np.isnan(xres)] = 0 if skipna: out[~nan] = yest From 81eadce1929a64ca9ef4697aa78e0858119907b3 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 15:58:36 -0500 Subject: [PATCH 18/29] cleanup --- src/xclim/sdba/loess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xclim/sdba/loess.py b/src/xclim/sdba/loess.py index 44a6f67cf..8cd143642 100644 --- a/src/xclim/sdba/loess.py +++ b/src/xclim/sdba/loess.py @@ -167,7 +167,7 @@ def _loess_nb( s = np.median(np.abs(residuals)) xres = residuals / (6.0 * s) delta = (1 - xres**2) ** 2 - delta[(np.abs(xres) >= 1) | np.isnan(xres)] = 0 + delta[np.abs(xres) >= 1] = 0 if skipna: out[~nan] = yest From b88090f54f23c0c33504de77f865c2c096eb33c3 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 16:07:25 -0500 Subject: [PATCH 19/29] pr num --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f3f0807e0..8dd20b5b1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,7 +8,7 @@ Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`) New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* New function ``ensemble.partition.general_partition` +* New function ``ensemble.partition.general_partition` (:pull:`2035`) v0.54.0 (2024-12-16) From 06ea4c3ac3bf9170f5746ee01b3f008ef84e4253 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 17:22:08 -0500 Subject: [PATCH 20/29] Update CHANGELOG.rst Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- CHANGELOG.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 8dd20b5b1..ae619b740 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,8 +8,7 @@ Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`) New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* New function ``ensemble.partition.general_partition` (:pull:`2035`) - +* New function ``ensemble.partition.general_partition`` (:pull:`2035`) v0.54.0 (2024-12-16) -------------------- From 0444ffedc2b46e8e0701246f145aeba0cbf5e830 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 17:22:28 -0500 Subject: [PATCH 21/29] Update src/xclim/ensembles/_partitioning.py Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- src/xclim/ensembles/_partitioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xclim/ensembles/_partitioning.py b/src/xclim/ensembles/_partitioning.py index 8a730657f..201f9360d 100644 --- a/src/xclim/ensembles/_partitioning.py +++ b/src/xclim/ensembles/_partitioning.py @@ -321,7 +321,7 @@ def general_partition( ---------- da : xr.DataArray Time series with dimensions 'time', mean_first and var_first. - sm : xr.DataArray or str + sm : xr.DataArray or {"poly", "loess"} Smoothed time series over time, with the same dimensions as `da`. If 'poly', this is estimated using a 4th-order polynomial. If 'loess', this is estimated using a LOESS curve. From 58b58ebd5e90e9ddde29c4277221fdf443e25d9f Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 17:22:42 -0500 Subject: [PATCH 22/29] Update src/xclim/ensembles/_partitioning.py Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- src/xclim/ensembles/_partitioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xclim/ensembles/_partitioning.py b/src/xclim/ensembles/_partitioning.py index 201f9360d..a38bcf35d 100644 --- a/src/xclim/ensembles/_partitioning.py +++ b/src/xclim/ensembles/_partitioning.py @@ -320,7 +320,7 @@ def general_partition( Parameters ---------- da : xr.DataArray - Time series with dimensions 'time', mean_first and var_first. + Time series with dimensions 'time', 'mean_first', and 'var_first'. sm : xr.DataArray or {"poly", "loess"} Smoothed time series over time, with the same dimensions as `da`. If 'poly', this is estimated using a 4th-order polynomial. From 525b4c0fdd8a769660fed32b7bd7a778a7fca469 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 17:24:04 -0500 Subject: [PATCH 23/29] Apply suggestions from code review Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- src/xclim/ensembles/_partitioning.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/xclim/ensembles/_partitioning.py b/src/xclim/ensembles/_partitioning.py index a38bcf35d..38929135f 100644 --- a/src/xclim/ensembles/_partitioning.py +++ b/src/xclim/ensembles/_partitioning.py @@ -326,13 +326,13 @@ def general_partition( If 'poly', this is estimated using a 4th-order polynomial. If 'loess', this is estimated using a LOESS curve. It is also possible to pass a precomputed smoothed time series. - var_first : list of strings + var_first : list of str List of dimensions where the variance is computed first of the dimension, followed by the mean over the other dimensions. - mean_first : list of strings + mean_first : list of str List of dimensions where the mean over the other dimensions is computed first, followed by the variance over the dimension. - weights : list of strings + weights : list of str List of dimensions where the first operation is weighted. Returns @@ -375,7 +375,7 @@ def general_partition( elif isinstance(sm, xr.DataArray): sm = sm else: - raise ValueError("sm should be 'poly', 'loess' or a DataArray.") + raise ValueError("sm should be 'poly', 'loess', or a DataArray.") # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference # between the extracted forced response and the raw outputs, averaged over all outputs." @@ -407,8 +407,8 @@ def general_partition( total += t_u # Create output array with the uncertainty components - u = pd.Index(all_types + ["variability", "total"], name="uncertainty") - uncertainty = xr.concat(all_u + [nv_u, total], dim=u) + u = pd.Index([*all_types, "variability", "total"], name="uncertainty") + uncertainty = xr.concat([*all_u, nv_u, total], dim=u) uncertainty.attrs["indicator_long_name"] = da.attrs.get("long_name", "unknown") uncertainty.attrs["indicator_description"] = da.attrs.get("description", "unknown") From 31b73c926a00e33b0488c86cbcb51866f46f84bd Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 17:24:56 -0500 Subject: [PATCH 24/29] suggestion --- src/xclim/ensembles/_partitioning.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xclim/ensembles/_partitioning.py b/src/xclim/ensembles/_partitioning.py index 38929135f..00582714b 100644 --- a/src/xclim/ensembles/_partitioning.py +++ b/src/xclim/ensembles/_partitioning.py @@ -362,7 +362,8 @@ def general_partition( raise ValueError("This algorithm expects annual time series.") if not ({"time"} | set(all_types)).issubset(da.dims): - raise ValueError(f"DataArray dimensions should include {all_types} and time.") + error_msg = f"DataArray dimensions should include {all_types} and time." + raise ValueError(error_msg) if sm == "poly": # Fit a 4th order polynomial From 0887677f1356c3f5fc55761d314ce692eb57a007 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 18:09:56 -0500 Subject: [PATCH 25/29] order --- src/xclim/ensembles/_partitioning.py | 4 +++- tests/test_partitioning.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/xclim/ensembles/_partitioning.py b/src/xclim/ensembles/_partitioning.py index 00582714b..549819642 100644 --- a/src/xclim/ensembles/_partitioning.py +++ b/src/xclim/ensembles/_partitioning.py @@ -422,7 +422,9 @@ def general_partition( # Mean projection: # This is not part of the original algorithm, # but we want all partition algos to have similar outputs. - g = sm.mean(dim=all_types) + g = sm.mean(dim=all_types[0]) + for dim in all_types[1:]: + g = g.mean(dim=dim) return g, uncertainty diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 100cef5c6..64bcb2c7c 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -171,6 +171,11 @@ def test_general_partition(lafferty_sriver_ds): var_first=["model", "downscaling"], mean_first=["scenario"], weights=["model", "downscaling"], + sm="poly", + ) + # fix order + u2 = u2.sel( + uncertainty=["model", "scenario", "downscaling", "variability", "total"] ) assert u1.equals(u2) From 96a458c820817c83fb8d06692c1268204c5b6bbc Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Mon, 6 Jan 2025 21:20:18 -0500 Subject: [PATCH 26/29] close instead of equal --- tests/test_partitioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py index 64bcb2c7c..21ffe9995 100644 --- a/tests/test_partitioning.py +++ b/tests/test_partitioning.py @@ -179,4 +179,4 @@ def test_general_partition(lafferty_sriver_ds): ) assert u1.equals(u2) - assert g1.equals(g2) + np.testing.assert_allclose(g1.values, g2.values, atol=0.1) From 77a9e233ca7f6503ac324895c59a9f480d3583bc Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 8 Jan 2025 10:24:13 -0500 Subject: [PATCH 27/29] fix docs --- environment.yml | 1 + pyproject.toml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index cd02063aa..f00d3c72e 100644 --- a/environment.yml +++ b/environment.yml @@ -81,3 +81,4 @@ dependencies: - xdoctest >=1.1.5 - yamllint >=1.35.1 - pip >=24.2.0 + - pygments <2.19 #FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153 diff --git a/pyproject.toml b/pyproject.toml index f665e2cde..f56c801b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,8 @@ docs = [ "sphinx-copybutton", "sphinx-mdinclude", "sphinxcontrib-bibtex", - "sphinxcontrib-svg2pdfconverter[Cairosvg]" + "sphinxcontrib-svg2pdfconverter[Cairosvg]", + "pygments <2.19" # FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153 ] extras = ["fastnanquantile >=0.0.2", "flox >=0.9", "POT >=0.9.4"] all = ["xclim[dev]", "xclim[docs]", "xclim[extras]"] From ab4397d4fb36a45834eaa2daf94435b8aa0dc980 Mon Sep 17 00:00:00 2001 From: juliettelavoie Date: Wed, 8 Jan 2025 12:38:48 -0500 Subject: [PATCH 28/29] remove loess --- src/xclim/ensembles/_partitioning.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/xclim/ensembles/_partitioning.py b/src/xclim/ensembles/_partitioning.py index 549819642..26eb438be 100644 --- a/src/xclim/ensembles/_partitioning.py +++ b/src/xclim/ensembles/_partitioning.py @@ -13,8 +13,6 @@ import pandas as pd import xarray as xr -from xclim.sdba import loess - # pylint: disable=pointless-string-statement """ Implemented partitioning algorithms: @@ -306,7 +304,7 @@ def lafferty_sriver( def general_partition( da: xr.DataArray, - sm: xr.DataArray | str = "loess", + sm: xr.DataArray | str = "poly", var_first: list | None = None, mean_first: list | None = None, weights: list | None = None, @@ -321,10 +319,9 @@ def general_partition( ---------- da : xr.DataArray Time series with dimensions 'time', 'mean_first', and 'var_first'. - sm : xr.DataArray or {"poly", "loess"} + sm : xr.DataArray or "poly" Smoothed time series over time, with the same dimensions as `da`. If 'poly', this is estimated using a 4th-order polynomial. - If 'loess', this is estimated using a LOESS curve. It is also possible to pass a precomputed smoothed time series. var_first : list of str List of dimensions where the variance is computed first of the dimension, @@ -371,12 +368,10 @@ def general_partition( sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where( da.notnull() ) - elif sm == "loess": - sm = loess.loess_smoothing(da) elif isinstance(sm, xr.DataArray): sm = sm else: - raise ValueError("sm should be 'poly', 'loess', or a DataArray.") + raise ValueError("sm should be 'poly' or a DataArray.") # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference # between the extracted forced response and the raw outputs, averaged over all outputs." From 7e09f65322da01214bfdd41cae8fa3a5e0556f17 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 8 Jan 2025 12:44:03 -0500 Subject: [PATCH 29/29] Update src/xclim/ensembles/_partitioning.py --- src/xclim/ensembles/_partitioning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xclim/ensembles/_partitioning.py b/src/xclim/ensembles/_partitioning.py index 26eb438be..e759a6899 100644 --- a/src/xclim/ensembles/_partitioning.py +++ b/src/xclim/ensembles/_partitioning.py @@ -319,7 +319,7 @@ def general_partition( ---------- da : xr.DataArray Time series with dimensions 'time', 'mean_first', and 'var_first'. - sm : xr.DataArray or "poly" + sm : xr.DataArray or {"poly"} Smoothed time series over time, with the same dimensions as `da`. If 'poly', this is estimated using a 4th-order polynomial. It is also possible to pass a precomputed smoothed time series.