Commit a12d1f9

Merge branch 'main' into pre-commit-ci-update-config

Zeitsperre authored Dec 9, 2024
2 parents 7ade465 + 2ede288
Showing 13 changed files with 148 additions and 157 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
@@ -348,6 +348,7 @@ jobs:
           api.electricitymap.org:443
           api.github.com:443
           api.green-coding.io:443
+          conda.anaconda.org:443
           coveralls.io:443
           files.pythonhosted.org:443
           github.com:443
2 changes: 2 additions & 0 deletions CHANGELOG.rst
@@ -20,6 +20,8 @@ Bug fixes
 Internal changes
 ^^^^^^^^^^^^^^^^
 * Changed French translations with the word "pluvieux" to "avec précipitations". (:issue:`1960`, :pull:`1994`).
+* Using different times for `ref` and `hist` is now explicitly forbidden in many bias adjustment methods (e.g. `EmpiricalQuantileMapping`). Methods that combine `ref`, `hist` and `sim` in the same `map_groups` call also require the time arrays to be equal in size (see the sketch after this diff). (:issue:`1903`, :pull:`1995`)
+* NaNs in `OTC` and `dOTC` are now dropped and put back in place at the lowest level, so that the size of the time array never changes at the xarray level. (:pull:`1995`)
 * `streamflow` entry replaced with `q` in ``variables.yml``. (:issue:`1912`, :pull:`1996`)
 * In order to address 403 (forbidden) request errors when retrieving data from GitHub via ReadTheDocs, the ``nimbus`` class has been modified to use an overloaded `fetch` method that appends a User-Agent header to the request. (:pull:`2001`).
 * Addressed a very rare race condition that can happen if `pytest` is tearing down the test environment when running across multiple workers. (:pull:`1863`).
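As a rough illustration of the new time-axis checks from :pull:`1995` (a minimal sketch based on the tests added further down in this commit — the helper and synthetic data are ours; the error message is quoted from the test suite):

import numpy as np
import pandas as pd
import xarray as xr
from xclim.sdba.adjustment import EmpiricalQuantileMapping

def tas(start, n=10):
    # Hypothetical stand-in for the test suite's `series` fixture.
    time = pd.date_range(start, periods=n, freq="D")
    return xr.DataArray(
        np.random.default_rng(0).random(n),
        dims=("time",),
        coords={"time": time},
        attrs={"units": "K"},
        name="tas",
    )

ref, hist = tas("2000-01-01"), tas("2010-01-01")

# Same length, but distinct time arrays: training now raises a ValueError like
# "`ref` and `hist` have distinct time arrays, this is not supported for
# EmpiricalQuantileMapping adjustment."
EmpiricalQuantileMapping.train(ref=ref, hist=hist)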
2 changes: 1 addition & 1 deletion docs/notebooks/customize.ipynb
@@ -36,7 +36,7 @@
    "outputs": [],
    "source": [
     "tasmax = (\n",
-    "    xr.tutorial.open_dataset(\"air_temperature\")\n",
+    "    xr.tutorial.load_dataset(\"air_temperature\")\n",
     "    .air.resample(time=\"D\")\n",
     "    .max(keep_attrs=True)\n",
     ")\n",
2 changes: 1 addition & 1 deletion docs/notebooks/sdba-advanced.ipynb
@@ -69,7 +69,7 @@
    "outputs": [],
    "source": [
     "# Daily temperature data from xarray's tutorials\n",
-    "ds = xr.tutorial.open_dataset(\"air_temperature\").resample(time=\"D\").mean()\n",
+    "ds = xr.tutorial.load_dataset(\"air_temperature\").resample(time=\"D\").mean()\n",
     "tas = ds.isel(lat=0, lon=0).air\n",
     "\n",
     "# Compute the smoothed series\n",
4 changes: 2 additions & 2 deletions docs/notebooks/units.ipynb
@@ -48,7 +48,7 @@
    "outputs": [],
    "source": [
     "# See the Usage page for details on opening datasets, subsetting and resampling.\n",
-    "ds = xr.tutorial.open_dataset(\"air_temperature\")\n",
+    "ds = xr.tutorial.load_dataset(\"air_temperature\")\n",
     "tas = (\n",
     "    ds.air.sel(lat=40, lon=270, method=\"nearest\")\n",
     "    .resample(time=\"D\")\n",
@@ -193,7 +193,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds = xr.tutorial.open_dataset(\"air_temperature\")\n",
+    "ds = xr.tutorial.load_dataset(\"air_temperature\")\n",
     "tas_6h = ds.air.sel(\n",
     "    lat=40, lon=270, method=\"nearest\"\n",
     ") # no resampling, original data is 6-hourly\n",
2 changes: 1 addition & 1 deletion docs/notebooks/usage.ipynb
@@ -139,7 +139,7 @@
    "source": [
     "# Show that data is not at a daily time frequency\n",
     "\n",
-    "ds6h = xr.tutorial.open_dataset(\"air_temperature\")\n",
+    "ds6h = xr.tutorial.load_dataset(\"air_temperature\")\n",
     "xr.infer_freq(ds6h.time)"
    ]
   },
1 change: 0 additions & 1 deletion environment.yml
@@ -1,7 +1,6 @@
 name: xclim
 channels:
   - conda-forge
-  - defaults
 dependencies:
   - python >=3.10,<3.14
   - boltons >=20.1
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -134,7 +134,7 @@ target-version = [
 ]

 [tool.bumpversion]
-current_version = "0.53.3-dev.6"
+current_version = "0.53.3-dev.7"
 commit = true
 commit_args = "--no-verify"
 tag = false
2 changes: 1 addition & 1 deletion tests/test_sdba/conftest.py
@@ -114,7 +114,7 @@ def ref_hist_sim_tuto(socket_enabled):  # noqa: F841
     """

     def _ref_hist_sim_tuto(sim_offset=3, delta=0.1, smth_win=3, trend=True):
-        ds = xr.tutorial.open_dataset("air_temperature")
+        ds = xr.tutorial.load_dataset("air_temperature")
         ref = ds.air.resample(time="D").mean(keep_attrs=True)
         hist = ref.rolling(time=smth_win, min_periods=1).mean(keep_attrs=True) + delta
         hist.attrs["units"] = ref.attrs["units"]
157 changes: 48 additions & 109 deletions tests/test_sdba/test_adjustment.py
@@ -66,6 +66,28 @@ def test_harmonize_units_multivariate(self, series, random, use_dask):
         ds, ds2 = unstack_variables(da), unstack_variables(da2)
         assert (ds.tas.units == ds2.tas.units) & (ds.pr.units == ds2.pr.units)

+    def test_matching_times(self, series, random):
+        n = 10
+        u = random.random(n)
+        da = series(u, "tas", start="2000-01-01")
+        da2 = series(u, "tas", start="2010-01-01")
+        with pytest.raises(
+            ValueError,
+            match="`ref` and `hist` have distinct time arrays, this is not supported for BaseAdjustment adjustment.",
+        ):
+            BaseAdjustment._check_matching_times(ref=da, hist=da2)
+
+    def test_matching_time_sizes(self, series, random):
+        n = 10
+        u = random.random(n)
+        da = series(u, "tas", start="2000-01-01")
+        da2 = da.isel(time=slice(0, 5)).copy()
+        with pytest.raises(
+            ValueError,
+            match="Inputs have different size for the time array, this is not supported for BaseAdjustment adjustment.",
+        ):
+            BaseAdjustment._check_matching_time_sizes(da, da2)
+

 class TestLoci:
     @pytest.mark.parametrize("group,dec", (["time", 2], ["time.month", 1]))
@@ -871,53 +893,6 @@ def test_compare_sbck(self, random, series):
         scen_sbck = scen_sbck.to_numpy()
         assert np.allclose(scen, scen_sbck)

-    def test_shape(self, random, series):
-        pytest.importorskip("ot")
-        pytest.importorskip("SBCK", minversion="0.4.0")
-        ref_ns = 300
-        hist_ns = 200
-        ref_u = random.random(ref_ns)
-        hist_u = random.random(hist_ns)
-
-        ref_xd = uniform(loc=1000, scale=100)
-        ref_yd = norm(loc=0, scale=100)
-        ref_zd = norm(loc=500, scale=100)
-        hist_xd = norm(loc=-500, scale=100)
-        hist_yd = uniform(loc=-1000, scale=100)
-        hist_zd = uniform(loc=-10, scale=100)
-
-        ref_x = ref_xd.ppf(ref_u)
-        ref_y = ref_yd.ppf(ref_u)
-        ref_z = ref_zd.ppf(ref_u)
-        hist_x = hist_xd.ppf(hist_u)
-        hist_y = hist_yd.ppf(hist_u)
-        hist_z = hist_zd.ppf(hist_u)
-
-        ref_na = 10
-        hist_na = 15
-        ref_idx = random.choice(range(ref_ns), size=ref_na, replace=False)
-        ref_x[ref_idx] = None
-        hist_idx = random.choice(range(hist_ns), size=hist_na, replace=False)
-        hist_x[hist_idx] = None
-
-        ref_x = series(ref_x, "tas").rename("x")
-        ref_y = series(ref_y, "tas").rename("y")
-        ref_z = series(ref_z, "tas").rename("z")
-        ref = xr.merge([ref_x, ref_y, ref_z])
-        ref = stack_variables(ref)
-
-        hist_x = series(hist_x, "tas").rename("x")
-        hist_y = series(hist_y, "tas").rename("y")
-        hist_z = series(hist_z, "tas").rename("z")
-        hist = xr.merge([hist_x, hist_y, hist_z])
-        hist = stack_variables(hist)
-
-        scen = OTC.adjust(ref, hist)
-
-        assert scen.shape == (3, hist_ns - hist_na)
-        hist = unstack_variables(hist)
-        assert not np.isin(hist.x[hist.x.isnull()].time.values, scen.time.values).any()
-

 # TODO: Add tests for normalization methods
 class TestdOTC:
@@ -1004,69 +979,33 @@ def test_compare_sbck(self, random, series, use_dask, cov_factor):
         scen_sbck = scen_sbck.to_numpy()
         assert np.allclose(scen, scen_sbck)

-    def test_shape(self, random, series):
+    def test_different_times(self, tasmax_series, tasmin_series):
+        # just check it runs
         pytest.importorskip("ot")
         pytest.importorskip("SBCK", minversion="0.4.0")
-        ref_ns = 300
-        hist_ns = 200
-        sim_ns = 400
-        ref_u = random.random(ref_ns)
-        hist_u = random.random(hist_ns)
-        sim_u = random.random(sim_ns)
-
-        ref_xd = uniform(loc=1000, scale=100)
-        ref_yd = norm(loc=0, scale=100)
-        ref_zd = norm(loc=500, scale=100)
-        hist_xd = norm(loc=-500, scale=100)
-        hist_yd = uniform(loc=-1000, scale=100)
-        hist_zd = uniform(loc=-10, scale=100)
-        sim_xd = norm(loc=0, scale=100)
-        sim_yd = uniform(loc=0, scale=100)
-        sim_zd = uniform(loc=10, scale=100)
-
-        ref_x = ref_xd.ppf(ref_u)
-        ref_y = ref_yd.ppf(ref_u)
-        ref_z = ref_zd.ppf(ref_u)
-        hist_x = hist_xd.ppf(hist_u)
-        hist_y = hist_yd.ppf(hist_u)
-        hist_z = hist_zd.ppf(hist_u)
-        sim_x = sim_xd.ppf(sim_u)
-        sim_y = sim_yd.ppf(sim_u)
-        sim_z = sim_zd.ppf(sim_u)
-
-        ref_na = 10
-        hist_na = 15
-        sim_na = 20
-        ref_idx = random.choice(range(ref_ns), size=ref_na, replace=False)
-        ref_x[ref_idx] = None
-        hist_idx = random.choice(range(hist_ns), size=hist_na, replace=False)
-        hist_x[hist_idx] = None
-        sim_idx = random.choice(range(sim_ns), size=sim_na, replace=False)
-        sim_x[sim_idx] = None
-
-        ref_x = series(ref_x, "tas").rename("x")
-        ref_y = series(ref_y, "tas").rename("y")
-        ref_z = series(ref_z, "tas").rename("z")
-        ref = xr.merge([ref_x, ref_y, ref_z])
-        ref = stack_variables(ref)
-
-        hist_x = series(hist_x, "tas").rename("x")
-        hist_y = series(hist_y, "tas").rename("y")
-        hist_z = series(hist_z, "tas").rename("z")
-        hist = xr.merge([hist_x, hist_y, hist_z])
-        hist = stack_variables(hist)
-
-        sim_x = series(sim_x, "tas").rename("x")
-        sim_y = series(sim_y, "tas").rename("y")
-        sim_z = series(sim_z, "tas").rename("z")
-        sim = xr.merge([sim_x, sim_y, sim_z])
-        sim = stack_variables(sim)
-
-        scen = dOTC.adjust(ref, hist, sim)
-
-        assert scen.shape == (3, sim_ns - sim_na)
-        sim = unstack_variables(sim)
-        assert not np.isin(sim.x[sim.x.isnull()].time.values, scen.time.values).any()
+        # `sim` has a different time than `ref,hist` (but same size)
+        ref = xr.merge(
+            [
+                tasmax_series(np.arange(730).astype(float), start="2000-01-01").chunk(
+                    {"time": -1}
+                ),
+                tasmin_series(np.arange(730).astype(float), start="2000-01-01").chunk(
+                    {"time": -1}
+                ),
+            ]
+        )
+        hist = ref.copy()
+        sim = xr.merge(
+            [
+                tasmax_series(np.arange(730).astype(float), start="2020-01-01").chunk(
+                    {"time": -1}
+                ),
+                tasmin_series(np.arange(730).astype(float), start="2020-01-01").chunk(
+                    {"time": -1}
+                ),
+            ]
+        )
+        ref, hist, sim = (stack_variables(arr) for arr in [ref, hist, sim])
+        dOTC.adjust(ref, hist, sim)


def test_raise_on_multiple_chunks(tas_series):
2 changes: 1 addition & 1 deletion xclim/__init__.py
@@ -13,7 +13,7 @@

 __author__ = """Travis Logan"""
 __email__ = "[email protected]"
-__version__ = "0.53.3-dev.6"
+__version__ = "0.53.3-dev.7"


 with _resources.as_file(_resources.files("xclim.data")) as _module_data:
47 changes: 27 additions & 20 deletions xclim/sdba/_adjustment.py
@@ -988,6 +988,12 @@ def _otc_adjust(
     ----------
     :cite:cts:`sdba-robin_2021`
     """
+    # nans are removed and put back in place at the end
+    X_og = X.copy()
+    mask = (~np.isnan(X)).all(axis=1)
+    X = X[mask]
+    Y = Y[(~np.isnan(Y)).all(axis=1)]
+
     # Initialize parameters
     if bin_width is None:
         bin_width = u.bin_width_estimator([Y, X])
@@ -1042,7 +1048,11 @@
     if jitter_inside_bins:
         out += np.random.uniform(low=-bin_width / 2, high=bin_width / 2, size=out.shape)

-    return out
+    # reintroduce nans
+    Z = X_og
+    Z[mask] = out
+    Z[~mask] = np.nan
+    return Z


 @map_groups(scen=[Grouper.DIM])
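An aside on the hunk above: it uses the classic mask-and-restore idiom, dropping NaN rows before the adjustment and writing results back into a copy of the original array. A standalone illustration in plain NumPy (our own, with made-up values):

import numpy as np

X = np.array([[1.0, 2.0], [np.nan, 3.0], [4.0, 5.0]])
X_og = X.copy()
mask = (~np.isnan(X)).all(axis=1)  # rows with no NaN in any column
out = X[mask] * 10                 # stand-in for the actual OTC adjustment
Z = X_og
Z[mask] = out
Z[~mask] = np.nan                  # NaN rows land back in their original slots
# Z is now [[10., 20.], [nan, nan], [40., 50.]] — the input shape is preserved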
@@ -1102,9 +1112,9 @@ def otc_adjust(
     )

     ref_map = {d: f"ref_{d}" for d in dim}
-    ref = ref.rename(ref_map).stack(dim_ref=ref_map.values()).dropna(dim="dim_ref")
+    ref = ref.rename(ref_map).stack(dim_ref=ref_map.values())

-    hist = hist.stack(dim_hist=dim).dropna(dim="dim_hist")
+    hist = hist.stack(dim_hist=dim)

     if isinstance(bin_width, dict):
         bin_width = {
@@ -1134,12 +1144,7 @@
         vectorize=True,
     )

-    # Pad dim differences with NA to please map_blocks
-    ref = ref.unstack().rename({v: k for k, v in ref_map.items()})
     scen = scen.unstack().rename("scen")
-    for d in dim:
-        full_d = xr.concat([ref[d], scen[d]], dim=d).drop_duplicates(d)
-        scen = scen.reindex({d: full_d})

     return scen.to_dataset()

@@ -1193,6 +1198,12 @@ def _dotc_adjust(
     ----------
     :cite:cts:`sdba-robin_2021`
     """
+    # nans are removed and put back in place at the end
+    X1_og = X1.copy()
+    mask = ~np.isnan(X1).any(axis=1)
+    X1 = X1[mask]
+    X0 = X0[~np.isnan(X0).any(axis=1)]
+    Y0 = Y0[~np.isnan(Y0).any(axis=1)]
     # Initialize parameters
     if isinstance(bin_width, dict):
         _bin_width = u.bin_width_estimator([Y0, X0, X1])
@@ -1259,7 +1270,7 @@
         Y1[:, j] = Y0[:, j] + motion[:, j]

     # Map sim to the evolution of ref
-    Z1 = _otc_adjust(
+    out = _otc_adjust(
         X1,
         Y1,
         bin_width=bin_width,
@@ -1268,6 +1279,10 @@
         jitter_inside_bins=jitter_inside_bins,
         normalization=normalization,
     )
+    # reintroduce nans
+    Z1 = X1_og
+    Z1[mask] = out
+    Z1[~mask] = np.nan

     return Z1

@@ -1339,14 +1354,12 @@ def dotc_adjust(

     # Drop data added by map_blocks and prepare for apply_ufunc
     hist_map = {d: f"hist_{d}" for d in dim}
-    hist = (
-        hist.rename(hist_map).stack(dim_hist=hist_map.values()).dropna(dim="dim_hist")
-    )
+    hist = hist.rename(hist_map).stack(dim_hist=hist_map.values())

     ref_map = {d: f"ref_{d}" for d in dim}
-    ref = ref.rename(ref_map).stack(dim_ref=ref_map.values()).dropna(dim="dim_ref")
+    ref = ref.rename(ref_map).stack(dim_ref=ref_map.values())

-    sim = sim.stack(dim_sim=dim).dropna(dim="dim_sim")
+    sim = sim.stack(dim_sim=dim)

     if kind is not None:
         kind = {
@@ -1387,12 +1400,6 @@
         vectorize=True,
     )

-    # Pad dim differences with NA to please map_blocks
-    hist = hist.unstack().rename({v: k for k, v in hist_map.items()})
-    ref = ref.unstack().rename({v: k for k, v in ref_map.items()})
     scen = scen.unstack().rename("scen")
-    for d in dim:
-        full_d = xr.concat([hist[d], ref[d], scen[d]], dim=d).drop_duplicates(d)
-        scen = scen.reindex({d: full_d})

     return scen.to_dataset()
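Putting the pieces together, a minimal end-to-end sketch of the new NaN behaviour (our own, modelled on the tests above; it assumes the optional `POT` (`ot`) and `SBCK` dependencies are installed):

import numpy as np
import pandas as pd
import xarray as xr
from xclim.sdba.adjustment import OTC
from xclim.sdba.processing import stack_variables, unstack_variables

n = 300
time = pd.date_range("2000-01-01", periods=n, freq="D")
rng = np.random.default_rng(42)

def da(values, units, name):
    # Hypothetical helper mirroring the test suite's `series` fixture.
    return xr.DataArray(
        values, dims=("time",), coords={"time": time}, attrs={"units": units}, name=name
    )

ref = xr.merge([da(rng.normal(280, 5, n), "K", "tasmax"), da(rng.normal(270, 5, n), "K", "tasmin")])
hist = xr.merge([da(rng.normal(278, 5, n), "K", "tasmax"), da(rng.normal(268, 5, n), "K", "tasmin")])

# Insert a NaN: it is dropped internally and put back in place at the lowest
# level, so the time axis of `scen` keeps its full length.
hist["tasmax"][10] = np.nan

ref, hist = stack_variables(ref), stack_variables(hist)
scen = OTC.adjust(ref, hist)

assert scen.sizes["time"] == n  # the time array size never changes
assert bool(unstack_variables(scen).tasmax.isnull()[10])  # NaN restored in place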