Skip to content

Commit

Permalink
example with nulls
Browse files Browse the repository at this point in the history
  • Loading branch information
DeaMariaLeon committed Nov 3, 2024
1 parent 1dfab2c commit 6f738cd
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 18 deletions.
31 changes: 31 additions & 0 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,37 @@ def ewm_mean(
│ 1.666667 │
│ 2.428571 │
└──────────┘
pandas and Polars handle nulls differently, so calculating ewm over
a sequence with null values leads to distinct results:
>>> data = {"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]}
>>> df_pd2 = pd.DataFrame(data)
>>> df_pl2 = pl.DataFrame(data)
>>> func(df_pd2)
a
0 2.000000
1 3.333333
2 3.333333
3 3.090909
4 3.090909
5 3.023256
>>> func(df_pl2) # doctest: +NORMALIZE_WHITESPACE
shape: (6, 1)
┌──────────┐
│ a │
│ --- │
│ f64 │
╞══════════╡
│ 2.0 │
│ 3.333333 │
│ null │
│ 3.090909 │
│ NaN │
│ NaN │
└──────────┘
"""
return self.__class__(
lambda plx: self._call(plx).ewm_mean(
Expand Down
27 changes: 27 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,33 @@ def ewm_mean(
2.428571
]
pandas and Polars handle nulls differently, so calculating ewm over
a sequence with null values leads to distinct results:
>>> data = [2.0, 4.0, None, 3.0, float("nan"), 3.0]
>>> s_pd2 = pd.Series(name="a", data=data)
>>> s_pl2 = pl.Series(name="a", values=data)
>>> func(s_pd2)
0 2.000000
1 3.333333
2 3.333333
3 3.090909
4 3.090909
5 3.023256
Name: a, dtype: float64
>>> func(s_pl2) # doctest: +NORMALIZE_WHITESPACE
shape: (6,)
Series: 'a' [f64]
[
2.0
3.333333
null
3.090909
NaN
NaN
]
"""
return self._from_compliant_series(
self._compliant_series.ewm_mean(
Expand Down
50 changes: 32 additions & 18 deletions tests/expr_and_series/ewm_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,43 +81,57 @@ def test_ewm_mean_nulls(
ignore_nulls: bool, # noqa: FBT001
constructor: Constructor,
) -> None:
# When calculating ewm over a sequence with null values, pandas and Polars handle nulls differently,
# leading to distinct results:
# For non-null entries in the sequence, both exclude null values during ewm calculation,
# and both produce the same result for these non-null entries. The weights for these values are determined by
# the ignore_nulls parameter.
# For null values, however, pandas calculates an ewm value, while Polars returns null for these positions.
#
# Also, NaN values are treated differently between the two libraries:
# In Polars, NaN values are not treated as nulls, so a NaN entry results in NaN for that entry and
# for all subsequent entries in the EWM calculation.
# In pandas, NaN values are considered nulls, so pandas computes an ewm value for these entries instead.

if any(x in str(constructor) for x in ("pyarrow_table_", "dask")):
request.applymarker(pytest.mark.xfail)

df = nw.from_native(
constructor({"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]})
)
result = df.select(nw.col("a", "b").ewm_mean(com=1, ignore_nulls=ignore_nulls))

df = nw.from_native(constructor({"a": [2.0, 4.0, None, 3.0, float("nan"), 3.0]}))
result = df.select(nw.col("a").ewm_mean(com=1, ignore_nulls=ignore_nulls))
constructor_type = "polars" if "polars" in str(constructor) else "other"

expected_results: dict[tuple[str, bool], dict[str, list[float | None]]] = {
("polars", False): {
"a": [2.0, 3.3333333333333335, None, 3.090909090909091, 4.222222222222222],
"b": [2.0, 3.3333333333333335, float("nan"), float("nan"), float("nan")],
"a": [
2.0,
3.3333333333333335,
None,
3.090909090909091,
float("nan"),
float("nan"),
],
},
("polars", True): {
"a": [2.0, 3.3333333333333335, None, 3.142857142857143, 4.133333333333334],
"b": [2.0, 3.3333333333333335, float("nan"), float("nan"), float("nan")],
"a": [
2.0,
3.3333333333333335,
None,
3.142857142857143,
float("nan"),
float("nan"),
],
},
("other", False): {
"a": [2.000000, 3.333333, 3.333333, 3.090909, 4.222222],
"b": [2.000000, 3.333333, 3.333333, 3.090909, 4.222222],
"a": [2.000000, 3.333333, 3.333333, 3.090909, 3.090909, 3.023256],
},
("other", True): {
"a": [
2.0,
3.3333333333333335,
3.3333333333333335,
3.142857142857143,
4.133333333333334,
],
"b": [
2.0,
3.3333333333333335,
3.3333333333333335,
3.142857142857143,
4.133333333333334,
3.066666666666667,
],
},
}
Expand Down

0 comments on commit 6f738cd

Please sign in to comment.