Skip to content

Commit

Permalink
feat: add Series|Expr.is_finite method (#1341)
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi authored Nov 18, 2024
1 parent 68e9bbe commit 2784596
Show file tree
Hide file tree
Showing 10 changed files with 178 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- clip
- is_between
- is_duplicated
- is_finite
- is_first_distinct
- is_in
- is_last_distinct
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
- is_between
- is_duplicated
- is_empty
- is_finite
- is_first_distinct
- is_in
- is_last_distinct
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,9 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
dtypes=self._dtypes,
)

def is_finite(self: Self) -> Self:
    """Build the `is_finite` expression via `reuse_series_implementation`."""
    method_name = "is_finite"
    return reuse_series_implementation(self, method_name)

def cum_count(self: Self, *, reverse: bool) -> Self:
return reuse_series_implementation(self, "cum_count", reverse=reverse)

Expand Down
5 changes: 5 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,11 @@ def mode(self: Self) -> ArrowSeries:
plx.col(col_token) == plx.col(col_token).max()
)[self.name]

def is_finite(self: Self) -> Self:
    """Return a new series holding `pyarrow.compute.is_finite` of the native data."""
    import pyarrow.compute as pc  # ignore-banned-import

    finite_mask = pc.is_finite(self._native_series)
    return self._from_native_series(finite_mask)

def cum_count(self: Self, *, reverse: bool) -> Self:
return (~self.is_null()).cast(self._dtypes.UInt32()).cum_sum(reverse=reverse)

Expand Down
9 changes: 9 additions & 0 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,15 @@ def func(_input: Any, dtype: DType | type[DType]) -> Any:
returns_scalar=False,
)

def is_finite(self: Self) -> Self:
    """Return an expression that applies `dask.array.isfinite` to its input."""
    import dask.array as da  # ignore-banned-import

    def func(_input: Any) -> Any:
        # Element-wise finiteness check on the incoming dask object.
        return da.isfinite(_input)

    return self._from_call(func, "is_finite", returns_scalar=False)


class DaskExprStringNamespace:
def __init__(self, expr: DaskExpr) -> None:
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,9 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
dtypes=self._dtypes,
)

def is_finite(self: Self) -> Self:
    """Build the `is_finite` expression via `reuse_series_implementation`."""
    method_name = "is_finite"
    return reuse_series_implementation(self, method_name)

def cum_count(self: Self, *, reverse: bool) -> Self:
return reuse_series_implementation(self, "cum_count", reverse=reverse)

Expand Down
4 changes: 4 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,10 @@ def cum_prod(self: Self, *, reverse: bool) -> Self:
def __iter__(self: Self) -> Iterator[Any]:
yield from self._native_series.__iter__()

def is_finite(self: Self) -> Self:
    """Return a boolean series that is True where a value lies strictly between -inf and +inf.

    The result is the element-wise AND of two comparisons on the native
    series, wrapped with `_from_native_series`.
    """
    native = self._native_series
    above_negative_inf = native > float("-inf")
    below_positive_inf = native < float("inf")
    return self._from_native_series(above_negative_inf & below_positive_inf)

@property
def str(self) -> PandasLikeSeriesStringNamespace:
return PandasLikeSeriesStringNamespace(self)
Expand Down
57 changes: 55 additions & 2 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1496,8 +1496,8 @@ def is_null(self) -> Self:
A new expression.
Notes:
pandas and Polars handle null values differently. Polars distinguishes
between NaN and Null, whereas pandas doesn't.
pandas, Polars and PyArrow handle null values differently. Polars and PyArrow
distinguish between NaN and Null, whereas pandas doesn't.
Examples:
>>> import pandas as pd
Expand Down Expand Up @@ -2701,6 +2701,59 @@ def mode(self: Self) -> Self:
"""
return self.__class__(lambda plx: self._call(plx).mode())

def is_finite(self: Self) -> Self:
    """Returns boolean values indicating which original values are finite.

    Warning:
        Different backends handle null values differently. `is_finite` will return
        False for NaN and Null values in the Dask and pandas non-nullable backends,
        while for Polars, PyArrow and pandas nullable backends null values are kept
        as such.

    Returns:
        Expression of `Boolean` data type.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = {"a": [float("nan"), float("inf"), 2.0, None]}

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.select(nw.col("a").is_finite())

        We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:

        >>> func(pd.DataFrame(data))
               a
        0  False
        1  False
        2   True
        3  False
        >>> func(pl.DataFrame(data))
        shape: (4, 1)
        ┌───────┐
        │ a     │
        │ ---   │
        │ bool  │
        ╞═══════╡
        │ false │
        │ false │
        │ true  │
        │ null  │
        └───────┘
        >>> func(pa.table(data))
        pyarrow.Table
        a: bool
        ----
        a: [[false,false,true,null]]
    """
    return self.__class__(lambda plx: self._call(plx).is_finite())

def cum_count(self: Self, *, reverse: bool = False) -> Self:
r"""Return the cumulative count of the non-null values in the column.
Expand Down
56 changes: 56 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2634,6 +2634,62 @@ def mode(self: Self) -> Self:
"""
return self._from_compliant_series(self._compliant_series.mode())

def is_finite(self: Self) -> Self:
    """Returns a boolean Series indicating which values are finite.

    Warning:
        Different backends handle null values differently. `is_finite` will return
        False for NaN and Null values in the Dask and pandas non-nullable backends,
        while for Polars, PyArrow and pandas nullable backends null values are kept
        as such.

    Returns:
        Series of `Boolean` data type.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = [float("nan"), float("inf"), 2.0, None]

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.is_finite()

        We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:

        >>> func(pd.Series(data))
        0    False
        1    False
        2     True
        3    False
        dtype: bool
        >>> func(pl.Series(data))  # doctest: +NORMALIZE_WHITESPACE
        shape: (4,)
        Series: '' [bool]
        [
           false
           false
           true
           null
        ]
        >>> func(pa.chunked_array([data]))  # doctest: +ELLIPSIS
        <pyarrow.lib.ChunkedArray object at ...>
        [
          [
            false,
            false,
            true,
            null
          ]
        ]
    """
    return self._from_compliant_series(self._compliant_series.is_finite())

def cum_count(self: Self, *, reverse: bool = False) -> Self:
r"""Return the cumulative count of the non-null values in the series.
Expand Down
41 changes: 41 additions & 0 deletions tests/expr_and_series/is_finite_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from __future__ import annotations

import pytest

import narwhals.stable.v1 as nw
from tests.utils import Constructor
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data

data = {"a": [float("nan"), float("inf"), 2.0, None]}


@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
def test_is_finite_expr(constructor: Constructor) -> None:
    """Check `nw.col("a").is_finite()` against the expected result for each backend."""
    backend = str(constructor)
    if any(name in backend for name in ("polars", "pyarrow_table")):
        # The null entry stays null; NaN and inf are reported as not finite.
        expected = {"a": [False, False, True, None]}
    elif any(name in backend for name in ("pandas_constructor", "dask")):
        # NaN and the null entry both come back as False.
        expected = {"a": [False, False, True, False]}
    else:  # pandas_nullable_constructor, pandas_pyarrow_constructor, modin
        expected = {"a": [None, False, True, None]}

    frame = nw.from_native(constructor(data))
    result = frame.select(nw.col("a").is_finite())
    assert_equal_data(result, expected)


@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
def test_is_finite_series(constructor_eager: ConstructorEager) -> None:
    """Check `Series.is_finite()` against the expected result for each eager backend."""
    backend = str(constructor_eager)
    if any(name in backend for name in ("polars", "pyarrow_table")):
        # The null entry stays null; NaN and inf are reported as not finite.
        expected = {"a": [False, False, True, None]}
    elif any(name in backend for name in ("pandas_constructor", "dask")):
        # NaN and the null entry both come back as False.
        expected = {"a": [False, False, True, False]}
    else:  # pandas_nullable_constructor, pandas_pyarrow_constructor, modin
        expected = {"a": [None, False, True, None]}

    frame = nw.from_native(constructor_eager(data), eager_only=True)
    result = {"a": frame["a"].is_finite()}

    assert_equal_data(result, expected)

0 comments on commit 2784596

Please sign in to comment.