diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md
index 867702b60..1dfde8210 100644
--- a/docs/api-reference/expr.md
+++ b/docs/api-reference/expr.md
@@ -25,6 +25,7 @@
         - clip
         - is_between
         - is_duplicated
+        - is_finite
         - is_first_distinct
         - is_in
         - is_last_distinct
diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md
index b957bdbf2..6539dcc23 100644
--- a/docs/api-reference/series.md
+++ b/docs/api-reference/series.md
@@ -30,6 +30,7 @@
         - is_between
         - is_duplicated
         - is_empty
+        - is_finite
         - is_first_distinct
         - is_in
         - is_last_distinct
diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
index 772505ee6..4828ea4b2 100644
--- a/narwhals/_arrow/expr.py
+++ b/narwhals/_arrow/expr.py
@@ -438,6 +438,9 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             dtypes=self._dtypes,
         )
 
+    def is_finite(self: Self) -> Self:
+        return reuse_series_implementation(self, "is_finite")
+
     def cum_count(self: Self, *, reverse: bool) -> Self:
         return reuse_series_implementation(self, "cum_count", reverse=reverse)
 
diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index 133bf2165..0edaedd3f 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -818,6 +818,11 @@ def mode(self: Self) -> ArrowSeries:
             plx.col(col_token) == plx.col(col_token).max()
         )[self.name]
 
+    def is_finite(self: Self) -> Self:
+        import pyarrow.compute as pc  # ignore-banned-import
+
+        return self._from_native_series(pc.is_finite(self._native_series))
+
     def cum_count(self: Self, *, reverse: bool) -> Self:
         return (~self.is_null()).cast(self._dtypes.UInt32()).cum_sum(reverse=reverse)
 
diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
index 142349a8c..da1ee52e1 100644
--- a/narwhals/_dask/expr.py
+++ b/narwhals/_dask/expr.py
@@ -831,6 +831,15 @@ def func(_input: Any, dtype: DType | type[DType]) -> Any:
             returns_scalar=False,
         )
 
+    def is_finite(self: Self) -> Self:
+        import dask.array as da  # ignore-banned-import
+
+        return self._from_call(
+            lambda _input: da.isfinite(_input),
+            "is_finite",
+            returns_scalar=False,
+        )
+
 
 class DaskExprStringNamespace:
     def __init__(self, expr: DaskExpr) -> None:
diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
index 80facd572..dd1ad405a 100644
--- a/narwhals/_pandas_like/expr.py
+++ b/narwhals/_pandas_like/expr.py
@@ -449,6 +449,9 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
             dtypes=self._dtypes,
         )
 
+    def is_finite(self: Self) -> Self:
+        return reuse_series_implementation(self, "is_finite")
+
     def cum_count(self: Self, *, reverse: bool) -> Self:
         return reuse_series_implementation(self, "cum_count", reverse=reverse)
 
diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
index 98caf9213..1c20b7ba0 100644
--- a/narwhals/_pandas_like/series.py
+++ b/narwhals/_pandas_like/series.py
@@ -801,6 +801,10 @@ def cum_prod(self: Self, *, reverse: bool) -> Self:
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._native_series.__iter__()
 
+    def is_finite(self: Self) -> Self:
+        s = self._native_series
+        return self._from_native_series((s > float("-inf")) & (s < float("inf")))
+
     @property
     def str(self) -> PandasLikeSeriesStringNamespace:
         return PandasLikeSeriesStringNamespace(self)
diff --git a/narwhals/expr.py b/narwhals/expr.py
index 0f78409a6..2cdeb3138 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -1496,8 +1496,8 @@ def is_null(self) -> Self:
             A new expression.
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas, Polars and PyArrow handle null values differently. Polars and PyArrow
+            distinguish between NaN and Null, whereas pandas doesn't.
 
         Examples:
             >>> import pandas as pd
@@ -2701,6 +2701,59 @@ def mode(self: Self) -> Self:
         """
         return self.__class__(lambda plx: self._call(plx).mode())
 
+    def is_finite(self: Self) -> Self:
+        """Returns boolean values indicating which original values are finite.
+
+        Warning:
+            Different backends handle null values differently. `is_finite` will return
+            False for NaN and Nulls in the Dask and pandas non-nullable backends, while
+            for Polars, PyArrow and pandas nullable backends null values are kept as such.
+
+        Returns:
+            Expression of `Boolean` data type.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = {"a": [float("nan"), float("inf"), 2.0, None]}
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(df):
+            ...     return df.select(nw.col("a").is_finite())
+
+            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+
+            >>> func(pd.DataFrame(data))
+                   a
+            0  False
+            1  False
+            2   True
+            3  False
+            >>> func(pl.DataFrame(data))
+            shape: (4, 1)
+            ┌───────┐
+            │ a     │
+            │ ---   │
+            │ bool  │
+            ╞═══════╡
+            │ false │
+            │ false │
+            │ true  │
+            │ null  │
+            └───────┘
+
+            >>> func(pa.table(data))
+            pyarrow.Table
+            a: bool
+            ----
+            a: [[false,false,true,null]]
+        """
+        return self.__class__(lambda plx: self._call(plx).is_finite())
+
     def cum_count(self: Self, *, reverse: bool = False) -> Self:
         r"""Return the cumulative count of the non-null values in the column.
 
diff --git a/narwhals/series.py b/narwhals/series.py
index 2b308f286..654b84500 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -2634,6 +2634,62 @@ def mode(self: Self) -> Self:
         """
         return self._from_compliant_series(self._compliant_series.mode())
 
+    def is_finite(self: Self) -> Self:
+        """Returns a boolean Series indicating which values are finite.
+
+        Warning:
+            Different backends handle null values differently. `is_finite` will return
+            False for NaN and Nulls in the Dask and pandas non-nullable backends, while
+            for Polars, PyArrow and pandas nullable backends null values are kept as such.
+
+        Returns:
+            Series of `Boolean` data type.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = [float("nan"), float("inf"), 2.0, None]
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(s):
+            ...     return s.is_finite()
+
+            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+
+            >>> func(pd.Series(data))
+            0    False
+            1    False
+            2     True
+            3    False
+            dtype: bool
+
+            >>> func(pl.Series(data))  # doctest: +NORMALIZE_WHITESPACE
+            shape: (4,)
+            Series: '' [bool]
+            [
+               false
+               false
+               true
+               null
+            ]
+
+            >>> func(pa.chunked_array([data]))  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                false,
+                false,
+                true,
+                null
+              ]
+            ]
+        """
+        return self._from_compliant_series(self._compliant_series.is_finite())
+
     def cum_count(self: Self, *, reverse: bool = False) -> Self:
         r"""Return the cumulative count of the non-null values in the series.
 
diff --git a/tests/expr_and_series/is_finite_test.py b/tests/expr_and_series/is_finite_test.py
new file mode 100644
index 000000000..90f4c3b6b
--- /dev/null
+++ b/tests/expr_and_series/is_finite_test.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+import pytest
+
+import narwhals.stable.v1 as nw
+from tests.utils import Constructor
+from tests.utils import ConstructorEager
+from tests.utils import assert_equal_data
+
+data = {"a": [float("nan"), float("inf"), 2.0, None]}
+
+
+@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
+def test_is_finite_expr(constructor: Constructor) -> None:
+    if "polars" in str(constructor) or "pyarrow_table" in str(constructor):
+        expected = {"a": [False, False, True, None]}
+    elif "pandas_constructor" in str(constructor) or "dask" in str(constructor):
+        expected = {"a": [False, False, True, False]}
+    else:  # pandas_nullable_constructor, pandas_pyarrow_constructor, modin
+        expected = {"a": [None, False, True, None]}
+
+    df = nw.from_native(constructor(data))
+    result = df.select(nw.col("a").is_finite())
+    assert_equal_data(result, expected)
+
+
+@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
+def test_is_finite_series(constructor_eager: ConstructorEager) -> None:
+    if "polars" in str(constructor_eager) or "pyarrow_table" in str(constructor_eager):
+        expected = {"a": [False, False, True, None]}
+    elif "pandas_constructor" in str(constructor_eager) or "dask" in str(
+        constructor_eager
+    ):
+        expected = {"a": [False, False, True, False]}
+    else:  # pandas_nullable_constructor, pandas_pyarrow_constructor, modin
+        expected = {"a": [None, False, True, None]}
+
+    df = nw.from_native(constructor_eager(data), eager_only=True)
+    result = {"a": df["a"].is_finite()}
+
+    assert_equal_data(result, expected)