Skip to content

Commit

Permalink
feat: add mode method
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi committed Sep 10, 2024
1 parent 270adbd commit b9bd128
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/api-reference/expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
- max
- mean
- min
- mode
- null_count
- n_unique
- over
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
- max
- mean
- min
- mode
- name
- null_count
- n_unique
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
backend_version=self._backend_version,
)

def mode(self: Self) -> Self:
    """Build the `mode` expression by dispatching to the series method named "mode"."""
    mode_expr = reuse_series_implementation(self, "mode")
    return mode_expr

@property
def dt(self: Self) -> ArrowExprDateTimeNamespace:
    """Wrap this expression in an `ArrowExprDateTimeNamespace`."""
    namespace = ArrowExprDateTimeNamespace(self)
    return namespace
Expand Down
13 changes: 13 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from typing_extensions import Self

from narwhals._arrow.dataframe import ArrowDataFrame
from narwhals._arrow.namespace import ArrowNamespace
from narwhals.dtypes import DType


Expand Down Expand Up @@ -65,6 +66,11 @@ def _from_iterable(
backend_version=backend_version,
)

def __narwhals_namespace__(self) -> ArrowNamespace:
    """Return an `ArrowNamespace` built with this series' backend version."""
    # NOTE(review): imported at call time, presumably to avoid a circular
    # import with the namespace module -- confirm.
    from narwhals._arrow.namespace import ArrowNamespace

    namespace = ArrowNamespace(backend_version=self._backend_version)
    return namespace

def __len__(self) -> int:
    """Return the number of elements in the wrapped native series."""
    native = self._native_series
    return len(native)

Expand Down Expand Up @@ -667,6 +673,13 @@ def clip(
def to_arrow(self: Self) -> pa.Array:
    """Return the result of `combine_chunks()` on the wrapped native series."""
    native = self._native_series
    return native.combine_chunks()

def mode(self: Self) -> ArrowSeries:
    """Return the most frequently occurring value(s) of this series.

    The values are counted with `value_counts`, the rows whose count equals
    the maximum count are kept, and the column holding the values is
    returned. Several values are returned when they tie for the top count.
    """
    namespace = self.__narwhals_namespace__()
    # Name for the temporary counts column; the series' own name is passed
    # in so the generated token can be kept distinct from it.
    count_col = generate_unique_token(n_bytes=8, columns=[self.name])
    counts = self.value_counts(name=count_col, normalize=False)
    is_most_frequent = namespace.col(count_col) == namespace.col(count_col).max()
    return counts.filter(is_most_frequent)[self.name]

@property
def shape(self) -> tuple[int]:
    """Return a one-element tuple holding the length of the native series."""
    n_elements = len(self._native_series)
    return (n_elements,)
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,10 @@ def func(df: DaskLazyFrame) -> list[Any]:
backend_version=self._backend_version,
)

def mode(self: Self) -> Self:
    """Always raise: `Expr.mode` has no Dask implementation.

    Raises:
        NotImplementedError: On every call.
    """
    error_message = "`Expr.mode` is not supported for the Dask backend."
    raise NotImplementedError(error_message)

@property
def str(self: Self) -> DaskExprStringNamespace:
    """Wrap this expression in a `DaskExprStringNamespace`."""
    namespace = DaskExprStringNamespace(self)
    return namespace
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ def len(self: Self) -> Self:
def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Build the `gather_every` expression by dispatching to the series method.

    Arguments:
        n: Forwarded to the series-level `gather_every` as `n`.
        offset: Forwarded to the series-level `gather_every` as `offset`.
    """
    forwarded = {"n": n, "offset": offset}
    return reuse_series_implementation(self, "gather_every", **forwarded)

def mode(self: Self) -> Self:
    """Build the `mode` expression by dispatching to the series method named "mode"."""
    mode_expr = reuse_series_implementation(self, "mode")
    return mode_expr

@property
def str(self: Self) -> PandasLikeExprStringNamespace:
    """Wrap this expression in a `PandasLikeExprStringNamespace`."""
    namespace = PandasLikeExprStringNamespace(self)
    return namespace
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,9 @@ def to_arrow(self: Self) -> Any:

return pa.Array.from_pandas(self._native_series)

def mode(self: Self) -> Self:
    """Return the most frequent value(s), computed by the native series' `mode()`.

    The native result is re-wrapped with `_from_native_series`.
    """
    native_modes = self._native_series.mode()
    return self._from_native_series(native_modes)

@property
def str(self) -> PandasLikeSeriesStringNamespace:
    """Wrap this series in a `PandasLikeSeriesStringNamespace`."""
    namespace = PandasLikeSeriesStringNamespace(self)
    return namespace
Expand Down
45 changes: 44 additions & 1 deletion narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1819,7 +1819,7 @@ def clip(
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>>
>>> s = [1, 2, 3]
>>> df_pd = pd.DataFrame({"s": s})
>>> df_pl = pl.DataFrame({"s": s})
Expand Down Expand Up @@ -1913,6 +1913,49 @@ def clip(
"""
return self.__class__(lambda plx: self._call(plx).clip(lower_bound, upper_bound))

def mode(self: Self) -> Self:
    r"""Compute the most occurring value(s).

    Can return multiple values.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> data = {
        ...     "a": [1, 1, 2, 3],
        ...     "b": [1, 1, 2, 2],
        ... }
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.select(nw.col("a", "b").mode()).sort("a", "b")

        We can then pass either pandas or Polars to `func`:

        >>> func(df_pd)
           a  b
        0  1  1
        1  1  2

        >>> func(df_pl)
        shape: (2, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i64 ┆ i64 │
        ╞═════╪═════╡
        │ 1   ┆ 1   │
        │ 1   ┆ 2   │
        └─────┴─────┘
    """

    def compute_mode(plx):  # noqa: ANN001, ANN202
        # Resolve this expression against the backend namespace, then
        # delegate to the backend expression's own `mode`.
        return self._call(plx).mode()

    return self.__class__(compute_mode)

@property
def str(self: Self) -> ExprStringNamespace:
    """Wrap this expression in an `ExprStringNamespace`."""
    namespace = ExprStringNamespace(self)
    return namespace
Expand Down
38 changes: 38 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2278,6 +2278,44 @@ def to_arrow(self: Self) -> pa.Array:
"""
return self._compliant_series.to_arrow()

def mode(self: Self) -> Self:
    r"""
    Compute the most occurring value(s).

    Can return multiple values.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw

        >>> data = [1, 1, 2, 2, 3]
        >>> s_pd = pd.Series(name="a", data=data)
        >>> s_pl = pl.Series(name="a", values=data)

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.mode().sort()

        We can then pass either pandas or Polars to `func`:

        >>> func(s_pd)
        0    1
        1    2
        Name: a, dtype: int64

        >>> func(s_pl)  # doctest:+NORMALIZE_WHITESPACE
        shape: (2,)
        Series: 'a' [i64]
        [
           1
           2
        ]
    """
    # Delegate to the backend series, then re-wrap the result.
    compliant_modes = self._compliant_series.mode()
    return self._from_compliant_series(compliant_modes)

@property
def str(self) -> SeriesStringNamespace:
    """Wrap this series in a `SeriesStringNamespace`."""
    namespace = SeriesStringNamespace(self)
    return namespace
Expand Down
37 changes: 37 additions & 0 deletions tests/expr_and_series/mode_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import Any

import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts

# Shared input for the tests below: column "a" has two modes (1 and 2),
# column "b" has a single mode (3).
data = {
    "a": [1, 1, 2, 2, 3],
    "b": [1, 2, 3, 3, 4],
}


def test_mode_single_expr(constructor: Any, request: Any) -> None:
    """`mode` on a single column returns every value tied for the top count."""
    # Constructors whose string form mentions "dask" are marked as expected
    # failures.
    if "dask" in str(constructor):
        request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor(data))
    modes = frame.select(nw.col("a").mode())
    compare_dicts(modes.sort("a"), {"a": [1, 2]})


def test_mode_multi_expr(constructor: Any, request: Any) -> None:
    """`mode` over two columns at once: "a" has two modes, "b" has one.

    The expected frame repeats the single mode of "b" so that both columns
    have two rows.
    """
    # Constructors whose string form mentions "dask" are marked as expected
    # failures.
    if "dask" in str(constructor):
        request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor(data))
    columns = ("a", "b")
    result = frame.select(nw.col(*columns).mode()).sort(*columns)
    compare_dicts(result, {"a": [1, 2], "b": [3, 3]})


def test_mode_series(constructor_eager: Any) -> None:
    """`Series.mode` on column "a" returns both tied values, 1 and 2."""
    frame = nw.from_native(constructor_eager(data), eager_only=True)
    modes = frame["a"].mode().sort()
    compare_dicts({"a": modes}, {"a": [1, 2]})

0 comments on commit b9bd128

Please sign in to comment.