Skip to content

Commit

Permalink
feat: add maintain_order to Expr.unique and Series.unique (#1333)
Browse files Browse the repository at this point in the history
  • Loading branch information
raisadz authored Nov 7, 2024
1 parent d35a8e5 commit e16b032
Show file tree
Hide file tree
Showing 11 changed files with 50 additions and 25 deletions.
2 changes: 1 addition & 1 deletion narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ def unique(
) -> Self:
"""
NOTE:
The param `maintain_order` is only here for compatibility with the polars API
The param `maintain_order` is only here for compatibility with the Polars API
and has no effect on the output.
"""
import numpy as np # ignore-banned-import
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ def is_first_distinct(self: Self) -> Self:
def is_last_distinct(self: Self) -> Self:
return reuse_series_implementation(self, "is_last_distinct")

def unique(self: Self) -> Self:
return reuse_series_implementation(self, "unique")
def unique(self: Self, *, maintain_order: bool = False) -> Self:
return reuse_series_implementation(self, "unique", maintain_order=maintain_order)

def sort(self: Self, *, descending: bool = False, nulls_last: bool = False) -> Self:
return reuse_series_implementation(
Expand Down
7 changes: 6 additions & 1 deletion narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,12 @@ def is_sorted(self: Self, *, descending: bool = False) -> bool:
else:
return pc.all(pc.less_equal(ser[:-1], ser[1:])) # type: ignore[no-any-return]

def unique(self: Self) -> ArrowSeries:
def unique(self: Self, *, maintain_order: bool = False) -> ArrowSeries:
"""
NOTE:
The param `maintain_order` is only here for compatibility with the Polars API
and has no effect on the output.
"""
import pyarrow.compute as pc # ignore-banned-import()

return self._from_native_series(pc.unique(self._native_series))
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def unique(
) -> Self:
"""
NOTE:
The param `maintain_order` is only here for compatibility with the polars API
The param `maintain_order` is only here for compatibility with the Polars API
and has no effect on the output.
"""
subset = flatten(subset) if subset else None
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def iter_rows(
) -> Iterator[list[tuple[Any, ...]]] | Iterator[list[dict[str, Any]]]:
"""
NOTE:
The param ``buffer_size`` is only here for compatibility with the polars API
The param ``buffer_size`` is only here for compatibility with the Polars API
and has no effect on the output.
"""
if not named:
Expand Down Expand Up @@ -656,7 +656,7 @@ def unique(
) -> Self:
"""
NOTE:
The param `maintain_order` is only here for compatibility with the polars API
The param `maintain_order` is only here for compatibility with the Polars API
and has no effect on the output.
"""
mapped_keep = {"none": False, "any": "first"}.get(keep, keep)
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,8 @@ def abs(self) -> Self:
def cum_sum(self) -> Self:
return reuse_series_implementation(self, "cum_sum")

def unique(self) -> Self:
return reuse_series_implementation(self, "unique")
def unique(self, *, maintain_order: bool = False) -> Self:
return reuse_series_implementation(self, "unique", maintain_order=maintain_order)

def diff(self) -> Self:
return reuse_series_implementation(self, "diff")
Expand Down
7 changes: 6 additions & 1 deletion narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,12 @@ def abs(self) -> PandasLikeSeries:
def cum_sum(self) -> PandasLikeSeries:
return self._from_native_series(self._native_series.cumsum())

def unique(self) -> PandasLikeSeries:
def unique(self, *, maintain_order: bool = False) -> PandasLikeSeries:
"""
NOTE:
The param `maintain_order` is only here for compatibility with the Polars API
and has no effect on the output.
"""
return self._from_native_series(
self._native_series.__class__(
self._native_series.unique(), name=self._native_series.name
Expand Down
8 changes: 4 additions & 4 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1722,9 +1722,9 @@ def unique(
* 'none': Don't keep duplicate rows.
* 'first': Keep first unique row.
* 'last': Keep last unique row.
maintain_order: Keep the same order as the original DataFrame. This is more
maintain_order: Keep the same order as the original DataFrame. This may be more
expensive to compute. Setting this to `True` blocks the possibility
to run on the streaming engine for polars.
to run on the streaming engine for Polars.
Examples:
>>> import pandas as pd
Expand Down Expand Up @@ -3568,9 +3568,9 @@ def unique(
* 'none': Don't keep duplicate rows.
* 'first': Keep first unique row.
* 'last': Keep last unique row.
maintain_order: Keep the same order as the original DataFrame. This is more
maintain_order: Keep the same order as the original DataFrame. This may be more
expensive to compute. Setting this to `True` blocks the possibility
to run on the streaming engine for polars.
to run on the streaming engine for Polars.
Returns:
LazyFrame: LazyFrame with unique rows.
Expand Down
15 changes: 11 additions & 4 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,9 +707,14 @@ def n_unique(self) -> Self:
"""
return self.__class__(lambda plx: self._call(plx).n_unique())

def unique(self) -> Self:
def unique(self, *, maintain_order: bool = False) -> Self:
"""
Return unique values
Return unique values of this expression.
Arguments:
maintain_order: Keep the same order as the original expression. This may be more
expensive to compute. Setting this to `True` blocks the possibility
to run on the streaming engine for Polars.
Examples:
>>> import polars as pl
Expand All @@ -724,7 +729,7 @@ def unique(self) -> Self:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").unique())
... return df.select(nw.col("a", "b").unique(maintain_order=True))
We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
Expand Down Expand Up @@ -752,7 +757,9 @@ def unique(self) -> Self:
a: [[1,3,5]]
b: [[2,4,6]]
"""
return self.__class__(lambda plx: self._call(plx).unique())
return self.__class__(
lambda plx: self._call(plx).unique(maintain_order=maintain_order)
)

def abs(self) -> Self:
"""
Expand Down
15 changes: 11 additions & 4 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,9 +1005,14 @@ def cum_sum(self) -> Self:
"""
return self._from_compliant_series(self._compliant_series.cum_sum())

def unique(self) -> Self:
def unique(self, *, maintain_order: bool = False) -> Self:
"""
Returns unique values
Returns unique values of the series.
Arguments:
maintain_order: Keep the same order as the original series. This may be more
expensive to compute. Setting this to `True` blocks the possibility
to run on the streaming engine for Polars.
Examples:
>>> import pandas as pd
Expand All @@ -1021,7 +1026,7 @@ def unique(self) -> Self:
>>> @nw.narwhalify
... def func(s):
... return s.unique()
... return s.unique(maintain_order=True)
We can then pass either pandas or Polars to `func`:
Expand All @@ -1039,7 +1044,9 @@ def unique(self) -> Self:
6
]
"""
return self._from_compliant_series(self._compliant_series.unique())
return self._from_compliant_series(
self._compliant_series.unique(maintain_order=maintain_order)
)

def diff(self) -> Self:
"""
Expand Down
7 changes: 4 additions & 3 deletions tests/expr_and_series/unique_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from tests.utils import assert_equal_data

data = {"a": [1, 1, 2]}
data_str = {"a": ["x", "x", "y"]}


def test_unique_expr(constructor: Constructor, request: pytest.FixtureRequest) -> None:
Expand All @@ -20,7 +21,7 @@ def test_unique_expr(constructor: Constructor, request: pytest.FixtureRequest) -


def test_unique_series(constructor_eager: ConstructorEager) -> None:
series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
result = series.unique()
expected = {"a": [1, 2]}
series = nw.from_native(constructor_eager(data_str), eager_only=True)["a"]
result = series.unique(maintain_order=True)
expected = {"a": ["x", "y"]}
assert_equal_data({"a": result}, expected)

0 comments on commit e16b032

Please sign in to comment.