diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index ac845853a..a357bdeb9 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -621,7 +621,7 @@ def unique( ) -> Self: """ NOTE: - The param `maintain_order` is only here for compatibility with the polars API + The param `maintain_order` is only here for compatibility with the Polars API and has no effect on the output. """ import numpy as np # ignore-banned-import diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 35e936d72..7b2af5781 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -317,8 +317,8 @@ def is_first_distinct(self: Self) -> Self: def is_last_distinct(self: Self) -> Self: return reuse_series_implementation(self, "is_last_distinct") - def unique(self: Self) -> Self: - return reuse_series_implementation(self, "unique") + def unique(self: Self, *, maintain_order: bool = False) -> Self: + return reuse_series_implementation(self, "unique", maintain_order=maintain_order) def sort(self: Self, *, descending: bool = False, nulls_last: bool = False) -> Self: return reuse_series_implementation( diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 70009df43..d63a92aa4 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -645,7 +645,12 @@ def is_sorted(self: Self, *, descending: bool = False) -> bool: else: return pc.all(pc.less_equal(ser[:-1], ser[1:])) # type: ignore[no-any-return] - def unique(self: Self) -> ArrowSeries: + def unique(self: Self, *, maintain_order: bool = False) -> ArrowSeries: + """ + NOTE: + The param `maintain_order` is only here for compatibility with the Polars API + and has no effect on the output. 
+ """ import pyarrow.compute as pc # ignore-banned-import() return self._from_native_series(pc.unique(self._native_series)) diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 150b0177c..cf5c8eae1 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -187,7 +187,7 @@ def unique( ) -> Self: """ NOTE: - The param `maintain_order` is only here for compatibility with the polars API + The param `maintain_order` is only here for compatibility with the Polars API and has no effect on the output. """ subset = flatten(subset) if subset else None diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 5eab8b08f..df298ffca 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -296,7 +296,7 @@ def iter_rows( ) -> Iterator[list[tuple[Any, ...]]] | Iterator[list[dict[str, Any]]]: """ NOTE: - The param ``buffer_size`` is only here for compatibility with the polars API + The param ``buffer_size`` is only here for compatibility with the Polars API and has no effect on the output. """ if not named: @@ -656,7 +656,7 @@ def unique( ) -> Self: """ NOTE: - The param `maintain_order` is only here for compatibility with the polars API + The param `maintain_order` is only here for compatibility with the Polars API and has no effect on the output. 
""" mapped_keep = {"none": False, "any": "first"}.get(keep, keep) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index a58597eea..fb9e577b3 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -282,8 +282,8 @@ def abs(self) -> Self: def cum_sum(self) -> Self: return reuse_series_implementation(self, "cum_sum") - def unique(self) -> Self: - return reuse_series_implementation(self, "unique") + def unique(self, *, maintain_order: bool = False) -> Self: + return reuse_series_implementation(self, "unique", maintain_order=maintain_order) def diff(self) -> Self: return reuse_series_implementation(self, "diff") diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 078e857b9..7bc916b55 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -471,7 +471,12 @@ def abs(self) -> PandasLikeSeries: def cum_sum(self) -> PandasLikeSeries: return self._from_native_series(self._native_series.cumsum()) - def unique(self) -> PandasLikeSeries: + def unique(self, *, maintain_order: bool = False) -> PandasLikeSeries: + """ + NOTE: + The param `maintain_order` is only here for compatibility with the Polars API + and has no effect on the output. + """ return self._from_native_series( self._native_series.__class__( self._native_series.unique(), name=self._native_series.name diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index bf8bb5c98..bb163b28d 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -1722,9 +1722,9 @@ def unique( * 'none': Don't keep duplicate rows. * 'first': Keep first unique row. * 'last': Keep last unique row. - maintain_order: Keep the same order as the original DataFrame. This is more + maintain_order: Keep the same order as the original DataFrame. This may be more expensive to compute. Settings this to `True` blocks the possibility - to run on the streaming engine for polars. + to run on the streaming engine for Polars. 
Examples: >>> import pandas as pd @@ -3568,9 +3568,9 @@ def unique( * 'none': Don't keep duplicate rows. * 'first': Keep first unique row. * 'last': Keep last unique row. - maintain_order: Keep the same order as the original DataFrame. This is more + maintain_order: Keep the same order as the original DataFrame. This may be more expensive to compute. Settings this to `True` blocks the possibility - to run on the streaming engine for polars. + to run on the streaming engine for Polars. Returns: LazyFrame: LazyFrame with unique rows. diff --git a/narwhals/expr.py b/narwhals/expr.py index 2f986760c..f1d9a1935 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -707,9 +707,14 @@ def n_unique(self) -> Self: """ return self.__class__(lambda plx: self._call(plx).n_unique()) - def unique(self) -> Self: + def unique(self, *, maintain_order: bool = False) -> Self: """ - Return unique values + Return unique values of this expression. + + Arguments: + maintain_order: Keep the same order as the original expression. This may be more + expensive to compute. Setting this to `True` blocks the possibility + to run on the streaming engine for Polars. Examples: >>> import polars as pl @@ -724,7 +729,7 @@ def unique(self) -> Self: >>> @nw.narwhalify ... def func(df): - ... return df.select(nw.col("a", "b").unique()) + ... 
return df.select(nw.col("a", "b").unique(maintain_order=True)) We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: @@ -752,7 +757,9 @@ def unique(self) -> Self: a: [[1,3,5]] b: [[2,4,6]] """ - return self.__class__(lambda plx: self._call(plx).unique()) + return self.__class__( + lambda plx: self._call(plx).unique(maintain_order=maintain_order) + ) def abs(self) -> Self: """ diff --git a/narwhals/series.py b/narwhals/series.py index dac5c6d79..424c82b4a 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -1005,9 +1005,14 @@ def cum_sum(self) -> Self: """ return self._from_compliant_series(self._compliant_series.cum_sum()) - def unique(self) -> Self: + def unique(self, *, maintain_order: bool = False) -> Self: """ - Returns unique values + Returns unique values of the series. + + Arguments: + maintain_order: Keep the same order as the original series. This may be more + expensive to compute. Setting this to `True` blocks the possibility + to run on the streaming engine for Polars. Examples: >>> import pandas as pd @@ -1021,7 +1026,7 @@ def unique(self) -> Self: >>> @nw.narwhalify ... def func(s): - ... 
return s.unique(maintain_order=True) We can then pass either pandas or Polars to `func`: @@ -1039,7 +1044,9 @@ def unique(self) -> Self: 6 ] """ - return self._from_compliant_series(self._compliant_series.unique()) + return self._from_compliant_series( + self._compliant_series.unique(maintain_order=maintain_order) + ) def diff(self) -> Self: """ diff --git a/tests/expr_and_series/unique_test.py b/tests/expr_and_series/unique_test.py index acef3f60a..62d2e63a2 100644 --- a/tests/expr_and_series/unique_test.py +++ b/tests/expr_and_series/unique_test.py @@ -8,6 +8,7 @@ from tests.utils import assert_equal_data data = {"a": [1, 1, 2]} +data_str = {"a": ["x", "x", "y"]} def test_unique_expr(constructor: Constructor, request: pytest.FixtureRequest) -> None: @@ -20,7 +21,7 @@ def test_unique_expr(constructor: Constructor, request: pytest.FixtureRequest) - def test_unique_series(constructor_eager: ConstructorEager) -> None: - series = nw.from_native(constructor_eager(data), eager_only=True)["a"] - result = series.unique() - expected = {"a": [1, 2]} + series = nw.from_native(constructor_eager(data_str), eager_only=True)["a"] + result = series.unique(maintain_order=True) + expected = {"a": ["x", "y"]} assert_equal_data({"a": result}, expected)