From 5b92ccbcf512e4997ff5ce19cbcad053b70893ef Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 11 Jan 2025 15:54:20 +0000 Subject: [PATCH] coverage --- narwhals/expr.py | 303 +++++---------------- narwhals/stable/v1/__init__.py | 75 +++++ tests/expr_and_series/drop_nulls_test.py | 6 + tests/expr_and_series/gather_every_test.py | 4 + tests/expr_and_series/head_test.py | 6 +- tests/expr_and_series/sample_test.py | 4 + tests/expr_and_series/tail_test.py | 9 +- 7 files changed, 173 insertions(+), 234 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 20e61a96e..e48a68161 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1287,11 +1287,11 @@ def n_unique(self) -> Self: def unique(self, *, maintain_order: bool = False) -> Self: """Return unique values of this expression. - Note: + !!! warning `Expr.unique` is deprecated and will be removed in a future version. Hint: instead of `df.select(nw.col('a').unique())`, use `df.select(nw.col('a')).unique()` instead. - Note: `strict` will remain available in `narwhals.stable.v1`. + Note: this will remain available in `narwhals.stable.v1`. See [stable api](../backcompat.md/) for more information. Arguments: @@ -1305,7 +1305,7 @@ def unique(self, *, maintain_order: bool = False) -> Self: msg = ( "`Expr.unique` is deprecated and will be removed in a future version.\n\n" "Hint: instead of `df.select(nw.col('a').unique())`, use `df.select(nw.col('a')).unique()`.\n\n" - "Note: `strict` will remain available in `narwhals.stable.v1`.\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" "See [stable api](../backcompat.md/) for more information.\n" ) issue_deprecation_warning(msg, _version="1.22.0") @@ -2296,6 +2296,13 @@ def fill_null( def drop_nulls(self) -> Self: """Drop null values. + !!! warning + `Expr.drop_nulls` is deprecated and will be removed in a future version. 
+ Hint: instead of `df.select(nw.col('a').drop_nulls())`, use + `df.select(nw.col('a')).drop_nulls()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Returns: A new expression. @@ -2303,53 +2310,14 @@ def drop_nulls(self) -> Self: pandas handles null values differently from Polars and PyArrow. See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]}) - >>> df_pl = pl.DataFrame({"a": [2.0, 4.0, None, 3.0, None, 5.0]}) - >>> df_pa = pa.table({"a": [2.0, 4.0, None, 3.0, None, 5.0]}) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").drop_nulls()).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_drop_nulls`: - - >>> agnostic_drop_nulls(df_pd) - a - 0 2.0 - 1 4.0 - 3 3.0 - 5 5.0 - - >>> agnostic_drop_nulls(df_pl) - shape: (4, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2.0 │ - │ 4.0 │ - │ 3.0 │ - │ 5.0 │ - └─────┘ - - >>> agnostic_drop_nulls(df_pa) - pyarrow.Table - a: double - ---- - a: [[2,4,3,5]] """ + msg = ( + "`Expr.drop_nulls` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').drop_nulls())`, use `df.select(nw.col('a')).drop_nulls()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See [stable api](../backcompat.md/) for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__(lambda plx: self._to_compliant_expr(plx).drop_nulls()) def sample( @@ -2362,6 +2330,13 @@ def sample( ) -> Self: """Sample 
randomly from this expression. + !!! warning + `Expr.sample` is deprecated and will be removed in a future version. + Hint: instead of `df.select(nw.col('a').sample())`, use + `df.select(nw.col('a')).sample()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Arguments: n: Number of items to return. Cannot be used with fraction. fraction: Fraction of items to return. Cannot be used with n. @@ -2371,54 +2346,14 @@ def sample( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_sample(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").sample(fraction=1.0, with_replacement=True) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sample`: - - >>> agnostic_sample(df_pd) # doctest: +SKIP - a - 2 3 - 0 1 - 2 3 - - >>> agnostic_sample(df_pl) # doctest: +SKIP - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2 │ - │ 3 │ - │ 3 │ - └─────┘ - - >>> agnostic_sample(df_pa) # doctest: +SKIP - pyarrow.Table - a: int64 - ---- - a: [[1,3,3]] """ + msg = ( + "`Expr.sample` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').sample())`, use `df.select(nw.col('a')).sample()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See [stable api](../backcompat.md/) for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__( lambda plx: self._to_compliant_expr(plx).sample( n, fraction=fraction, with_replacement=with_replacement, seed=seed @@ -2875,113 +2810,51 @@ def quantile( def head(self, n: int = 10) -> Self: r"""Get the first `n` rows. + !!! warning + `Expr.head` is deprecated and will be removed in a future version. + Hint: instead of `df.select(nw.col('a').head())`, use + `df.select(nw.col('a')).head()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Arguments: n: Number of rows to return. Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": list(range(10))} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that returns the first 3 rows: - - >>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").head(3)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_head`: - - >>> agnostic_head(df_pd) - a - 0 0 - 1 1 - 2 2 - - >>> agnostic_head(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 0 │ - │ 1 │ - │ 2 │ - └─────┘ - - >>> agnostic_head(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[0,1,2]] """ + msg = ( + "`Expr.head` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').head())`, use `df.select(nw.col('a')).head()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See [stable api](../backcompat.md/) for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n)) def tail(self, n: int = 10) -> Self: r"""Get the last `n` rows. + !!! warning + `Expr.tail` is deprecated and will be removed in a future version. + Hint: instead of `df.select(nw.col('a').tail())`, use + `df.select(nw.col('a')).tail()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Arguments: n: Number of rows to return. Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": list(range(10))} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that returns the last 3 rows: - - >>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ...
return df.select(nw.col("a").tail(3)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_tail`: - - >>> agnostic_tail(df_pd) - a - 7 7 - 8 8 - 9 9 - - >>> agnostic_tail(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 7 │ - │ 8 │ - │ 9 │ - └─────┘ - - >>> agnostic_tail(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[7,8,9]] """ + msg = ( + "`Expr.tail` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').tail())`, use `df.select(nw.col('a')).tail()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See [stable api](../backcompat.md/) for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n)) def round(self, decimals: int = 0) -> Self: @@ -3109,57 +2982,27 @@ def len(self) -> Self: def gather_every(self: Self, n: int, offset: int = 0) -> Self: r"""Take every nth value in the Series and return as new Series. + !!! warning + `Expr.gather_every` is deprecated and will be removed in a future version. + Hint: instead of `df.select(nw.col('a').gather_every())`, use + `df.select(nw.col('a')).gather_every()` instead. + Note: this will remain available in `narwhals.stable.v1`. + See [stable api](../backcompat.md/) for more information. + Arguments: n: Gather every *n*-th row. offset: Starting index. Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function in which gather every 2 rows, - starting from a offset of 1: - - >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT: - ...
df = nw.from_native(df_native) - ... return df.select(nw.col("a").gather_every(n=2, offset=1)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_gather_every`: - - >>> agnostic_gather_every(df_pd) - a - 1 2 - 3 4 - - >>> agnostic_gather_every(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 4 │ - └─────┘ - - >>> agnostic_gather_every(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,4]] """ + msg = ( + "`Expr.gather_every` is deprecated and will be removed in a future version.\n\n" + "Hint: instead of `df.select(nw.col('a').gather_every())`, use `df.select(nw.col('a')).gather_every()`.\n\n" + "Note: this will remain available in `narwhals.stable.v1`.\n" + "See [stable api](../backcompat.md/) for more information.\n" + ) + issue_deprecation_warning(msg, _version="1.22.0") return self.__class__( lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset) ) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index a30fbda80..b990dc665 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1617,6 +1617,68 @@ def rolling_std( ddof=ddof, ) + def head(self, n: int = 10) -> Self: + r"""Get the first `n` rows. + + Arguments: + n: Number of rows to return. + + Returns: + A new expression. + """ + return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n)) + + def tail(self, n: int = 10) -> Self: + r"""Get the last `n` rows. + + Arguments: + n: Number of rows to return. + + Returns: + A new expression. + """ + return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n)) + + def sample( + self: Self, + n: int | None = None, + *, + fraction: float | None = None, + with_replacement: bool = False, + seed: int | None = None, + ) -> Self: + """Sample randomly from this expression. + + Arguments: + n: Number of items to return. Cannot be used with fraction. + fraction: Fraction of items to return.
Cannot be used with n. + with_replacement: Allow values to be sampled more than once. + seed: Seed for the random number generator. If set to None (default), a random + seed is generated for each sample operation. + + Returns: + A new expression. + """ + return self.__class__( + lambda plx: self._to_compliant_expr(plx).sample( + n, fraction=fraction, with_replacement=with_replacement, seed=seed + ) + ) + + def gather_every(self: Self, n: int, offset: int = 0) -> Self: + r"""Take every nth value in the Series and return as new Series. + + Arguments: + n: Gather every *n*-th row. + offset: Starting index. + + Returns: + A new expression. + """ + return self.__class__( + lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset) + ) + def unique(self, *, maintain_order: bool = False) -> Self: """Return unique values of this expression. @@ -1632,6 +1694,19 @@ def unique(self, *, maintain_order: bool = False) -> Self: lambda plx: self._to_compliant_expr(plx).unique(maintain_order=maintain_order) ) + def drop_nulls(self) -> Self: + """Drop null values. + + Returns: + A new expression. + + Notes: + pandas handles null values differently from Polars and PyArrow. + See [null_handling](../pandas_like_concepts/null_handling.md/) + for reference. + """ + return self.__class__(lambda plx: self._to_compliant_expr(plx).drop_nulls()) + class Schema(NwSchema): """Ordered mapping of column names to their data type. 
diff --git a/tests/expr_and_series/drop_nulls_test.py b/tests/expr_and_series/drop_nulls_test.py index 0584674e6..ec1cdd60f 100644 --- a/tests/expr_and_series/drop_nulls_test.py +++ b/tests/expr_and_series/drop_nulls_test.py @@ -1,5 +1,8 @@ from __future__ import annotations +import pytest + +import narwhals as nw_main import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -29,6 +32,9 @@ def test_drop_nulls(constructor_eager: ConstructorEager) -> None: assert_equal_data(result_c, expected_c) assert_equal_data(result_d, expected_d) + with pytest.deprecated_call(): + df.select(nw_main.col("A").drop_nulls()) + def test_drop_nulls_series(constructor_eager: ConstructorEager) -> None: data = { diff --git a/tests/expr_and_series/gather_every_test.py b/tests/expr_and_series/gather_every_test.py index fdaaac08b..bcda13d83 100644 --- a/tests/expr_and_series/gather_every_test.py +++ b/tests/expr_and_series/gather_every_test.py @@ -2,6 +2,7 @@ import pytest +import narwhals as nw_main import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -21,6 +22,9 @@ def test_gather_every_expr( assert_equal_data(result, expected) + with pytest.deprecated_call(): + df.select(nw_main.col("a").gather_every(n=n, offset=offset)) + @pytest.mark.parametrize("n", [1, 2, 3]) @pytest.mark.parametrize("offset", [1, 2, 3]) diff --git a/tests/expr_and_series/head_test.py b/tests/expr_and_series/head_test.py index 49cc41248..952b3de16 100644 --- a/tests/expr_and_series/head_test.py +++ b/tests/expr_and_series/head_test.py @@ -2,7 +2,8 @@ import pytest -import narwhals as nw +import narwhals as nw_main +import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -18,6 +19,9 @@ def test_head( expected = {"a": [1, 2]} assert_equal_data(result, expected) + with pytest.deprecated_call(): + df.select(nw_main.col("a").head(5)) + 
@pytest.mark.parametrize("n", [2, -1]) def test_head_series(constructor_eager: ConstructorEager, n: int) -> None: diff --git a/tests/expr_and_series/sample_test.py b/tests/expr_and_series/sample_test.py index 8e88ba7c7..cd0f724ab 100644 --- a/tests/expr_and_series/sample_test.py +++ b/tests/expr_and_series/sample_test.py @@ -2,6 +2,7 @@ import pytest +import narwhals as nw_main import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -18,6 +19,9 @@ def test_expr_sample(constructor_eager: ConstructorEager) -> None: expected_series = (2,) assert result_series == expected_series + with pytest.deprecated_call(): + df.select(nw_main.col("a").sample(n=2)) + def test_expr_sample_fraction( constructor_eager: ConstructorEager, request: pytest.FixtureRequest diff --git a/tests/expr_and_series/tail_test.py b/tests/expr_and_series/tail_test.py index e04813814..9667795e0 100644 --- a/tests/expr_and_series/tail_test.py +++ b/tests/expr_and_series/tail_test.py @@ -2,13 +2,14 @@ import pytest -import narwhals as nw +import narwhals as nw_main +import narwhals.stable.v1 as nw from tests.utils import ConstructorEager from tests.utils import assert_equal_data @pytest.mark.parametrize("n", [2, -1]) -def test_head( +def test_tail( constructor_eager: ConstructorEager, n: int, request: pytest.FixtureRequest ) -> None: if "polars" in str(constructor_eager) and n < 0: @@ -18,9 +19,12 @@ def test_head( expected = {"a": [2, 3]} assert_equal_data(result, expected) + with pytest.deprecated_call(): + df.select(nw_main.col("a").tail(5)) + @pytest.mark.parametrize("n", [2, -1]) -def test_head_series(constructor_eager: ConstructorEager, n: int) -> None: +def test_tail_series(constructor_eager: ConstructorEager, n: int) -> None: df = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True) result = df.select(df["a"].tail(n)) expected = {"a": [2, 3]}