From d50b3e1b5d6377c4da91ee507c354ddb3cda0df8 Mon Sep 17 00:00:00 2001 From: AlessandroMiola Date: Wed, 1 Jan 2025 19:47:10 +0100 Subject: [PATCH] docs: let lf docstrings examples run on Polars and Dask only --- narwhals/dataframe.py | 709 ++++++++++++++------------------- narwhals/stable/v1/__init__.py | 29 +- 2 files changed, 326 insertions(+), 412 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 4bc706e47..781b91b2c 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -3275,14 +3275,23 @@ def implementation(self) -> Implementation: Examples: >>> import narwhals as nw >>> import polars as pl - >>> lf_native = pl.LazyFrame({"a": [1, 2, 3]}) - >>> lf = nw.from_native(lf_native) + >>> import dask.dataframe as dd + >>> lf_pl = pl.LazyFrame({"a": [1, 2, 3]}) + >>> lf_dask = dd.from_dict({"a": [1, 2, 3]}, npartitions=2) + + >>> lf = nw.from_native(lf_pl) >>> lf.implementation >>> lf.implementation.is_pandas() False >>> lf.implementation.is_polars() True + + >>> lf = nw.from_native(lf_dask) + >>> lf.implementation + + >>> lf.implementation.is_dask() + True """ return self._compliant_frame._implementation # type: ignore[no-any-return] @@ -3299,13 +3308,15 @@ def collect(self) -> DataFrame[Any]: Examples: >>> import narwhals as nw >>> import polars as pl - >>> lf_pl = pl.LazyFrame( - ... { - ... "a": ["a", "b", "a", "b", "b", "c"], - ... "b": [1, 2, 3, 4, 5, 6], - ... "c": [6, 5, 4, 3, 2, 1], - ... } - ... ) + >>> import dask.dataframe as dd + >>> data = { + ... "a": ["a", "b", "a", "b", "b", "c"], + ... "b": [1, 2, 3, 4, 5, 6], + ... "c": [6, 5, 4, 3, 2, 1], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + >>> lf = nw.from_native(lf_pl) >>> lf ┌───────────────────────────────────────┐ @@ -3324,6 +3335,19 @@ def collect(self) -> DataFrame[Any]: │ b ┆ 11 ┆ 10 │ │ c ┆ 6 ┆ 1 │ └─────┴─────┴─────┘ + + >>> lf = nw.from_native(lf_dask) + >>> lf + ┌───────────────────────────────────────┐ + | Narwhals LazyFrame | + | Use `.to_native` to see native output | + └───────────────────────────────────────┘ + >>> df = lf.group_by("a").agg(nw.col("b", "c").sum()).collect() + >>> df.to_native() + a b c + 0 a 4 10 + 1 b 11 10 + 2 c 6 1 """ return self._dataframe( self._compliant_frame.collect(), @@ -3337,23 +3361,16 @@ def to_native(self) -> FrameT: Object of class that user started with. Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa + >>> import dask.dataframe as dd >>> import narwhals as nw >>> >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) - >>> df_pa = pa.table(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) - Calling `to_native` on a Narwhals DataFrame returns the native object: + Calling `to_native` on a Narwhals LazyFrame returns the native object: - >>> nw.from_native(df_pd).lazy().to_native() - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c >>> nw.from_native(lf_pl).to_native().collect() shape: (3, 3) ┌─────┬─────┬─────┐ @@ -3365,6 +3382,11 @@ def to_native(self) -> FrameT: │ 2 ┆ 7.0 ┆ b │ │ 3 ┆ 8.0 ┆ c │ └─────┴─────┴─────┘ + >>> nw.from_native(lf_dask).to_native().compute() + foo bar ham + 0 1 6.0 a + 1 2 7.0 b + 2 3 8.0 c """ return to_native(narwhals_object=self, pass_through=False) @@ -3382,13 +3404,13 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se Examples: >>> import polars as pl - >>> import pandas as pd + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> >>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) Let's define a dataframe-agnostic function: @@ -3396,13 +3418,8 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se ... df = nw.from_native(df_native) ... return df.pipe(lambda _df: _df.select("a")).to_native() - We can then pass either pandas or Polars: + We can then pass any supported library such as Polars or Dask to `agnostic_pipe`: - >>> agnostic_pipe(df_pd) - a - 0 1 - 1 2 - 2 3 >>> agnostic_pipe(lf_pl).collect() shape: (3, 1) ┌─────┐ @@ -3414,6 +3431,11 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se │ 2 │ │ 3 │ └─────┘ + >>> agnostic_pipe(lf_dask).compute() + a + 0 1 + 1 2 + 2 3 """ return super().pipe(function, *args, **kwargs) @@ -3434,13 +3456,13 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: Examples: >>> import polars as pl - >>> import pandas as pd + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> >>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]} - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) Let's define a dataframe-agnostic function: @@ -3448,11 +3470,8 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: ... df = nw.from_native(df_native) ... return df.drop_nulls().to_native() - We can then pass any supported library such as Pandas or Polars to `agnostic_drop_nulls`: + We can then pass any supported library such as Polars or Dask to `agnostic_drop_nulls`: - >>> agnostic_drop_nulls(df_pd) - a ba - 0 1.0 1.0 >>> agnostic_drop_nulls(lf_pl).collect() shape: (1, 2) ┌─────┬─────┐ @@ -3462,6 +3481,9 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: ╞═════╪═════╡ │ 1.0 ┆ 1.0 │ └─────┴─────┘ + >>> agnostic_drop_nulls(lf_dask).compute() + a ba + 0 1.0 1.0 """ return super().drop_nulls(subset=subset) @@ -3476,13 +3498,13 @@ def with_row_index(self, name: str = "index") -> Self: Examples: >>> import polars as pl - >>> import pandas as pd + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) Let's define a dataframe-agnostic function: @@ -3490,13 +3512,8 @@ def with_row_index(self, name: str = "index") -> Self: ... df = nw.from_native(df_native) ... return df.with_row_index().to_native() - We can then pass either pandas or Polars: + We can then pass any supported library such as Polars or Dask to `agnostic_with_row_index`: - >>> agnostic_with_row_index(df_pd) - index a b - 0 0 1 4 - 1 1 2 5 - 2 2 3 6 >>> agnostic_with_row_index(lf_pl).collect() shape: (3, 3) ┌───────┬─────┬─────┐ @@ -3508,6 +3525,11 @@ def with_row_index(self, name: str = "index") -> Self: │ 1 ┆ 2 ┆ 5 │ │ 2 ┆ 3 ┆ 6 │ └───────┴─────┴─────┘ + >>> agnostic_with_row_index(lf_dask).compute() + a b index + 0 1 4 0 + 1 2 5 1 + 2 3 6 2 """ return super().with_row_index(name) @@ -3520,17 +3542,23 @@ def schema(self) -> Schema: Examples: >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw - >>> lf_pl = pl.LazyFrame( - ... { - ... "foo": [1, 2, 3], - ... "bar": [6.0, 7.0, 8.0], - ... "ham": ["a", "b", "c"], - ... } - ... ) + >>> data = { + ... "foo": [1, 2, 3], + ... "bar": [6.0, 7.0, 8.0], + ... "ham": ["a", "b", "c"], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + >>> lf = nw.from_native(lf_pl) >>> lf.schema # doctest: +SKIP - Schema({'foo': Int64, 'bar': Float64, 'ham', String}) + Schema({'foo': Int64, 'bar': Float64, 'ham': String}) + + >>> lf = nw.from_native(lf_dask) + >>> lf.schema # doctest: +SKIP + Schema({'foo': Int64, 'bar': Float64, 'ham': String}) """ return super().schema @@ -3542,17 +3570,23 @@ def collect_schema(self: Self) -> Schema: Examples: >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw - >>> lf_pl = pl.LazyFrame( - ... { - ... "foo": [1, 2, 3], - ... "bar": [6.0, 7.0, 8.0], - ... "ham": ["a", "b", "c"], - ... } - ... ) + >>> data = { + ... "foo": [1, 2, 3], + ... "bar": [6.0, 7.0, 8.0], + ... "ham": ["a", "b", "c"], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + >>> lf = nw.from_native(lf_pl) >>> lf.collect_schema() Schema({'foo': Int64, 'bar': Float64, 'ham': String}) + + >>> lf = nw.from_native(lf_dask) + >>> lf.collect_schema() + Schema({'foo': Int64, 'bar': Float64, 'ham': String}) """ return super().collect_schema() @@ -3564,14 +3598,14 @@ def columns(self) -> list[str]: The column names stored in a list. Examples: - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrame >>> - >>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(df) - >>> lf_pl = pl.LazyFrame(df) + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) We define a library agnostic function: @@ -3579,12 +3613,12 @@ def columns(self) -> list[str]: ... df = nw.from_native(df_native) ... return df.columns - We can then pass either pandas or Polars to `agnostic_columns`: + We can then pass any supported library such as Polars or Dask to `agnostic_columns`: - >>> agnostic_columns(df_pd) - ['foo', 'bar', 'ham'] >>> agnostic_columns(lf_pl) # doctest: +SKIP ['foo', 'bar', 'ham'] + >>> agnostic_columns(lf_dask) + ['foo', 'bar', 'ham'] """ return super().columns @@ -3611,19 +3645,18 @@ def with_columns( existing data. Examples: - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> - >>> df = { + >>> data = { ... "a": [1, 2, 3, 4], ... "b": [0.5, 4, 10, 13], ... "c": [True, True, False, True], ... } - >>> df_pd = pd.DataFrame(df) - >>> df_pl = pl.DataFrame(df) - >>> lf_pl = pl.LazyFrame(df) + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) Let's define a dataframe-agnostic function in which we pass an expression to add it as a new column: @@ -3632,26 +3665,8 @@ def with_columns( ... df = nw.from_native(df_native) ... return df.with_columns((nw.col("a") * 2).alias("2a")).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Polars or Dask to `agnostic_with_columns`: - >>> agnostic_with_columns(df_pd) - a b c 2a - 0 1 0.5 True 2 - 1 2 4.0 True 4 - 2 3 10.0 False 6 - 3 4 13.0 True 8 - >>> agnostic_with_columns(df_pl) - shape: (4, 4) - ┌─────┬──────┬───────┬─────┐ - │ a ┆ b ┆ c ┆ 2a │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ bool ┆ i64 │ - ╞═════╪══════╪═══════╪═════╡ - │ 1 ┆ 0.5 ┆ true ┆ 2 │ - │ 2 ┆ 4.0 ┆ true ┆ 4 │ - │ 3 ┆ 10.0 ┆ false ┆ 6 │ - │ 4 ┆ 13.0 ┆ true ┆ 8 │ - └─────┴──────┴───────┴─────┘ >>> agnostic_with_columns(lf_pl).collect() shape: (4, 4) ┌─────┬──────┬───────┬─────┐ @@ -3664,6 +3679,12 @@ def with_columns( │ 3 ┆ 10.0 ┆ false ┆ 6 │ │ 4 ┆ 13.0 ┆ true ┆ 8 │ └─────┴──────┴───────┴─────┘ + >>> agnostic_with_columns(lf_dask).compute() + a b c 2a + 0 1 0.5 True 2 + 1 2 4.0 True 4 + 2 3 10.0 False 6 + 3 4 13.0 True 8 """ return super().with_columns(*exprs, **named_exprs) @@ -3690,19 +3711,18 @@ def select( `0` use `df.select(nw.col(0))`, not `df.select(0)`. Examples: - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> - >>> df = { + >>> data = { ... "foo": [1, 2, 3], ... "bar": [6, 7, 8], ... "ham": ["a", "b", "c"], ... } - >>> df_pd = pd.DataFrame(df) - >>> df_pl = pl.DataFrame(df) - >>> lf_pl = pl.LazyFrame(df) + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) Let's define a dataframe-agnostic function in which we pass the name of a column to select that column. @@ -3711,24 +3731,8 @@ def select( ... df = nw.from_native(df_native) ... return df.select("foo").to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Polars or Dask to `agnostic_select`: - >>> agnostic_select(df_pd) - foo - 0 1 - 1 2 - 2 3 - >>> agnostic_select(df_pl) - shape: (3, 1) - ┌─────┐ - │ foo │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - │ 3 │ - └─────┘ >>> agnostic_select(lf_pl).collect() shape: (3, 1) ┌─────┐ @@ -3740,29 +3744,18 @@ def select( │ 2 │ │ 3 │ └─────┘ + >>> agnostic_select(lf_dask).compute() + foo + 0 1 + 1 2 + 2 3 Multiple columns can be selected by passing a list of column names. >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.select(["foo", "bar"]).to_native() - >>> - >>> agnostic_select(df_pd) - foo bar - 0 1 6 - 1 2 7 - 2 3 8 - >>> agnostic_select(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 6 │ - │ 2 ┆ 7 │ - │ 3 ┆ 8 │ - └─────┴─────┘ + >>> agnostic_select(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ @@ -3774,6 +3767,11 @@ def select( │ 2 ┆ 7 │ │ 3 ┆ 8 │ └─────┴─────┘ + >>> agnostic_select(lf_dask).compute() + foo bar + 0 1 6 + 1 2 7 + 2 3 8 Multiple columns can also be selected using positional arguments instead of a list. Expressions are also accepted. @@ -3781,23 +3779,7 @@ def select( >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.select(nw.col("foo"), nw.col("bar") + 1).to_native() - >>> - >>> agnostic_select(df_pd) - foo bar - 0 1 7 - 1 2 8 - 2 3 9 - >>> agnostic_select(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 7 │ - │ 2 ┆ 8 │ - │ 3 ┆ 9 │ - └─────┴─────┘ + >>> agnostic_select(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ @@ -3809,29 +3791,18 @@ def select( │ 2 ┆ 8 │ │ 3 ┆ 9 │ └─────┴─────┘ + >>> agnostic_select(lf_dask).compute() + foo bar + 0 1 7 + 1 2 8 + 2 3 9 Use keyword arguments to easily name your expression inputs. >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.select(threshold=nw.col("foo") * 2).to_native() - >>> - >>> agnostic_select(df_pd) - threshold - 0 2 - 1 4 - 2 6 - >>> agnostic_select(df_pl) - shape: (3, 1) - ┌───────────┐ - │ threshold │ - │ --- │ - │ i64 │ - ╞═══════════╡ - │ 2 │ - │ 4 │ - │ 6 │ - └───────────┘ + >>> agnostic_select(lf_pl).collect() shape: (3, 1) ┌───────────┐ @@ -3843,6 +3814,11 @@ def select( │ 4 │ │ 6 │ └───────────┘ + >>> agnostic_select(lf_dask).compute() + threshold + 0 2 + 1 4 + 2 6 """ return super().select(*exprs, **named_exprs) @@ -3858,14 +3834,14 @@ def rename(self, mapping: dict[str, str]) -> Self: The LazyFrame with the specified columns renamed. Examples: - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> >>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) We define a library agnostic function: @@ -3873,13 +3849,8 @@ def rename(self, mapping: dict[str, str]) -> Self: ... df = nw.from_native(df_native) ... return df.rename({"foo": "apple"}).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Polars or Dask to `agnostic_rename`: - >>> agnostic_rename(df_pd) - apple bar ham - 0 1 6 a - 1 2 7 b - 2 3 8 c >>> agnostic_rename(lf_pl).collect() shape: (3, 3) ┌───────┬─────┬─────┐ @@ -3891,6 +3862,11 @@ def rename(self, mapping: dict[str, str]) -> Self: │ 2 ┆ 7 ┆ b │ │ 3 ┆ 8 ┆ c │ └───────┴─────┴─────┘ + >>> agnostic_rename(lf_dask).compute() + apple bar ham + 0 1 6 a + 1 2 7 b + 2 3 8 c """ return super().rename(mapping) @@ -3905,17 +3881,16 @@ def head(self, n: int = 5) -> Self: Examples: >>> import narwhals as nw - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> from narwhals.typing import IntoFrameT >>> >>> data = { ... "a": [1, 2, 3, 4, 5, 6], ... "b": [7, 8, 9, 10, 11, 12], ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) Let's define a dataframe-agnostic function that gets the first 3 rows. @@ -3923,24 +3898,8 @@ def head(self, n: int = 5) -> Self: ... df = nw.from_native(df_native) ... return df.head(3).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Polars or Dask to `agnostic_head`: - >>> agnostic_head(df_pd) - a b - 0 1 7 - 1 2 8 - 2 3 9 - >>> agnostic_head(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 7 │ - │ 2 ┆ 8 │ - │ 3 ┆ 9 │ - └─────┴─────┘ >>> agnostic_head(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ @@ -3952,6 +3911,11 @@ def head(self, n: int = 5) -> Self: │ 2 ┆ 8 │ │ 3 ┆ 9 │ └─────┴─────┘ + >>> agnostic_head(lf_dask).compute() + a b + 0 1 7 + 1 2 8 + 2 3 9 """ return super().head(n) @@ -3964,19 +3928,22 @@ def tail(self, n: int = 5) -> Self: Returns: A subset of the LazyFrame of shape (n, n_columns). + Notes: + `LazyFrame.tail` is not supported for the Dask backend with multiple + partitions. + Examples: >>> import narwhals as nw - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> from narwhals.typing import IntoFrameT >>> >>> data = { ... "a": [1, 2, 3, 4, 5, 6], ... "b": [7, 8, 9, 10, 11, 12], ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=1) Let's define a dataframe-agnostic function that gets the last 3 rows. @@ -3984,24 +3951,8 @@ def tail(self, n: int = 5) -> Self: ... df = nw.from_native(df_native) ... return df.tail(3).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Polars or Dask to `agnostic_tail`: - >>> agnostic_tail(df_pd) - a b - 3 4 10 - 4 5 11 - 5 6 12 - >>> agnostic_tail(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 4 ┆ 10 │ - │ 5 ┆ 11 │ - │ 6 ┆ 12 │ - └─────┴─────┘ >>> agnostic_tail(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ @@ -4013,6 +3964,11 @@ def tail(self, n: int = 5) -> Self: │ 5 ┆ 11 │ │ 6 ┆ 12 │ └─────┴─────┘ + >>> agnostic_tail(lf_dask).compute() + a b + 3 4 10 + 4 5 11 + 5 6 12 """ return super().tail(n) @@ -4033,14 +3989,14 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: Please consider upgrading to a newer version or pass to eager mode. Examples: - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) We define a library agnostic function: @@ -4048,13 +4004,8 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: ... df = nw.from_native(df_native) ... return df.drop("ham").to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Polars or Dask to `agnostic_drop`: - >>> agnostic_drop(df_pd) - foo bar - 0 1 6.0 - 1 2 7.0 - 2 3 8.0 >>> agnostic_drop(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ @@ -4066,6 +4017,11 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: │ 2 ┆ 7.0 │ │ 3 ┆ 8.0 │ └─────┴─────┘ + >>> agnostic_drop(lf_dask).compute() + foo bar + 0 1 6.0 + 1 2 7.0 + 2 3 8.0 Use positional arguments to drop multiple columns. @@ -4073,11 +4029,6 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: ... df = nw.from_native(df_native) ... return df.drop("foo", "ham").to_native() - >>> agnostic_drop(df_pd) - bar - 0 6.0 - 1 7.0 - 2 8.0 >>> agnostic_drop(lf_pl).collect() shape: (3, 1) ┌─────┐ @@ -4089,6 +4040,11 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: │ 7.0 │ │ 8.0 │ └─────┘ + >>> agnostic_drop(lf_dask).compute() + bar + 0 6.0 + 1 7.0 + 2 8.0 """ return super().drop(*flatten(columns), strict=strict) @@ -4117,11 +4073,11 @@ def unique( to run on the streaming engine for Polars. Returns: - LazyFrame: LazyFrame with unique rows. + The LazyFrame with unique rows. Examples: - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> @@ -4130,8 +4086,8 @@ def unique( ... "bar": ["a", "a", "a", "a"], ... "ham": ["b", "b", "b", "b"], ... } - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) We define a library agnostic function: @@ -4139,11 +4095,8 @@ def unique( ... df = nw.from_native(df_native) ... return df.unique(["bar", "ham"]).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Polars or Dask to `agnostic_unique`: - >>> agnostic_unique(df_pd) - foo bar ham - 0 1 a b >>> agnostic_unique(lf_pl).collect() shape: (1, 3) ┌─────┬─────┬─────┐ @@ -4153,6 +4106,9 @@ def unique( ╞═════╪═════╪═════╡ │ 1 ┆ a ┆ b │ └─────┴─────┴─────┘ + >>> agnostic_unique(lf_dask).compute() + foo bar ham + 0 1 a b """ return super().unique(subset, keep=keep, maintain_order=maintain_order) @@ -4174,8 +4130,8 @@ def filter( The filtered LazyFrame. Examples: - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> @@ -4184,9 +4140,8 @@ def filter( ... "bar": [6, 7, 8], ... "ham": ["a", "b", "c"], ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) Let's define a dataframe-agnostic function in which we filter on one condition. @@ -4195,22 +4150,8 @@ def filter( ... df = nw.from_native(df_native) ... return df.filter(nw.col("foo") > 1).to_native() - We can then pass either pandas or Polars to `agnostic_filter`: + We can then pass any supported library such as Polars or Dask to `agnostic_filter`: - >>> agnostic_filter(df_pd) - foo bar ham - 1 2 7 b - 2 3 8 c - >>> agnostic_filter(df_pl) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 2 ┆ 7 ┆ b │ - │ 3 ┆ 8 ┆ c │ - └─────┴─────┴─────┘ >>> agnostic_filter(lf_pl).collect() shape: (2, 3) ┌─────┬─────┬─────┐ @@ -4221,25 +4162,17 @@ def filter( │ 2 ┆ 7 ┆ b │ │ 3 ┆ 8 ┆ c │ └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask).compute() + foo bar ham + 1 2 7 b + 2 3 8 c Filter on multiple conditions: >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")).to_native() - >>> - >>> agnostic_filter(df_pd) - foo bar ham - 0 1 6 a - >>> agnostic_filter(df_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - └─────┴─────┴─────┘ + >>> agnostic_filter(lf_pl).collect() shape: (1, 3) ┌─────┬─────┬─────┐ @@ -4249,6 +4182,9 @@ def filter( ╞═════╪═════╪═════╡ │ 1 ┆ 6 ┆ a │ └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask).compute() + foo bar ham + 0 1 6 a Provide multiple filters using `*args` syntax: @@ -4258,19 +4194,7 @@ def filter( ... nw.col("foo") == 1, ... nw.col("ham") == "a", ... ).to_native() - >>> - >>> agnostic_filter(df_pd) - foo bar ham - 0 1 6 a - >>> agnostic_filter(df_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - └─────┴─────┴─────┘ + >>> agnostic_filter(lf_pl).collect() shape: (1, 3) ┌─────┬─────┬─────┐ @@ -4280,6 +4204,9 @@ def filter( ╞═════╪═════╪═════╡ │ 1 ┆ 6 ┆ a │ └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask).compute() + foo bar ham + 0 1 6 a Filter on an OR condition: @@ -4288,21 +4215,7 @@ def filter( ... return df.filter( ... (nw.col("foo") == 1) | (nw.col("ham") == "c") ... ).to_native() - >>> - >>> agnostic_filter(df_pd) - foo bar ham - 0 1 6 a - 2 3 8 c - >>> agnostic_filter(df_pl) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - │ 3 ┆ 8 ┆ c │ - └─────┴─────┴─────┘ + >>> agnostic_filter(lf_pl).collect() shape: (2, 3) ┌─────┬─────┬─────┐ @@ -4313,25 +4226,17 @@ def filter( │ 1 ┆ 6 ┆ a │ │ 3 ┆ 8 ┆ c │ └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask).compute() + foo bar ham + 0 1 6 a + 2 3 8 c Provide multiple filters using `**kwargs` syntax: >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.filter(foo=2, ham="b").to_native() - >>> - >>> agnostic_filter(df_pd) - foo bar ham - 1 2 7 b - >>> agnostic_filter(df_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 2 ┆ 7 ┆ b │ - └─────┴─────┴─────┘ + >>> agnostic_filter(lf_pl).collect() shape: (1, 3) ┌─────┬─────┬─────┐ @@ -4341,6 +4246,9 @@ def filter( ╞═════╪═════╪═════╡ │ 2 ┆ 7 ┆ b │ └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask).compute() + foo bar ham + 1 2 7 b """ return super().filter(*predicates, **constraints) @@ -4357,25 +4265,24 @@ def group_by( included in the result. Returns: - LazyGroupBy: Object which can be used to perform aggregations. + Object which can be used to perform aggregations. Examples: Group by one column and call `agg` to compute the grouped sum of another column. - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> - >>> df = { + >>> data = { ... "a": ["a", "b", "a", "b", "c"], ... "b": [1, 2, 1, 3, 3], ... "c": [5, 4, 3, 2, 1], ... } - >>> df_pd = pd.DataFrame(df) - >>> df_pl = pl.DataFrame(df) - >>> lf_pl = pl.LazyFrame(df) + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) Let's define a dataframe-agnostic function in which we group by one column and call `agg` to compute the grouped sum of another column. @@ -4384,24 +4291,8 @@ def group_by( ... df = nw.from_native(df_native) ... return df.group_by("a").agg(nw.col("b").sum()).sort("a").to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Polars or Dask to `agnostic_group_by_agg`: - >>> agnostic_group_by_agg(df_pd) - a b - 0 a 2 - 1 b 5 - 2 c 3 - >>> agnostic_group_by_agg(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ str ┆ i64 │ - ╞═════╪═════╡ - │ a ┆ 2 │ - │ b ┆ 5 │ - │ c ┆ 3 │ - └─────┴─────┘ >>> agnostic_group_by_agg(lf_pl).collect() shape: (3, 2) ┌─────┬─────┐ @@ -4413,6 +4304,11 @@ def group_by( │ b ┆ 5 │ │ c ┆ 3 │ └─────┴─────┘ + >>> agnostic_group_by_agg(lf_dask).compute() + a b + 0 a 2 + 1 b 5 + 2 c 3 Group by multiple columns by passing a list of column names. @@ -4421,25 +4317,7 @@ def group_by( ... return ( ... df.group_by(["a", "b"]).agg(nw.max("c")).sort(["a", "b"]).to_native() ... ) - >>> - >>> agnostic_group_by_agg(df_pd) - a b c - 0 a 1 5 - 1 b 2 4 - 2 b 3 2 - 3 c 3 1 - >>> agnostic_group_by_agg(df_pl) - shape: (4, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ a ┆ 1 ┆ 5 │ - │ b ┆ 2 ┆ 4 │ - │ b ┆ 3 ┆ 2 │ - │ c ┆ 3 ┆ 1 │ - └─────┴─────┴─────┘ + >>> agnostic_group_by_agg(lf_pl).collect() shape: (4, 3) ┌─────┬─────┬─────┐ @@ -4452,6 +4330,12 @@ def group_by( │ b ┆ 3 ┆ 2 │ │ c ┆ 3 ┆ 1 │ └─────┴─────┴─────┘ + >>> agnostic_group_by_agg(lf_dask).compute() + a b c + 0 a 1 5 + 1 b 2 4 + 2 b 3 2 + 3 c 3 1 """ from narwhals.expr import Expr from narwhals.group_by import LazyGroupBy @@ -4493,8 +4377,8 @@ def sort( Examples: >>> import narwhals as nw - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> from narwhals.typing import IntoFrameT >>> >>> data = { @@ -4502,8 +4386,8 @@ def sort( ... "b": [6.0, 5.0, 4.0], ... "c": ["a", "c", "b"], ... } - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) Let's define a dataframe-agnostic function in which we sort by multiple columns in different orders @@ -4512,13 +4396,8 @@ def sort( ... df = nw.from_native(df_native) ... return df.sort("c", "a", descending=[False, True]).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Polars or Dask to `agnostic_sort`: - >>> agnostic_sort(df_pd) - a b c - 0 1.0 6.0 a - 2 NaN 4.0 b - 1 2.0 5.0 c >>> agnostic_sort(lf_pl).collect() shape: (3, 3) ┌──────┬─────┬─────┐ @@ -4530,6 +4409,11 @@ def sort( │ null ┆ 4.0 ┆ b │ │ 2 ┆ 5.0 ┆ c │ └──────┴─────┴─────┘ + >>> agnostic_sort(lf_dask).compute() + a b c + 0 1.0 6.0 a + 2 NaN 4.0 b + 1 2.0 5.0 c """ return super().sort(by, *more_by, descending=descending, nulls_last=nulls_last) @@ -4565,8 +4449,8 @@ def join( Examples: >>> import narwhals as nw - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> from narwhals.typing import IntoFrameT >>> >>> data = { @@ -4579,11 +4463,10 @@ def join( ... "ham": ["a", "b", "d"], ... } - >>> df_pd = pd.DataFrame(data) - >>> other_pd = pd.DataFrame(data_other) - >>> lf_pl = pl.LazyFrame(data) >>> other_pl = pl.LazyFrame(data_other) + >>> lf_dask = dd.from_dict(data, npartitions=2) + >>> other_dask = dd.from_dict(data_other, npartitions=2) Let's define a dataframe-agnostic function in which we join over "ham" column: @@ -4593,14 +4476,11 @@ def join( ... ) -> IntoFrameT: ... df = nw.from_native(df_native) ... other = nw.from_native(other_native) - ... return df.join(other, left_on="ham", right_on="ham").to_native() - - We can now pass either pandas or Polars to the function: + ... return ( + ... df.join(other, left_on="ham", right_on="ham").sort("ham").to_native() + ... ) - >>> agnostic_join_on_ham(df_pd, other_pd) - foo bar ham apple - 0 1 6.0 a x - 1 2 7.0 b y + We can then pass any supported library such as Polars or Dask to `agnostic_join_on_ham`: >>> agnostic_join_on_ham(lf_pl, other_pl).collect() shape: (2, 4) @@ -4612,6 +4492,10 @@ def join( │ 1 ┆ 6.0 ┆ a ┆ x │ │ 2 ┆ 7.0 ┆ b ┆ y │ └─────┴─────┴─────┴───────┘ + >>> agnostic_join_on_ham(lf_dask, other_dask).compute() + foo bar ham apple + 0 1 6.0 a x + 0 2 7.0 b y """ return super().join( other, how=how, left_on=left_on, right_on=right_on, on=on, suffix=suffix @@ -4662,8 +4546,8 @@ def join_asof( Examples: >>> from datetime import datetime >>> import narwhals as nw - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> from typing import Literal >>> from narwhals.typing import IntoFrameT >>> @@ -4685,10 +4569,10 @@ def join_asof( ... ], ... "population": [82.19, 82.66, 83.12], ... } - >>> gdp_pd = pd.DataFrame(data_gdp) - >>> population_pd = pd.DataFrame(data_population) - >>> gdp_pl = pl.LazyFrame(data_gdp).sort("datetime") - >>> population_pl = pl.LazyFrame(data_population).sort("datetime") + >>> gdp_pl = pl.LazyFrame(data_gdp) + >>> population_pl = pl.LazyFrame(data_population) + >>> gdp_dask = dd.from_dict(data_gdp, npartitions=2) + >>> population_dask = dd.from_dict(data_population, npartitions=2) Let's define a dataframe-agnostic function in which we join over "datetime" column: @@ -4699,15 +4583,13 @@ def join_asof( ... ) -> IntoFrameT: ... df = nw.from_native(df_native) ... other = nw.from_native(other_native) - ... return df.join_asof(other, on="datetime", strategy=strategy).to_native() - - We can now pass either pandas or Polars to the function: + ... return ( + ... df.sort("datetime") + ... .join_asof(other, on="datetime", strategy=strategy) + ... .to_native() + ... ) - >>> agnostic_join_asof_datetime(population_pd, gdp_pd, strategy="backward") - datetime population gdp - 0 2016-03-01 82.19 4164 - 1 2018-08-01 82.66 4566 - 2 2019-01-01 83.12 4696 + We can then pass any supported library such as Polars or Dask to `agnostic_join_asof_datetime`: >>> agnostic_join_asof_datetime( ... population_pl, gdp_pl, strategy="backward" @@ -4722,13 +4604,20 @@ def join_asof( │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │ │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ └─────────────────────┴────────────┴──────┘ + >>> agnostic_join_asof_datetime( + ... population_dask, gdp_dask, strategy="backward" + ... ).compute() + datetime population gdp + 0 2016-03-01 82.19 4164 + 1 2018-08-01 82.66 4566 + 0 2019-01-01 83.12 4696 Here is a real-world times-series example that uses `by` argument. >>> from datetime import datetime >>> import narwhals as nw - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> from narwhals.typing import IntoFrameT >>> >>> data_quotes = { @@ -4767,10 +4656,10 @@ def join_asof( ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], ... "quantity": [75, 155, 100, 100, 100], ... } - >>> quotes_pd = pd.DataFrame(data_quotes) - >>> trades_pd = pd.DataFrame(data_trades) - >>> quotes_pl = pl.LazyFrame(data_quotes).sort("datetime") - >>> trades_pl = pl.LazyFrame(data_trades).sort("datetime") + >>> quotes_pl = pl.LazyFrame(data_quotes) + >>> trades_pl = pl.LazyFrame(data_trades) + >>> quotes_dask = dd.from_dict(data_quotes, npartitions=2) + >>> trades_dask = dd.from_dict(data_trades, npartitions=2) Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns: @@ -4780,17 +4669,13 @@ def join_asof( ... ) -> IntoFrameT: ... df = nw.from_native(df_native) ... other = nw.from_native(other_native) - ... return df.join_asof(other, on="datetime", by="ticker").to_native() - - We can now pass either pandas or Polars to the function: + ... return ( + ... df.sort("datetime") + ... .join_asof(other, on="datetime", by="ticker") + ... .to_native() + ... ) - >>> agnostic_join_asof_datetime_by_ticker(trades_pd, quotes_pd) - datetime ticker price quantity bid ask - 0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96 - 1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98 - 2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93 - 3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93 - 4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN + We can then pass any supported library such as Polars or Dask to `agnostic_join_asof_datetime_by_ticker`: >>> agnostic_join_asof_datetime_by_ticker(trades_pl, quotes_pl).collect() shape: (5, 6) @@ -4805,6 +4690,13 @@ def join_asof( │ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │ │ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │ └────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘ + >>> agnostic_join_asof_datetime_by_ticker(trades_dask, quotes_dask).compute() + datetime ticker price quantity bid ask + 0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96 + 0 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98 + 1 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93 + 2 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN + 3 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93 """ return super().join_asof( other, @@ -4825,12 +4717,10 @@ def clone(self) -> Self: Examples: >>> import narwhals as nw - >>> import pandas as pd >>> import polars as pl >>> from narwhals.typing import IntoFrameT >>> >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) Let's define a dataframe-agnostic function in which we copy the DataFrame: @@ -4839,10 +4729,7 @@ def clone(self) -> Self: ... df = nw.from_native(df_native) ... return df.clone().to_native() - >>> agnostic_clone(df_pd) - a b - 0 1 3 - 1 2 4 + We can then pass any supported library such as Polars to `agnostic_clone`: >>> agnostic_clone(lf_pl).collect() shape: (2, 2) @@ -4907,25 +4794,22 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: Examples: >>> import narwhals as nw - >>> import pandas as pd >>> import polars as pl + >>> import dask.dataframe as dd >>> from narwhals.typing import IntoFrameT >>> >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} - >>> df_pd = pd.DataFrame(data) >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) - Let's define a dataframe-agnostic function in which gather every 2 rows, + Let's define a dataframe-agnostic function in which we gather every 2 rows, starting from a offset of 1: >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.gather_every(n=2, offset=1).to_native() - >>> agnostic_gather_every(df_pd) - a b - 1 2 6 - 3 4 8 + We can then pass any supported library such as Polars or Dask to `agnostic_gather_every`: >>> agnostic_gather_every(lf_pl).collect() shape: (2, 2) @@ -4937,6 +4821,10 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: │ 2 ┆ 6 │ │ 4 ┆ 8 │ └─────┴─────┘ + >>> agnostic_gather_every(lf_dask).compute() + a b + 1 2 6 + 3 4 8 """ return super().gather_every(n=n, offset=offset) @@ -4975,6 +4863,7 @@ def unpivot( Examples: >>> import narwhals as nw >>> import polars as pl + >>> import dask.dataframe as dd >>> from narwhals.typing import IntoFrameT >>> >>> data = { @@ -4983,6 +4872,7 @@ def unpivot( ... "c": [2, 4, 6], ... } >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) We define a library agnostic function: @@ -4992,6 +4882,8 @@ def unpivot( ... df.unpivot(on=["b", "c"], index="a").sort(["variable", "a"]) ... ).to_native() + We can then pass any supported library such as Polars or Dask to `agnostic_unpivot`: + >>> agnostic_unpivot(lf_pl).collect() shape: (6, 3) ┌─────┬──────────┬───────┐ @@ -5006,6 +4898,14 @@ def unpivot( │ y ┆ c ┆ 4 │ │ z ┆ c ┆ 6 │ └─────┴──────────┴───────┘ + >>> agnostic_unpivot(lf_dask).compute() + a variable value + 0 x b 1 + 1 y b 3 + 0 z b 5 + 2 x c 2 + 3 y c 4 + 1 z c 6 """ return super().unpivot( on=on, index=index, variable_name=variable_name, value_name=value_name @@ -5015,7 +4915,7 @@ def explode(self: Self, columns: str | Sequence[str], *more_columns: str) -> Sel """Explode the dataframe to long format by exploding the given columns. Notes: - It is possible to explode multiple columns only if these columns must have + It is possible to explode multiple columns only if these columns have matching element counts. Arguments: @@ -5045,8 +4945,7 @@ def explode(self: Self, columns: str | Sequence[str], *more_columns: str) -> Sel ... .to_native() ... ) - We can then pass any supported library such as pandas, Polars (eager), - or PyArrow to `agnostic_explode`: + We can then pass any supported library such as Polars to `agnostic_explode`: >>> agnostic_explode(pl.LazyFrame(data)).collect() shape: (5, 3) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 2b5be2eee..3708e8e13 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -405,13 +405,15 @@ def collect(self) -> DataFrame[Any]: Examples: >>> import narwhals as nw >>> import polars as pl - >>> lf_pl = pl.LazyFrame( - ... { - ... "a": ["a", "b", "a", "b", "b", "c"], - ... "b": [1, 2, 3, 4, 5, 6], - ... "c": [6, 5, 4, 3, 2, 1], - ... } - ... ) + >>> import dask.dataframe as dd + >>> data = { + ... "a": ["a", "b", "a", "b", "b", "c"], + ... "b": [1, 2, 3, 4, 5, 6], + ... "c": [6, 5, 4, 3, 2, 1], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + >>> lf = nw.from_native(lf_pl) >>> lf ┌───────────────────────────────────────┐ @@ -430,6 +432,19 @@ def collect(self) -> DataFrame[Any]: │ b ┆ 11 ┆ 10 │ │ c ┆ 6 ┆ 1 │ └─────┴─────┴─────┘ + + >>> lf = nw.from_native(lf_dask) + >>> lf + ┌───────────────────────────────────────┐ + | Narwhals LazyFrame | + | Use `.to_native` to see native output | + └───────────────────────────────────────┘ + >>> df = lf.group_by("a").agg(nw.col("b", "c").sum()).collect() + >>> df.to_native() + a b c + 0 a 4 10 + 1 b 11 10 + 2 c 6 1 """ return super().collect() # type: ignore[return-value]