diff --git a/.github/workflows/mkdocs.yml b/.github/workflows/mkdocs.yml index 8aaaba516..5024a474f 100644 --- a/.github/workflows/mkdocs.yml +++ b/.github/workflows/mkdocs.yml @@ -31,5 +31,7 @@ jobs: # hopefully temporary until https://github.com/mkdocstrings/mkdocstrings/issues/716 run: pip install git+https://github.com/MarcoGorelli/griffe.git@no-overloads - run: pip install -e .[docs] + - name: insert-docstrings + run: git ls-files narwhals | xargs python utils/add_docstring_examples.py - run: mkdocs gh-deploy --force diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index bb46b4f0d..8baf119e3 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -90,6 +90,11 @@ jobs: if: matrix.python-version == '3.11' - name: show-deps run: uv pip freeze + - name: insert-docstrings + run: | + du -c --apparent-size $(git ls-files) | grep total$ + git ls-files narwhals | xargs python utils/add_docstring_examples.py + du -c --apparent-size $(git ls-files) | grep total$ - name: Run pytest run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 --runslow --all-cpu-constructors - name: Run doctests diff --git a/docs/docstring_examples/__init__.py b/docs/docstring_examples/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/docs/docstring_examples/dataframe.py b/docs/docstring_examples/dataframe.py new file mode 100644 index 000000000..568e4eb14 --- /dev/null +++ b/docs/docstring_examples/dataframe.py @@ -0,0 +1,1993 @@ +from __future__ import annotations + +EXAMPLES = { + "implementation": """ + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> lf_pl = pl.LazyFrame({"a": [1, 2, 3]}) + >>> lf_dask = dd.from_dict({"a": [1, 2, 3]}, npartitions=2) + + >>> lf = nw.from_native(lf_pl) + >>> lf.implementation + + >>> lf.implementation.is_pandas() + False + >>> lf.implementation.is_polars() + True + + >>> lf = nw.from_native(lf_dask) + >>> lf.implementation + + >>> 
lf.implementation.is_dask() + True + """, + "lazy": """ + Construct pandas and Polars objects: + + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> df_pd = pd.DataFrame(df) + >>> lf_pl = pl.LazyFrame(df) + + We define a library agnostic function: + + >>> def agnostic_lazy(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.lazy().to_native() + + Note that then, pandas dataframe stay eager, and the Polars LazyFrame stays lazy: + + >>> agnostic_lazy(df_pd) + foo bar ham + 0 1 6.0 a + 1 2 7.0 b + 2 3 8.0 c + >>> agnostic_lazy(lf_pl) + + """, + "to_native": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Calling `to_native` on a Narwhals LazyFrame returns the native object: + + >>> nw.from_native(lf_pl).to_native().collect() + shape: (3, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 6.0 ┆ a │ + │ 2 ┆ 7.0 ┆ b │ + │ 3 ┆ 8.0 ┆ c │ + └─────┴─────┴─────┘ + >>> nw.from_native(lf_dask).to_native().compute() + foo bar ham + 0 1 6.0 a + 1 2 7.0 b + 2 3 8.0 c + """, + "to_pandas": """ + Construct pandas, Polars (eager) and PyArrow DataFrames: + + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_to_pandas(df_native: IntoDataFrame) -> pd.DataFrame: + ... 
df = nw.from_native(df_native) + ... return df.to_pandas() + + We can then pass any supported library such as pandas, Polars (eager), or + PyArrow to `agnostic_to_pandas`: + + >>> agnostic_to_pandas(df_pd) + foo bar ham + 0 1 6.0 a + 1 2 7.0 b + 2 3 8.0 c + >>> agnostic_to_pandas(df_pl) + foo bar ham + 0 1 6.0 a + 1 2 7.0 b + 2 3 8.0 c + >>> agnostic_to_pandas(df_pa) + foo bar ham + 0 1 6.0 a + 1 2 7.0 b + 2 3 8.0 c + """, + "write_csv": """ + Construct pandas, Polars (eager) and PyArrow DataFrames: + + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_write_csv(df_native: IntoDataFrame) -> str: + ... df = nw.from_native(df_native) + ... return df.write_csv() + + We can pass any supported library such as pandas, Polars or PyArrow to `agnostic_write_csv`: + + >>> agnostic_write_csv(df_pd) + 'foo,bar,ham\\n1,6.0,a\\n2,7.0,b\\n3,8.0,c\\n' + >>> agnostic_write_csv(df_pl) + 'foo,bar,ham\\n1,6.0,a\\n2,7.0,b\\n3,8.0,c\\n' + >>> agnostic_write_csv(df_pa) + '"foo","bar","ham"\\n1,6,"a"\\n2,7,"b"\\n3,8,"c"\\n' + + If we had passed a file name to `write_csv`, it would have been + written to that file. + """, + "write_parquet": """ + Construct pandas, Polars and PyArrow DataFrames: + + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_write_parquet(df_native: IntoDataFrame): + ... 
df = nw.from_native(df_native) + ... df.write_parquet("foo.parquet") + + We can then pass either pandas, Polars or PyArrow to `agnostic_write_parquet`: + + >>> agnostic_write_parquet(df_pd) # doctest:+SKIP + >>> agnostic_write_parquet(df_pl) # doctest:+SKIP + >>> agnostic_write_parquet(df_pa) # doctest:+SKIP + """, + "to_numpy": """ + Construct pandas and polars DataFrames: + + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> import numpy as np + >>> from narwhals.typing import IntoDataFrame + >>> data = {"foo": [1, 2, 3], "bar": [6.5, 7.0, 8.5], "ham": ["a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_to_numpy(df_native: IntoDataFrame) -> np.ndarray: + ... df = nw.from_native(df_native) + ... return df.to_numpy() + + We can then pass either pandas, Polars or PyArrow to `agnostic_to_numpy`: + + >>> agnostic_to_numpy(df_pd) + array([[1, 6.5, 'a'], + [2, 7.0, 'b'], + [3, 8.5, 'c']], dtype=object) + >>> agnostic_to_numpy(df_pl) + array([[1, 6.5, 'a'], + [2, 7.0, 'b'], + [3, 8.5, 'c']], dtype=object) + >>> agnostic_to_numpy(df_pa) + array([[1, 6.5, 'a'], + [2, 7.0, 'b'], + [3, 8.5, 'c']], dtype=object) + """, + "shape": """ + Construct pandas and polars DataFrames: + + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> data = {"foo": [1, 2, 3, 4, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: + ... df = nw.from_native(df_native) + ... 
return df.shape + + We can then pass either pandas, Polars or PyArrow to `agnostic_shape`: + + >>> agnostic_shape(df_pd) + (5, 1) + >>> agnostic_shape(df_pl) + (5, 1) + >>> agnostic_shape(df_pa) + (5, 1) + """, + "get_column": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + >>> data = {"a": [1, 2], "b": [3, 4]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_get_column(df_native: IntoDataFrame) -> IntoSeries: + ... df = nw.from_native(df_native) + ... name = df.columns[0] + ... return df.get_column(name).to_native() + + We can then pass either pandas, Polars or PyArrow to `agnostic_get_column`: + + >>> agnostic_get_column(df_pd) + 0 1 + 1 2 + Name: a, dtype: int64 + >>> agnostic_get_column(df_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (2,) + Series: 'a' [i64] + [ + 1 + 2 + ] + >>> agnostic_get_column(df_pa) # doctest:+ELLIPSIS + + [ + [ + 1, + 2 + ] + ] + """, + "estimated_size": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrameT + >>> data = { + ... "foo": [1, 2, 3], + ... "bar": [6.0, 7.0, 8.0], + ... "ham": ["a", "b", "c"], + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_estimated_size(df_native: IntoDataFrameT) -> int | float: + ... df = nw.from_native(df_native) + ... 
return df.estimated_size() + + We can then pass either pandas, Polars or PyArrow to `agnostic_estimated_size`: + + >>> agnostic_estimated_size(df_pd) + np.int64(330) + >>> agnostic_estimated_size(df_pl) + 51 + >>> agnostic_estimated_size(df_pa) + 63 + """, + "__getitem__": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + >>> data = {"a": [1, 2], "b": [3, 4]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_slice(df_native: IntoDataFrame) -> IntoSeries: + ... df = nw.from_native(df_native) + ... return df["a"].to_native() + + We can then pass either pandas, Polars or PyArrow to `agnostic_slice`: + + >>> agnostic_slice(df_pd) + 0 1 + 1 2 + Name: a, dtype: int64 + >>> agnostic_slice(df_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (2,) + Series: 'a' [i64] + [ + 1 + 2 + ] + >>> agnostic_slice(df_pa) # doctest:+ELLIPSIS + + [ + [ + 1, + 2 + ] + ] + """, + "to_dict": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> data = { + ... "A": [1, 2, 3, 4, 5], + ... "fruits": ["banana", "banana", "apple", "apple", "banana"], + ... "B": [5, 4, 3, 2, 1], + ... "animals": ["beetle", "fly", "beetle", "beetle", "beetle"], + ... "optional": [28, 300, None, 2, -30], + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_to_dict( + ... df_native: IntoDataFrame, + ... ) -> dict[str, list[int | str | float | None]]: + ... df = nw.from_native(df_native) + ... 
return df.to_dict(as_series=False) + + We can then pass either pandas, Polars or PyArrow to `agnostic_to_dict`: + + >>> agnostic_to_dict(df_pd) + {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28.0, 300.0, nan, 2.0, -30.0]} + >>> agnostic_to_dict(df_pl) + {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]} + >>> agnostic_to_dict(df_pa) + {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]} + """, + "row": """ + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> from narwhals.typing import IntoDataFrame + >>> from typing import Any + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a library-agnostic function to get the second row. + + >>> def agnostic_row(df_native: IntoDataFrame) -> tuple[Any, ...]: + ... return nw.from_native(df_native).row(1) + + We can then pass either pandas, Polars or PyArrow to `agnostic_row`: + + >>> agnostic_row(df_pd) + (2, 5) + >>> agnostic_row(df_pl) + (2, 5) + >>> agnostic_row(df_pa) + (, ) + """, + "pipe": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.pipe(lambda _df: _df.select("a")).collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_pipe`: + + >>> agnostic_pipe(lf_pl) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 1 │ + │ 2 │ + │ 3 │ + └─────┘ + >>> agnostic_pipe(lf_dask) + a + 0 1 + 1 2 + 2 3 + """, + "drop_nulls": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.drop_nulls().collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_drop_nulls`: + + >>> agnostic_drop_nulls(lf_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ ba │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞═════╪═════╡ + │ 1.0 ┆ 1.0 │ + └─────┴─────┘ + >>> agnostic_drop_nulls(lf_dask) + a ba + 0 1.0 1.0 + """, + "with_row_index": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_with_row_index(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.with_row_index().collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_with_row_index`: + + >>> agnostic_with_row_index(lf_pl) + shape: (3, 3) + ┌───────┬─────┬─────┐ + │ index ┆ a ┆ b │ + │ --- ┆ --- ┆ --- │ + │ u32 ┆ i64 ┆ i64 │ + ╞═══════╪═════╪═════╡ + │ 0 ┆ 1 ┆ 4 │ + │ 1 ┆ 2 ┆ 5 │ + │ 2 ┆ 3 ┆ 6 │ + └───────┴─────┴─────┘ + >>> agnostic_with_row_index(lf_dask) + index a b + 0 0 1 4 + 1 1 2 5 + 2 2 3 6 + """, + "schema": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> data = { + ... "foo": [1, 2, 3], + ... "bar": [6.0, 7.0, 8.0], + ... "ham": ["a", "b", "c"], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + >>> lf = nw.from_native(lf_pl) + >>> lf.schema # doctest: +SKIP + Schema({'foo': Int64, 'bar': Float64, 'ham': String}) + + >>> lf = nw.from_native(lf_dask) + >>> lf.schema # doctest: +SKIP + Schema({'foo': Int64, 'bar': Float64, 'ham': String}) + """, + "collect_schema": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> data = { + ... "foo": [1, 2, 3], + ... "bar": [6.0, 7.0, 8.0], + ... "ham": ["a", "b", "c"], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + >>> lf = nw.from_native(lf_pl) + >>> lf.collect_schema() + Schema({'foo': Int64, 'bar': Float64, 'ham': String}) + + >>> lf = nw.from_native(lf_dask) + >>> lf.collect_schema() + Schema({'foo': Int64, 'bar': Float64, 'ham': String}) + """, + "columns": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + We define a library agnostic function: + + >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: + ... 
df = nw.from_native(df_native) + ... return df.columns + + We can then pass any supported library such as Polars or Dask to `agnostic_columns`: + + >>> agnostic_columns(lf_pl) # doctest: +SKIP + ['foo', 'bar', 'ham'] + >>> agnostic_columns(lf_dask) + ['foo', 'bar', 'ham'] + """, + "rows": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_rows(df_native: IntoDataFrame, *, named: bool): + ... return nw.from_native(df_native, eager_only=True).rows(named=named) + + We can then pass any supported library such as Pandas, Polars, or PyArrow + to `agnostic_rows`: + + >>> agnostic_rows(df_pd, named=False) + [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] + >>> agnostic_rows(df_pd, named=True) + [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] + >>> agnostic_rows(df_pl, named=False) + [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] + >>> agnostic_rows(df_pl, named=True) + [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] + >>> agnostic_rows(df_pa, named=False) + [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] + >>> agnostic_rows(df_pa, named=True) + [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] + """, + "iter_rows": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a 
library agnostic function: + + >>> def agnostic_iter_rows(df_native: IntoDataFrame, *, named: bool): + ... return nw.from_native(df_native, eager_only=True).iter_rows(named=named) + + We can then pass any supported library such as Pandas, Polars, or PyArrow + to `agnostic_iter_rows`: + + >>> [row for row in agnostic_iter_rows(df_pd, named=False)] + [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] + >>> [row for row in agnostic_iter_rows(df_pd, named=True)] + [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] + >>> [row for row in agnostic_iter_rows(df_pl, named=False)] + [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] + >>> [row for row in agnostic_iter_rows(df_pl, named=True)] + [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] + >>> [row for row in agnostic_iter_rows(df_pa, named=False)] + [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] + >>> [row for row in agnostic_iter_rows(df_pa, named=True)] + [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] + """, + "with_columns": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [1, 2, 3, 4], + ... "b": [0.5, 4, 10, 13], + ... "c": [True, True, False, True], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function in which we pass an expression + to add it as a new column: + + >>> def agnostic_with_columns(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return ( + ... df.with_columns((nw.col("a") * 2).alias("2a")).collect().to_native() + ... 
) + + We can then pass any supported library such as Polars or Dask to `agnostic_with_columns`: + + >>> agnostic_with_columns(lf_pl) + shape: (4, 4) + ┌─────┬──────┬───────┬─────┐ + │ a ┆ b ┆ c ┆ 2a │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ bool ┆ i64 │ + ╞═════╪══════╪═══════╪═════╡ + │ 1 ┆ 0.5 ┆ true ┆ 2 │ + │ 2 ┆ 4.0 ┆ true ┆ 4 │ + │ 3 ┆ 10.0 ┆ false ┆ 6 │ + │ 4 ┆ 13.0 ┆ true ┆ 8 │ + └─────┴──────┴───────┴─────┘ + >>> agnostic_with_columns(lf_dask) + a b c 2a + 0 1 0.5 True 2 + 1 2 4.0 True 4 + 2 3 10.0 False 6 + 3 4 13.0 True 8 + """, + "select": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "foo": [1, 2, 3], + ... "bar": [6, 7, 8], + ... "ham": ["a", "b", "c"], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function in which we pass the name of a + column to select that column. + + >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select("foo").collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_select`: + + >>> agnostic_select(lf_pl) + shape: (3, 1) + ┌─────┐ + │ foo │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 1 │ + │ 2 │ + │ 3 │ + └─────┘ + >>> agnostic_select(lf_dask) + foo + 0 1 + 1 2 + 2 3 + + Multiple columns can be selected by passing a list of column names. + + >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(["foo", "bar"]).collect().to_native() + + >>> agnostic_select(lf_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ foo ┆ bar │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 6 │ + │ 2 ┆ 7 │ + │ 3 ┆ 8 │ + └─────┴─────┘ + >>> agnostic_select(lf_dask) + foo bar + 0 1 6 + 1 2 7 + 2 3 8 + + Multiple columns can also be selected using positional arguments instead of a + list. 
Expressions are also accepted. + + >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("foo"), nw.col("bar") + 1).collect().to_native() + + >>> agnostic_select(lf_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ foo ┆ bar │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 7 │ + │ 2 ┆ 8 │ + │ 3 ┆ 9 │ + └─────┴─────┘ + >>> agnostic_select(lf_dask) + foo bar + 0 1 7 + 1 2 8 + 2 3 9 + + Use keyword arguments to easily name your expression inputs. + + >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(threshold=nw.col("foo") * 2).collect().to_native() + + >>> agnostic_select(lf_pl) + shape: (3, 1) + ┌───────────┐ + │ threshold │ + │ --- │ + │ i64 │ + ╞═══════════╡ + │ 2 │ + │ 4 │ + │ 6 │ + └───────────┘ + >>> agnostic_select(lf_dask) + threshold + 0 2 + 1 4 + 2 6 + """, + "rename": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + We define a library agnostic function: + + >>> def agnostic_rename(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.rename({"foo": "apple"}).collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_rename`: + + >>> agnostic_rename(lf_pl) + shape: (3, 3) + ┌───────┬─────┬─────┐ + │ apple ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ str │ + ╞═══════╪═════╪═════╡ + │ 1 ┆ 6 ┆ a │ + │ 2 ┆ 7 ┆ b │ + │ 3 ┆ 8 ┆ c │ + └───────┴─────┴─────┘ + >>> agnostic_rename(lf_dask) + apple bar ham + 0 1 6 a + 1 2 7 b + 2 3 8 c + """, + "head": """ + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [1, 2, 3, 4, 5, 6], + ... "b": [7, 8, 9, 10, 11, 12], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function that gets the first 3 rows. + + >>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.head(3).collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_head`: + + >>> agnostic_head(lf_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 7 │ + │ 2 ┆ 8 │ + │ 3 ┆ 9 │ + └─────┴─────┘ + >>> agnostic_head(lf_dask) + a b + 0 1 7 + 1 2 8 + 2 3 9 + """, + "tail": """ + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [1, 2, 3, 4, 5, 6], + ... "b": [7, 8, 9, 10, 11, 12], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=1) + + Let's define a dataframe-agnostic function that gets the last 3 rows. + + >>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.tail(3).collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_tail`: + + >>> agnostic_tail(lf_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 4 ┆ 10 │ + │ 5 ┆ 11 │ + │ 6 ┆ 12 │ + └─────┴─────┘ + >>> agnostic_tail(lf_dask) + a b + 3 4 10 + 4 5 11 + 5 6 12 + """, + "drop": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + We define a library agnostic function: + + >>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.drop("ham").collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_drop`: + + >>> agnostic_drop(lf_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ foo ┆ bar │ + │ --- ┆ --- │ + │ i64 ┆ f64 │ + ╞═════╪═════╡ + │ 1 ┆ 6.0 │ + │ 2 ┆ 7.0 │ + │ 3 ┆ 8.0 │ + └─────┴─────┘ + >>> agnostic_drop(lf_dask) + foo bar + 0 1 6.0 + 1 2 7.0 + 2 3 8.0 + + Use positional arguments to drop multiple columns. + + >>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.drop("foo", "ham").collect().to_native() + + >>> agnostic_drop(lf_pl) + shape: (3, 1) + ┌─────┐ + │ bar │ + │ --- │ + │ f64 │ + ╞═════╡ + │ 6.0 │ + │ 7.0 │ + │ 8.0 │ + └─────┘ + >>> agnostic_drop(lf_dask) + bar + 0 6.0 + 1 7.0 + 2 8.0 + """, + "unique": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "foo": [1, 2, 3, 1], + ... "bar": ["a", "a", "a", "a"], + ... "ham": ["b", "b", "b", "b"], + ... 
} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + We define a library agnostic function: + + >>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.unique(["bar", "ham"]).collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_unique`: + + >>> agnostic_unique(lf_pl) + shape: (1, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ str ┆ str │ + ╞═════╪═════╪═════╡ + │ 1 ┆ a ┆ b │ + └─────┴─────┴─────┘ + >>> agnostic_unique(lf_dask) + foo bar ham + 0 1 a b + """, + "filter": """ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "foo": [1, 2, 3], + ... "bar": [6, 7, 8], + ... "ham": ["a", "b", "c"], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function in which we filter on + one condition. + + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.filter(nw.col("foo") > 1).collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_filter`: + + >>> agnostic_filter(lf_pl) + shape: (2, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 2 ┆ 7 ┆ b │ + │ 3 ┆ 8 ┆ c │ + └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask) + foo bar ham + 1 2 7 b + 2 3 8 c + + Filter on multiple conditions: + + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return ( + ... df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")) + ... .collect() + ... .to_native() + ... 
) + + >>> agnostic_filter(lf_pl) + shape: (1, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 6 ┆ a │ + └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask) + foo bar ham + 0 1 6 a + + Provide multiple filters using `*args` syntax: + + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return ( + ... df.filter( + ... nw.col("foo") == 1, + ... nw.col("ham") == "a", + ... ) + ... .collect() + ... .to_native() + ... ) + + >>> agnostic_filter(lf_pl) + shape: (1, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 6 ┆ a │ + └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask) + foo bar ham + 0 1 6 a + + Filter on an OR condition: + + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return ( + ... df.filter((nw.col("foo") == 1) | (nw.col("ham") == "c")) + ... .collect() + ... .to_native() + ... ) + + >>> agnostic_filter(lf_pl) + shape: (2, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 6 ┆ a │ + │ 3 ┆ 8 ┆ c │ + └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask) + foo bar ham + 0 1 6 a + 2 3 8 c + + Provide multiple filters using `**kwargs` syntax: + + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.filter(foo=2, ham="b").collect().to_native() + + >>> agnostic_filter(lf_pl) + shape: (1, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 2 ┆ 7 ┆ b │ + └─────┴─────┴─────┘ + >>> agnostic_filter(lf_dask) + foo bar ham + 1 2 7 b + """, + "group_by": """ + Group by one column and call `agg` to compute the grouped sum of + another column. 
+ + >>> import polars as pl + >>> import dask.dataframe as dd + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": ["a", "b", "a", "b", "c"], + ... "b": [1, 2, 1, 3, 3], + ... "c": [5, 4, 3, 2, 1], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function in which we group by one column + and call `agg` to compute the grouped sum of another column. + + >>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return ( + ... df.group_by("a") + ... .agg(nw.col("b").sum()) + ... .sort("a") + ... .collect() + ... .to_native() + ... ) + + We can then pass any supported library such as Polars or Dask to `agnostic_group_by_agg`: + + >>> agnostic_group_by_agg(lf_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ str ┆ i64 │ + ╞═════╪═════╡ + │ a ┆ 2 │ + │ b ┆ 5 │ + │ c ┆ 3 │ + └─────┴─────┘ + >>> agnostic_group_by_agg(lf_dask) + a b + 0 a 2 + 1 b 5 + 2 c 3 + + Group by multiple columns by passing a list of column names. + + >>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return ( + ... df.group_by(["a", "b"]) + ... .agg(nw.max("c")) + ... .sort(["a", "b"]) + ... .collect() + ... .to_native() + ... ) + + >>> agnostic_group_by_agg(lf_pl) + shape: (4, 3) + ┌─────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ str ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╡ + │ a ┆ 1 ┆ 5 │ + │ b ┆ 2 ┆ 4 │ + │ b ┆ 3 ┆ 2 │ + │ c ┆ 3 ┆ 1 │ + └─────┴─────┴─────┘ + >>> agnostic_group_by_agg(lf_dask) + a b c + 0 a 1 5 + 1 b 2 4 + 2 b 3 2 + 3 c 3 1 + """, + "sort": """ + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [1, 2, None], + ... "b": [6.0, 5.0, 4.0], + ... "c": ["a", "c", "b"], + ... 
} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function in which we sort by multiple + columns in different orders + + >>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.sort("c", "a", descending=[False, True]).collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_sort`: + + >>> agnostic_sort(lf_pl) + shape: (3, 3) + ┌──────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str │ + ╞══════╪═════╪═════╡ + │ 1 ┆ 6.0 ┆ a │ + │ null ┆ 4.0 ┆ b │ + │ 2 ┆ 5.0 ┆ c │ + └──────┴─────┴─────┘ + >>> agnostic_sort(lf_dask) + a b c + 0 1.0 6.0 a + 2 NaN 4.0 b + 1 2.0 5.0 c + """, + "join": """ + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "foo": [1, 2, 3], + ... "bar": [6.0, 7.0, 8.0], + ... "ham": ["a", "b", "c"], + ... } + >>> data_other = { + ... "apple": ["x", "y", "z"], + ... "ham": ["a", "b", "d"], + ... } + + >>> lf_pl = pl.LazyFrame(data) + >>> other_pl = pl.LazyFrame(data_other) + >>> lf_dask = dd.from_dict(data, npartitions=2) + >>> other_dask = dd.from_dict(data_other, npartitions=2) + + Let's define a dataframe-agnostic function in which we join over "ham" column: + + >>> def agnostic_join_on_ham( + ... df_native: IntoFrameT, + ... other_native: IntoFrameT, + ... ) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... other = nw.from_native(other_native) + ... return ( + ... df.join(other, left_on="ham", right_on="ham") + ... .sort("ham") + ... .collect() + ... .to_native() + ... 
) + + We can then pass any supported library such as Polars or Dask to `agnostic_join_on_ham`: + + >>> agnostic_join_on_ham(lf_pl, other_pl) + shape: (2, 4) + ┌─────┬─────┬─────┬───────┐ + │ foo ┆ bar ┆ ham ┆ apple │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str ┆ str │ + ╞═════╪═════╪═════╪═══════╡ + │ 1 ┆ 6.0 ┆ a ┆ x │ + │ 2 ┆ 7.0 ┆ b ┆ y │ + └─────┴─────┴─────┴───────┘ + >>> agnostic_join_on_ham(lf_dask, other_dask) + foo bar ham apple + 0 1 6.0 a x + 0 2 7.0 b y + """, + "join_asof": """ + >>> from datetime import datetime + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> from typing import Literal + >>> from narwhals.typing import IntoFrameT + >>> + >>> data_gdp = { + ... "datetime": [ + ... datetime(2016, 1, 1), + ... datetime(2017, 1, 1), + ... datetime(2018, 1, 1), + ... datetime(2019, 1, 1), + ... datetime(2020, 1, 1), + ... ], + ... "gdp": [4164, 4411, 4566, 4696, 4827], + ... } + >>> data_population = { + ... "datetime": [ + ... datetime(2016, 3, 1), + ... datetime(2018, 8, 1), + ... datetime(2019, 1, 1), + ... ], + ... "population": [82.19, 82.66, 83.12], + ... } + >>> gdp_pl = pl.LazyFrame(data_gdp) + >>> population_pl = pl.LazyFrame(data_population) + >>> gdp_dask = dd.from_dict(data_gdp, npartitions=2) + >>> population_dask = dd.from_dict(data_population, npartitions=2) + + Let's define a dataframe-agnostic function in which we join over "datetime" column: + + >>> def agnostic_join_asof_datetime( + ... df_native: IntoFrameT, + ... other_native: IntoFrameT, + ... strategy: Literal["backward", "forward", "nearest"], + ... ) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... other = nw.from_native(other_native) + ... return ( + ... df.sort("datetime") + ... .join_asof(other, on="datetime", strategy=strategy) + ... .collect() + ... .to_native() + ... 
) + + We can then pass any supported library such as Polars or Dask to `agnostic_join_asof_datetime`: + + >>> agnostic_join_asof_datetime(population_pl, gdp_pl, strategy="backward") + shape: (3, 3) + ┌─────────────────────┬────────────┬──────┐ + │ datetime ┆ population ┆ gdp │ + │ --- ┆ --- ┆ --- │ + │ datetime[μs] ┆ f64 ┆ i64 │ + ╞═════════════════════╪════════════╪══════╡ + │ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │ + │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │ + │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ + └─────────────────────┴────────────┴──────┘ + >>> agnostic_join_asof_datetime(population_dask, gdp_dask, strategy="backward") + datetime population gdp + 0 2016-03-01 82.19 4164 + 1 2018-08-01 82.66 4566 + 0 2019-01-01 83.12 4696 + + Here is a real-world times-series example that uses `by` argument. + + >>> from datetime import datetime + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> from narwhals.typing import IntoFrameT + >>> + >>> data_quotes = { + ... "datetime": [ + ... datetime(2016, 5, 25, 13, 30, 0, 23), + ... datetime(2016, 5, 25, 13, 30, 0, 23), + ... datetime(2016, 5, 25, 13, 30, 0, 30), + ... datetime(2016, 5, 25, 13, 30, 0, 41), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... datetime(2016, 5, 25, 13, 30, 0, 49), + ... datetime(2016, 5, 25, 13, 30, 0, 72), + ... datetime(2016, 5, 25, 13, 30, 0, 75), + ... ], + ... "ticker": [ + ... "GOOG", + ... "MSFT", + ... "MSFT", + ... "MSFT", + ... "GOOG", + ... "AAPL", + ... "GOOG", + ... "MSFT", + ... ], + ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], + ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], + ... } + >>> data_trades = { + ... "datetime": [ + ... datetime(2016, 5, 25, 13, 30, 0, 23), + ... datetime(2016, 5, 25, 13, 30, 0, 38), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... datetime(2016, 5, 25, 13, 30, 0, 49), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... ], + ... 
"ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], + ... "quantity": [75, 155, 100, 100, 100], + ... } + >>> quotes_pl = pl.LazyFrame(data_quotes) + >>> trades_pl = pl.LazyFrame(data_trades) + >>> quotes_dask = dd.from_dict(data_quotes, npartitions=2) + >>> trades_dask = dd.from_dict(data_trades, npartitions=2) + + Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns: + + >>> def agnostic_join_asof_datetime_by_ticker( + ... df_native: IntoFrameT, + ... other_native: IntoFrameT, + ... ) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... other = nw.from_native(other_native) + ... return ( + ... df.sort("datetime", "ticker") + ... .join_asof(other, on="datetime", by="ticker") + ... .sort("datetime", "ticker") + ... .collect() + ... .to_native() + ... ) + + We can then pass any supported library such as Polars or Dask to `agnostic_join_asof_datetime_by_ticker`: + + >>> agnostic_join_asof_datetime_by_ticker(trades_pl, quotes_pl) + shape: (5, 6) + ┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐ + │ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │ + ╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡ + │ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │ + │ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │ + │ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │ + │ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │ + │ 2016-05-25 13:30:00.000049 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │ + └────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘ + >>> agnostic_join_asof_datetime_by_ticker(trades_dask, quotes_dask) + datetime ticker price quantity bid ask + 0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96 + 0 2016-05-25 13:30:00.000038 MSFT 
51.95 155 51.97 51.98 + 1 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN + 2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.000049 GOOG 720.92 100 720.50 720.93 + """, + "is_duplicated": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + >>> data = { + ... "a": [1, 2, 3, 1], + ... "b": ["x", "y", "z", "x"], + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_duplicated(df_native: IntoDataFrame) -> IntoSeries: + ... df = nw.from_native(df_native, eager_only=True) + ... return df.is_duplicated().to_native() + + We can then pass any supported library such as Pandas, Polars, or PyArrow + to `agnostic_is_duplicated`: + + >>> agnostic_is_duplicated(df_pd) + 0 True + 1 False + 2 False + 3 True + dtype: bool + + >>> agnostic_is_duplicated(df_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [bool] + [ + true + false + false + true + ] + >>> agnostic_is_duplicated(df_pa) # doctest: +ELLIPSIS + + [ + [ + true, + false, + false, + true + ] + ] + """, + "is_empty": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + + Let's define a dataframe-agnostic function that filters rows in which "foo" + values are greater than 10, and then checks if the result is empty or not: + + >>> def agnostic_is_empty(df_native: IntoDataFrame) -> bool: + ... df = nw.from_native(df_native, eager_only=True) + ... 
return df.filter(nw.col("foo") > 10).is_empty() + + We can then pass any supported library such as Pandas, Polars, or PyArrow + to `agnostic_is_empty`: + + >>> data = {"foo": [1, 2, 3], "bar": [4, 5, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + >>> agnostic_is_empty(df_pd), agnostic_is_empty(df_pl), agnostic_is_empty(df_pa) + (True, True, True) + + >>> data = {"foo": [100, 2, 3], "bar": [4, 5, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + >>> agnostic_is_empty(df_pd), agnostic_is_empty(df_pl), agnostic_is_empty(df_pa) + (False, False, False) + """, + "is_unique": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + >>> data = { + ... "a": [1, 2, 3, 1], + ... "b": ["x", "y", "z", "x"], + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_unique(df_native: IntoDataFrame) -> IntoSeries: + ... df = nw.from_native(df_native, eager_only=True) + ... return df.is_unique().to_native() + + We can then pass any supported library such as Pandas, Polars, or PyArrow + to `agnostic_is_unique`: + + >>> agnostic_is_unique(df_pd) + 0 False + 1 True + 2 True + 3 False + dtype: bool + + >>> agnostic_is_unique(df_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [bool] + [ + false + true + true + false + ] + >>> agnostic_is_unique(df_pa) # doctest: +ELLIPSIS + + [ + [ + false, + true, + true, + false + ] + ] + """, + "null_count": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> data = { + ... "foo": [1, None, 3], + ... "bar": [6, 7, None], + ... "ham": ["a", "b", "c"], + ... 
} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function that returns the null count of + each column: + + >>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.null_count().to_native() + + We can then pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_null_count`: + + >>> agnostic_null_count(df_pd) + foo bar ham + 0 1 1 0 + + >>> agnostic_null_count(df_pl) + shape: (1, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ u32 ┆ u32 ┆ u32 │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 1 ┆ 0 │ + └─────┴─────┴─────┘ + + >>> agnostic_null_count(df_pa) + pyarrow.Table + foo: int64 + bar: int64 + ham: int64 + ---- + foo: [[1]] + bar: [[1]] + ham: [[0]] + """, + "item": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function that returns item at given row/column + + >>> def agnostic_item( + ... df_native: IntoDataFrame, row: int | None, column: int | str | None + ... ): + ... df = nw.from_native(df_native, eager_only=True) + ... 
return df.item(row, column) + + We can then pass any supported library such as Pandas, Polars, or PyArrow + to `agnostic_item`: + + >>> agnostic_item(df_pd, 1, 1), agnostic_item(df_pd, 2, "b") + (np.int64(5), np.int64(6)) + >>> agnostic_item(df_pl, 1, 1), agnostic_item(df_pl, 2, "b") + (5, 6) + >>> agnostic_item(df_pa, 1, 1), agnostic_item(df_pa, 2, "b") + (5, 6) + """, + "clone": """ + >>> import narwhals as nw + >>> import polars as pl + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2], "b": [3, 4]} + >>> lf_pl = pl.LazyFrame(data) + + Let's define a dataframe-agnostic function in which we copy the DataFrame: + + >>> def agnostic_clone(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.clone().collect().to_native() + + We can then pass any supported library such as Polars to `agnostic_clone`: + + >>> agnostic_clone(lf_pl) + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 3 │ + │ 2 ┆ 4 │ + └─────┴─────┘ + """, + "gather_every": """ + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + Let's define a dataframe-agnostic function in which we gather every 2 rows, + starting from an offset of 1: + + >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.gather_every(n=2, offset=1).collect().to_native() + + We can then pass any supported library such as Polars or Dask to `agnostic_gather_every`: + + >>> agnostic_gather_every(lf_pl) + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 2 ┆ 6 │ + │ 4 ┆ 8 │ + └─────┴─────┘ + >>> agnostic_gather_every(lf_dask) + a b + 1 2 6 + 3 4 8 + """, + "pivot": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrameT + >>> data = { + ... "ix": [1, 1, 2, 2, 1, 2], + ... "col": ["a", "a", "a", "a", "b", "b"], + ... "foo": [0, 1, 2, 2, 7, 1], + ... "bar": [0, 2, 0, 0, 9, 4], + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_pivot(df_native: IntoDataFrameT) -> IntoDataFrameT: + ... df = nw.from_native(df_native, eager_only=True) + ... 
return df.pivot("col", index="ix", aggregate_function="sum").to_native() + + We can then pass any supported library such as Pandas or Polars + to `agnostic_pivot`: + + >>> agnostic_pivot(df_pd) + ix foo_a foo_b bar_a bar_b + 0 1 1 7 2 9 + 1 2 4 1 0 4 + >>> agnostic_pivot(df_pl) + shape: (2, 5) + ┌─────┬───────┬───────┬───────┬───────┐ + │ ix ┆ foo_a ┆ foo_b ┆ bar_a ┆ bar_b │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞═════╪═══════╪═══════╪═══════╪═══════╡ + │ 1 ┆ 1 ┆ 7 ┆ 2 ┆ 9 │ + │ 2 ┆ 4 ┆ 1 ┆ 0 ┆ 4 │ + └─────┴───────┴───────┴───────┴───────┘ + """, + "to_arrow": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> data = {"foo": [1, 2, 3], "bar": ["a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function that converts to arrow table: + + >>> def agnostic_to_arrow(df_native: IntoDataFrame) -> pa.Table: + ... df = nw.from_native(df_native, eager_only=True) + ... 
return df.to_arrow() + + We can then pass any supported library such as Pandas, Polars, or PyArrow + to `agnostic_to_arrow`: + + >>> agnostic_to_arrow(df_pd) + pyarrow.Table + foo: int64 + bar: string + ---- + foo: [[1,2,3]] + bar: [["a","b","c"]] + + >>> agnostic_to_arrow(df_pl) + pyarrow.Table + foo: int64 + bar: large_string + ---- + foo: [[1,2,3]] + bar: [["a","b","c"]] + + >>> agnostic_to_arrow(df_pa) + pyarrow.Table + foo: int64 + bar: string + ---- + foo: [[1,2,3]] + bar: [["a","b","c"]] + """, + "sample": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrameT + >>> data = {"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_sample(df_native: IntoDataFrameT) -> IntoDataFrameT: + ... df = nw.from_native(df_native, eager_only=True) + ... return df.sample(n=2, seed=123).to_native() + + We can then pass any supported library such as Pandas, Polars, or PyArrow + to `agnostic_sample`: + + >>> agnostic_sample(df_pd) + a b + 3 4 y + 0 1 x + >>> agnostic_sample(df_pl) + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ str │ + ╞═════╪═════╡ + │ 2 ┆ y │ + │ 3 ┆ x │ + └─────┴─────┘ + >>> agnostic_sample(df_pa) + pyarrow.Table + a: int64 + b: string + ---- + a: [[1,3]] + b: [["x","x"]] + + As you can see, by using the same seed, the result will be consistent within + the same backend, but not necessarily across different backends. + """, + "unpivot": """ + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": ["x", "y", "z"], + ... "b": [1, 3, 5], + ... "c": [2, 4, 6], + ... 
} + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + We define a library agnostic function: + + >>> def agnostic_unpivot(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return ( + ... (df.unpivot(on=["b", "c"], index="a").sort(["variable", "a"])) + ... .collect() + ... .to_native() + ... ) + + We can then pass any supported library such as Polars or Dask to `agnostic_unpivot`: + + >>> agnostic_unpivot(lf_pl) + shape: (6, 3) + ┌─────┬──────────┬───────┐ + │ a ┆ variable ┆ value │ + │ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ i64 │ + ╞═════╪══════════╪═══════╡ + │ x ┆ b ┆ 1 │ + │ y ┆ b ┆ 3 │ + │ z ┆ b ┆ 5 │ + │ x ┆ c ┆ 2 │ + │ y ┆ c ┆ 4 │ + │ z ┆ c ┆ 6 │ + └─────┴──────────┴───────┘ + >>> agnostic_unpivot(lf_dask) + a variable value + 0 x b 1 + 1 y b 3 + 0 z b 5 + 2 x c 2 + 3 y c 4 + 1 z c 6 + """, + "explode": """ + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> import polars as pl + >>> data = { + ... "a": ["x", "y", "z", "w"], + ... "lst1": [[1, 2], None, [None], []], + ... "lst2": [[3, None], None, [42], []], + ... } + + We define a library agnostic function: + + >>> def agnostic_explode(df_native: IntoFrameT) -> IntoFrameT: + ... return ( + ... nw.from_native(df_native) + ... .with_columns(nw.col("lst1", "lst2").cast(nw.List(nw.Int32()))) + ... .explode("lst1", "lst2") + ... .collect() + ... .to_native() + ... ) + + We can then pass any supported library such as Polars to `agnostic_explode`: + + >>> agnostic_explode(pl.LazyFrame(data)) + shape: (5, 3) + ┌─────┬──────┬──────┐ + │ a ┆ lst1 ┆ lst2 │ + │ --- ┆ --- ┆ --- │ + │ str ┆ i32 ┆ i32 │ + ╞═════╪══════╪══════╡ + │ x ┆ 1 ┆ 3 │ + │ x ┆ 2 ┆ null │ + │ y ┆ null ┆ null │ + │ z ┆ null ┆ 42 │ + │ w ┆ null ┆ null │ + └─────┴──────┴──────┘ + """, + "collect": """ + >>> import narwhals as nw + >>> import polars as pl + >>> import dask.dataframe as dd + >>> data = { + ... "a": ["a", "b", "a", "b", "b", "c"], + ... 
"b": [1, 2, 3, 4, 5, 6], + ... "c": [6, 5, 4, 3, 2, 1], + ... } + >>> lf_pl = pl.LazyFrame(data) + >>> lf_dask = dd.from_dict(data, npartitions=2) + + >>> lf = nw.from_native(lf_pl) + >>> lf # doctest:+ELLIPSIS + ┌─────────────────────────────┐ + | Narwhals LazyFrame | + |-----------------------------| + |>> df = lf.group_by("a").agg(nw.all().sum()).collect() + >>> df.to_native().sort("a") + shape: (3, 3) + ┌─────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ str ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╡ + │ a ┆ 4 ┆ 10 │ + │ b ┆ 11 ┆ 10 │ + │ c ┆ 6 ┆ 1 │ + └─────┴─────┴─────┘ + + >>> lf = nw.from_native(lf_dask) + >>> lf + ┌───────────────────────────────────┐ + | Narwhals LazyFrame | + |-----------------------------------| + |Dask DataFrame Structure: | + | a b c| + |npartitions=2 | + |0 string int64 int64| + |3 ... ... ...| + |5 ... ... ...| + |Dask Name: frompandas, 1 expression| + |Expr=df | + └───────────────────────────────────┘ + >>> df = lf.group_by("a").agg(nw.col("b", "c").sum()).collect() + >>> df.to_native() + a b c + 0 a 4 10 + 1 b 11 10 + 2 c 6 1 + """, +} diff --git a/docs/docstring_examples/dependencies.py b/docs/docstring_examples/dependencies.py new file mode 100644 index 000000000..6cf1fff02 --- /dev/null +++ b/docs/docstring_examples/dependencies.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +EXAMPLES = { + "is_into_series": """ + >>> import pandas as pd + >>> import polars as pl + >>> import numpy as np + >>> import narwhals as nw + + >>> s_pd = pd.Series([1, 2, 3]) + >>> s_pl = pl.Series([1, 2, 3]) + >>> np_arr = np.array([1, 2, 3]) + + >>> nw.dependencies.is_into_series(s_pd) + True + >>> nw.dependencies.is_into_series(s_pl) + True + >>> nw.dependencies.is_into_series(np_arr) + False + """, + "is_into_dataframe": """ + >>> import pandas as pd + >>> import polars as pl + >>> import numpy as np + >>> from narwhals.dependencies import is_into_dataframe + + >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> df_pl = 
pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> np_arr = np.array([[1, 4], [2, 5], [3, 6]]) + + >>> is_into_dataframe(df_pd) + True + >>> is_into_dataframe(df_pl) + True + >>> is_into_dataframe(np_arr) + False + """, +} diff --git a/docs/docstring_examples/dtypes.py b/docs/docstring_examples/dtypes.py new file mode 100644 index 000000000..05f4e5647 --- /dev/null +++ b/docs/docstring_examples/dtypes.py @@ -0,0 +1,406 @@ +from __future__ import annotations + +EXAMPLES = { + "Decimal": """ + >>> import polars as pl + >>> import narwhals as nw + >>> s = pl.Series(["1.5"], dtype=pl.Decimal) + >>> nw.from_native(s, series_only=True).dtype + Decimal + """, + "Int64": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [2, 1, 3, 7] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> nw.from_native(ser_pd, series_only=True).dtype + Int64 + >>> nw.from_native(ser_pl, series_only=True).dtype + Int64 + >>> nw.from_native(ser_pa, series_only=True).dtype + Int64 + """, + "Int32": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [2, 1, 3, 7] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> def func(ser): + ... ser_nw = nw.from_native(ser, series_only=True) + ... return ser_nw.cast(nw.Int32).dtype + + >>> func(ser_pd) + Int32 + >>> func(ser_pl) + Int32 + >>> func(ser_pa) + Int32 + """, + "Int16": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [2, 1, 3, 7] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> def func(ser): + ... ser_nw = nw.from_native(ser, series_only=True) + ... 
return ser_nw.cast(nw.Int16).dtype + + >>> func(ser_pd) + Int16 + >>> func(ser_pl) + Int16 + >>> func(ser_pa) + Int16 + """, + "Int8": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [2, 1, 3, 7] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> def func(ser): + ... ser_nw = nw.from_native(ser, series_only=True) + ... return ser_nw.cast(nw.Int8).dtype + + >>> func(ser_pd) + Int8 + >>> func(ser_pl) + Int8 + >>> func(ser_pa) + Int8 + """, + "UInt64": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [2, 1, 3, 7] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> def func(ser): + ... ser_nw = nw.from_native(ser, series_only=True) + ... return ser_nw.cast(nw.UInt64).dtype + + >>> func(ser_pd) + UInt64 + >>> func(ser_pl) + UInt64 + >>> func(ser_pa) + UInt64 + """, + "UInt32": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [2, 1, 3, 7] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> def func(ser): + ... ser_nw = nw.from_native(ser, series_only=True) + ... return ser_nw.cast(nw.UInt32).dtype + + >>> func(ser_pd) + UInt32 + >>> func(ser_pl) + UInt32 + >>> func(ser_pa) + UInt32 + """, + "UInt16": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [2, 1, 3, 7] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> def func(ser): + ... ser_nw = nw.from_native(ser, series_only=True) + ... 
return ser_nw.cast(nw.UInt16).dtype + + >>> func(ser_pd) + UInt16 + >>> func(ser_pl) + UInt16 + >>> func(ser_pa) + UInt16 + """, + "UInt8": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [2, 1, 3, 7] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> def func(ser): + ... ser_nw = nw.from_native(ser, series_only=True) + ... return ser_nw.cast(nw.UInt8).dtype + + >>> func(ser_pd) + UInt8 + >>> func(ser_pl) + UInt8 + >>> func(ser_pa) + UInt8 + """, + "Float64": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [0.001, 0.1, 0.01, 0.1] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> nw.from_native(ser_pd, series_only=True).dtype + Float64 + >>> nw.from_native(ser_pl, series_only=True).dtype + Float64 + >>> nw.from_native(ser_pa, series_only=True).dtype + Float64 + """, + "Float32": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [0.001, 0.1, 0.01, 0.1] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> def func(ser): + ... ser_nw = nw.from_native(ser, series_only=True) + ... 
return ser_nw.cast(nw.Float32).dtype + + >>> func(ser_pd) + Float32 + >>> func(ser_pl) + Float32 + >>> func(ser_pa) + Float32 + """, + "String": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = ["beluga", "narwhal", "orca", "vaquita"] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> nw.from_native(ser_pd, series_only=True).dtype + String + >>> nw.from_native(ser_pl, series_only=True).dtype + String + >>> nw.from_native(ser_pa, series_only=True).dtype + String + """, + "Boolean": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [True, False, False, True] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> nw.from_native(ser_pd, series_only=True).dtype + Boolean + >>> nw.from_native(ser_pl, series_only=True).dtype + Boolean + >>> nw.from_native(ser_pa, series_only=True).dtype + Boolean + """, + "Object": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> class Foo: ... + >>> ser_pd = pd.Series([Foo(), Foo()]) + >>> ser_pl = pl.Series([Foo(), Foo()]) + + >>> nw.from_native(ser_pd, series_only=True).dtype + Object + >>> nw.from_native(ser_pl, series_only=True).dtype + Object + """, + "Unknown": """ + >>> import pandas as pd + >>> import narwhals as nw + >>> data = pd.period_range("2000-01", periods=4, freq="M") + >>> ser_pd = pd.Series(data) + + >>> nw.from_native(ser_pd, series_only=True).dtype + Unknown + """, + "Datetime": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> import narwhals as nw + >>> from datetime import datetime, timedelta + >>> data = [datetime(2024, 12, 9) + timedelta(days=n) for n in range(5)] + >>> ser_pd = ( + ... pd.Series(data) + ... 
.dt.tz_localize("Africa/Accra") + ... .astype("datetime64[ms, Africa/Accra]") + ... ) + >>> ser_pl = ( + ... pl.Series(data).cast(pl.Datetime("ms")).dt.replace_time_zone("Africa/Accra") + ... ) + >>> ser_pa = pc.assume_timezone( + ... pa.chunked_array([data], type=pa.timestamp("ms")), "Africa/Accra" + ... ) + + >>> nw.from_native(ser_pd, series_only=True).dtype + Datetime(time_unit='ms', time_zone='Africa/Accra') + >>> nw.from_native(ser_pl, series_only=True).dtype + Datetime(time_unit='ms', time_zone='Africa/Accra') + >>> nw.from_native(ser_pa, series_only=True).dtype + Datetime(time_unit='ms', time_zone='Africa/Accra') + """, + "Duration": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from datetime import timedelta + >>> data = [timedelta(seconds=d) for d in range(1, 4)] + >>> ser_pd = pd.Series(data).astype("timedelta64[ms]") + >>> ser_pl = pl.Series(data).cast(pl.Duration("ms")) + >>> ser_pa = pa.chunked_array([data], type=pa.duration("ms")) + + >>> nw.from_native(ser_pd, series_only=True).dtype + Duration(time_unit='ms') + >>> nw.from_native(ser_pl, series_only=True).dtype + Duration(time_unit='ms') + >>> nw.from_native(ser_pa, series_only=True).dtype + Duration(time_unit='ms') + """, + "Categorical": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = ["beluga", "narwhal", "orca", "vaquita"] + >>> ser_pd = pd.Series(data) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> nw.from_native(ser_pd, series_only=True).cast(nw.Categorical).dtype + Categorical + >>> nw.from_native(ser_pl, series_only=True).cast(nw.Categorical).dtype + Categorical + >>> nw.from_native(ser_pa, series_only=True).cast(nw.Categorical).dtype + Categorical + """, + "Enum": """ + >>> import polars as pl + >>> import narwhals as nw + >>> data = ["beluga", "narwhal", "orca", "vaquita"] + >>> ser_pl = pl.Series(data, 
dtype=pl.Enum(data)) + + >>> nw.from_native(ser_pl, series_only=True).dtype + Enum + """, + "Struct": """ + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [{"a": 1, "b": ["narwhal", "beluga"]}, {"a": 2, "b": ["orca"]}] + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> nw.from_native(ser_pl, series_only=True).dtype + Struct({'a': Int64, 'b': List(String)}) + >>> nw.from_native(ser_pa, series_only=True).dtype + Struct({'a': Int64, 'b': List(String)}) + """, + "List": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [["narwhal", "orca"], ["beluga", "vaquita"]] + >>> ser_pd = pd.Series(data, dtype=pd.ArrowDtype(pa.large_list(pa.large_string()))) + >>> ser_pl = pl.Series(data) + >>> ser_pa = pa.chunked_array([data]) + + >>> nw.from_native(ser_pd, series_only=True).dtype + List(String) + >>> nw.from_native(ser_pl, series_only=True).dtype + List(String) + >>> nw.from_native(ser_pa, series_only=True).dtype + List(String) + """, + "Array": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = [[1, 2], [3, 4], [5, 6]] + >>> ser_pd = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int32(), 2))) + >>> ser_pl = pl.Series(data, dtype=pl.Array(pl.Int32, 2)) + >>> ser_pa = pa.chunked_array([data], type=pa.list_(pa.int32(), 2)) + + >>> nw.from_native(ser_pd, series_only=True).dtype + Array(Int32, 2) + >>> nw.from_native(ser_pl, series_only=True).dtype + Array(Int32, 2) + >>> nw.from_native(ser_pa, series_only=True).dtype + Array(Int32, 2) + """, + "Date": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from datetime import date, timedelta + >>> data = [date(2024, 12, 1) + timedelta(days=d) for d in range(4)] + >>> ser_pd = pd.Series(data, dtype="date32[pyarrow]") + >>> ser_pl = pl.Series(data) + >>> 
ser_pa = pa.chunked_array([data]) + + >>> nw.from_native(ser_pd, series_only=True).dtype + Date + >>> nw.from_native(ser_pl, series_only=True).dtype + Date + >>> nw.from_native(ser_pa, series_only=True).dtype + Date + """, +} diff --git a/docs/docstring_examples/expr.py b/docs/docstring_examples/expr.py new file mode 100644 index 000000000..a4bdcdfec --- /dev/null +++ b/docs/docstring_examples/expr.py @@ -0,0 +1,2978 @@ +from __future__ import annotations + +EXAMPLES = { + "alias": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2], "b": [4, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_alias(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select((nw.col("b") + 10).alias("c")).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_alias`: + + >>> agnostic_alias(df_pd) + c + 0 14 + 1 15 + + >>> agnostic_alias(df_pl) + shape: (2, 1) + ┌─────┐ + │ c │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 14 │ + │ 15 │ + └─────┘ + + >>> agnostic_alias(df_pa) + pyarrow.Table + c: int64 + ---- + c: [[14,15]] + """, + "pipe": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 4]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a library-agnostic function: + + >>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a").pipe(lambda x: x + 1)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_pipe`: + + >>> agnostic_pipe(df_pd) + a + 0 2 + 1 3 + 2 4 + 3 5 + + >>> agnostic_pipe(df_pl) + shape: (4, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 2 │ + │ 3 │ + │ 4 │ + │ 5 │ + └─────┘ + + >>> agnostic_pipe(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[2,3,4,5]] + """, + "cast": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_cast(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8) + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cast`: + + >>> agnostic_cast(df_pd) + foo bar + 0 1.0 6 + 1 2.0 7 + 2 3.0 8 + >>> agnostic_cast(df_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ foo ┆ bar │ + │ --- ┆ --- │ + │ f32 ┆ u8 │ + ╞═════╪═════╡ + │ 1.0 ┆ 6 │ + │ 2.0 ┆ 7 │ + │ 3.0 ┆ 8 │ + └─────┴─────┘ + >>> agnostic_cast(df_pa) + pyarrow.Table + foo: float + bar: uint8 + ---- + foo: [[1,2,3]] + bar: [[6,7,8]] + """, + "any": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [True, False], "b": [True, True]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_any(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a", "b").any()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_any`: + + >>> agnostic_any(df_pd) + a b + 0 True True + + >>> agnostic_any(df_pl) + shape: (1, 2) + ┌──────┬──────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞══════╪══════╡ + │ true ┆ true │ + └──────┴──────┘ + + >>> agnostic_any(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[true]] + b: [[true]] + """, + "all": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [True, False], "b": [True, True]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a", "b").all()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_all`: + + >>> agnostic_all(df_pd) + a b + 0 False True + + >>> agnostic_all(df_pl) + shape: (1, 2) + ┌───────┬──────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞═══════╪══════╡ + │ false ┆ true │ + └───────┴──────┘ + + >>> agnostic_all(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[false]] + b: [[true]] + """, + "ewm_mean": """ + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + We define a library agnostic function: + + >>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a").ewm_mean(com=1, ignore_nulls=False) + ... 
).to_native() + + We can then pass either pandas or Polars to `agnostic_ewm_mean`: + + >>> agnostic_ewm_mean(df_pd) + a + 0 1.000000 + 1 1.666667 + 2 2.428571 + + >>> agnostic_ewm_mean(df_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3, 1) + ┌──────────┐ + │ a │ + │ --- │ + │ f64 │ + ╞══════════╡ + │ 1.0 │ + │ 1.666667 │ + │ 2.428571 │ + └──────────┘ + """, + "mean": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [-1, 0, 1], "b": [2, 4, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a", "b").mean()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_mean`: + + >>> agnostic_mean(df_pd) + a b + 0 0.0 4.0 + + >>> agnostic_mean(df_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞═════╪═════╡ + │ 0.0 ┆ 4.0 │ + └─────┴─────┘ + + >>> agnostic_mean(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[0]] + b: [[4]] + """, + "median": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 8, 3], "b": [4, 5, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a", "b").median()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_median`: + + >>> agnostic_median(df_pd) + a b + 0 3.0 4.0 + + >>> agnostic_median(df_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞═════╪═════╡ + │ 3.0 ┆ 4.0 │ + └─────┴─────┘ + + >>> agnostic_median(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[3]] + b: [[4]] + """, + "std": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_std(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a", "b").std(ddof=0)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_std`: + + >>> agnostic_std(df_pd) + a b + 0 17.79513 1.265789 + >>> agnostic_std(df_pl) + shape: (1, 2) + ┌──────────┬──────────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞══════════╪══════════╡ + │ 17.79513 ┆ 1.265789 │ + └──────────┴──────────┘ + >>> agnostic_std(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[17.795130420052185]] + b: [[1.2657891697365016]] + """, + "var": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_var(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a", "b").var(ddof=0)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_var`: + + >>> agnostic_var(df_pd) + a b + 0 316.666667 1.602222 + + >>> agnostic_var(df_pl) + shape: (1, 2) + ┌────────────┬──────────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞════════════╪══════════╡ + │ 316.666667 ┆ 1.602222 │ + └────────────┴──────────┘ + + >>> agnostic_var(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[316.6666666666667]] + b: [[1.6022222222222222]] + """, + "map_batches": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_map_batches(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a", "b").map_batches( + ... lambda s: s.to_numpy() + 1, return_dtype=nw.Float64 + ... ) + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_map_batches`: + + >>> agnostic_map_batches(df_pd) + a b + 0 2.0 5.0 + 1 3.0 6.0 + 2 4.0 7.0 + >>> agnostic_map_batches(df_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞═════╪═════╡ + │ 2.0 ┆ 5.0 │ + │ 3.0 ┆ 6.0 │ + │ 4.0 ┆ 7.0 │ + └─────┴─────┘ + >>> agnostic_map_batches(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[2,3,4]] + b: [[5,6,7]] + """, + "skew": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_skew(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a", "b").skew()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_skew`: + + >>> agnostic_skew(df_pd) + a b + 0 0.0 1.472427 + + >>> agnostic_skew(df_pl) + shape: (1, 2) + ┌─────┬──────────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞═════╪══════════╡ + │ 0.0 ┆ 1.472427 │ + └─────┴──────────┘ + + >>> agnostic_skew(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[0]] + b: [[1.4724267269058975]] + """, + "sum": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [5, 10], "b": [50, 100]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a", "b").sum()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_sum`: + + >>> agnostic_sum(df_pd) + a b + 0 15 150 + >>> agnostic_sum(df_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 15 ┆ 150 │ + └─────┴─────┘ + >>> agnostic_sum(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[15]] + b: [[150]] + """, + "min": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2], "b": [4, 3]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.min("a", "b")).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_min`: + + >>> agnostic_min(df_pd) + a b + 0 1 3 + + >>> agnostic_min(df_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 3 │ + └─────┴─────┘ + + >>> agnostic_min(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1]] + b: [[3]] + """, + "max": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [10, 20], "b": [50, 100]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.max("a", "b")).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_max`: + + >>> agnostic_max(df_pd) + a b + 0 20 100 + + >>> agnostic_max(df_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 20 ┆ 100 │ + └─────┴─────┘ + + >>> agnostic_max(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[20]] + b: [[100]] + """, + "arg_min": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [10, 20], "b": [150, 100]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_arg_min(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a", "b").arg_min().name.suffix("_arg_min") + ... ).to_native() + + We can then pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_arg_min`: + + >>> agnostic_arg_min(df_pd) + a_arg_min b_arg_min + 0 0 1 + + >>> agnostic_arg_min(df_pl) + shape: (1, 2) + ┌───────────┬───────────┐ + │ a_arg_min ┆ b_arg_min │ + │ --- ┆ --- │ + │ u32 ┆ u32 │ + ╞═══════════╪═══════════╡ + │ 0 ┆ 1 │ + └───────────┴───────────┘ + + >>> agnostic_arg_min(df_pa) + pyarrow.Table + a_arg_min: int64 + b_arg_min: int64 + ---- + a_arg_min: [[0]] + b_arg_min: [[1]] + """, + "arg_max": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [10, 20], "b": [150, 100]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_arg_max(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select( + ... nw.col("a", "b").arg_max().name.suffix("_arg_max") + ... ).to_native() + + We can then pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_arg_max`: + + >>> agnostic_arg_max(df_pd) + a_arg_max b_arg_max + 0 1 0 + + >>> agnostic_arg_max(df_pl) + shape: (1, 2) + ┌───────────┬───────────┐ + │ a_arg_max ┆ b_arg_max │ + │ --- ┆ --- │ + │ u32 ┆ u32 │ + ╞═══════════╪═══════════╡ + │ 1 ┆ 0 │ + └───────────┴───────────┘ + + >>> agnostic_arg_max(df_pa) + pyarrow.Table + a_arg_max: int64 + b_arg_max: int64 + ---- + a_arg_max: [[1]] + b_arg_max: [[0]] + """, + "count": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3], "b": [None, 4, 4]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_count(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.all().count()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_count`: + + >>> agnostic_count(df_pd) + a b + 0 3 2 + + >>> agnostic_count(df_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ u32 ┆ u32 │ + ╞═════╪═════╡ + │ 3 ┆ 2 │ + └─────┴─────┘ + + >>> agnostic_count(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[3]] + b: [[2]] + """, + "n_unique": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_n_unique(df_native: IntoFrameT) -> IntoFrameT: + ... 
df = nw.from_native(df_native) + ... return df.select(nw.col("a", "b").n_unique()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_n_unique`: + + >>> agnostic_n_unique(df_pd) + a b + 0 5 3 + >>> agnostic_n_unique(df_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ u32 ┆ u32 │ + ╞═════╪═════╡ + │ 5 ┆ 3 │ + └─────┴─────┘ + >>> agnostic_n_unique(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[5]] + b: [[3]] + """, + "unique": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a", "b").unique(maintain_order=True)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_unique`: + + >>> agnostic_unique(df_pd) + a b + 0 1 2 + 1 3 4 + 2 5 6 + + >>> agnostic_unique(df_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 2 │ + │ 3 ┆ 4 │ + │ 5 ┆ 6 │ + └─────┴─────┘ + + >>> agnostic_unique(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,3,5]] + b: [[2,4,6]] + """, + "abs": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, -2], "b": [-3, 4]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_abs(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a", "b").abs()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_abs`: + + >>> agnostic_abs(df_pd) + a b + 0 1 3 + 1 2 4 + + >>> agnostic_abs(df_pl) + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 3 │ + │ 2 ┆ 4 │ + └─────┴─────┘ + + >>> agnostic_abs(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2]] + b: [[3,4]] + """, + "cum_sum": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a", "b").cum_sum()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_sum`: + + >>> agnostic_cum_sum(df_pd) + a b + 0 1 2 + 1 2 6 + 2 5 10 + 3 10 16 + 4 15 22 + >>> agnostic_cum_sum(df_pl) + shape: (5, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 2 │ + │ 2 ┆ 6 │ + │ 5 ┆ 10 │ + │ 10 ┆ 16 │ + │ 15 ┆ 22 │ + └─────┴─────┘ + >>> agnostic_cum_sum(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,5,10,15]] + b: [[2,6,10,16,22]] + """, + "diff": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 1, 3, 5, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_diff(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(a_diff=nw.col("a").diff()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_diff`: + + >>> agnostic_diff(df_pd) + a_diff + 0 NaN + 1 0.0 + 2 2.0 + 3 2.0 + 4 0.0 + + >>> agnostic_diff(df_pl) + shape: (5, 1) + ┌────────┐ + │ a_diff │ + │ --- │ + │ i64 │ + ╞════════╡ + │ null │ + │ 0 │ + │ 2 │ + │ 2 │ + │ 0 │ + └────────┘ + + >>> agnostic_diff(df_pa) + pyarrow.Table + a_diff: int64 + ---- + a_diff: [[null,0,2,2,0]] + """, + "shift": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 1, 3, 5, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_shift(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(a_shift=nw.col("a").shift(n=1)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_shift`: + + >>> agnostic_shift(df_pd) + a_shift + 0 NaN + 1 1.0 + 2 1.0 + 3 3.0 + 4 5.0 + + >>> agnostic_shift(df_pl) + shape: (5, 1) + ┌─────────┐ + │ a_shift │ + │ --- │ + │ i64 │ + ╞═════════╡ + │ null │ + │ 1 │ + │ 1 │ + │ 3 │ + │ 5 │ + └─────────┘ + + >>> agnostic_shift(df_pa) + pyarrow.Table + a_shift: int64 + ---- + a_shift: [[null,1,1,3,5]] + """, + "replace_strict": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [3, 0, 1, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define dataframe-agnostic functions: + + >>> def agnostic_replace_strict(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... 
b=nw.col("a").replace_strict( + ... [0, 1, 2, 3], + ... ["zero", "one", "two", "three"], + ... return_dtype=nw.String, + ... ) + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_replace_strict`: + + >>> agnostic_replace_strict(df_pd) + a b + 0 3 three + 1 0 zero + 2 1 one + 3 2 two + + >>> agnostic_replace_strict(df_pl) + shape: (4, 2) + ┌─────┬───────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ str │ + ╞═════╪═══════╡ + │ 3 ┆ three │ + │ 0 ┆ zero │ + │ 1 ┆ one │ + │ 2 ┆ two │ + └─────┴───────┘ + + >>> agnostic_replace_strict(df_pa) + pyarrow.Table + a: int64 + b: string + ---- + a: [[3,0,1,2]] + b: [["three","zero","one","two"]] + """, + "sort": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [5, None, 1, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define dataframe-agnostic functions: + + >>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a").sort()).to_native() + + >>> def agnostic_sort_descending(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a").sort(descending=True)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_sort` and `agnostic_sort_descending`: + + >>> agnostic_sort(df_pd) + a + 1 NaN + 2 1.0 + 3 2.0 + 0 5.0 + + >>> agnostic_sort(df_pl) + shape: (4, 1) + ┌──────┐ + │ a │ + │ --- │ + │ i64 │ + ╞══════╡ + │ null │ + │ 1 │ + │ 2 │ + │ 5 │ + └──────┘ + + >>> agnostic_sort(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[null,1,2,5]] + + >>> agnostic_sort_descending(df_pd) + a + 1 NaN + 0 5.0 + 3 2.0 + 2 1.0 + + >>> agnostic_sort_descending(df_pl) + shape: (4, 1) + ┌──────┐ + │ a │ + │ --- │ + │ i64 │ + ╞══════╡ + │ null │ + │ 5 │ + │ 2 │ + │ 1 │ + └──────┘ + + >>> agnostic_sort_descending(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[null,5,2,1]] + """, + "is_between": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 4, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_between(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a").is_between(2, 4, "right")).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_between`: + + >>> agnostic_is_between(df_pd) + a + 0 False + 1 False + 2 True + 3 True + 4 False + + >>> agnostic_is_between(df_pl) + shape: (5, 1) + ┌───────┐ + │ a │ + │ --- │ + │ bool │ + ╞═══════╡ + │ false │ + │ false │ + │ true │ + │ true │ + │ false │ + └───────┘ + + >>> agnostic_is_between(df_pa) + pyarrow.Table + a: bool + ---- + a: [[false,false,true,true,false]] + """, + "is_in": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 9, 10]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_in(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.with_columns(b=nw.col("a").is_in([1, 2])).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_in`: + + >>> agnostic_is_in(df_pd) + a b + 0 1 True + 1 2 True + 2 9 False + 3 10 False + + >>> agnostic_is_in(df_pl) + shape: (4, 2) + ┌─────┬───────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ bool │ + ╞═════╪═══════╡ + │ 1 ┆ true │ + │ 2 ┆ true │ + │ 9 ┆ false │ + │ 10 ┆ false │ + └─────┴───────┘ + + >>> agnostic_is_in(df_pa) + pyarrow.Table + a: int64 + b: bool + ---- + a: [[1,2,9,10]] + b: [[true,true,false,false]] + """, + "filter": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a").filter(nw.col("a") > 4), + ... nw.col("b").filter(nw.col("b") < 13), + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_filter`: + + >>> agnostic_filter(df_pd) + a b + 3 5 10 + 4 6 11 + 5 7 12 + + >>> agnostic_filter(df_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 5 ┆ 10 │ + │ 6 ┆ 11 │ + │ 7 ┆ 12 │ + └─────┴─────┘ + + >>> agnostic_filter(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[5,6,7]] + b: [[10,11,12]] + """, + "is_null": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> df_pd = pd.DataFrame( + ... { + ... "a": [2, 4, None, 3, 5], + ... "b": [2.0, 4.0, float("nan"), 3.0, 5.0], + ... } + ... ) + >>> data = { + ... 
"a": [2, 4, None, 3, 5], + ... "b": [2.0, 4.0, None, 3.0, 5.0], + ... } + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_null(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null() + ... ).to_native() + + We can then pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_is_null`: + + >>> agnostic_is_null(df_pd) + a b a_is_null b_is_null + 0 2.0 2.0 False False + 1 4.0 4.0 False False + 2 NaN NaN True True + 3 3.0 3.0 False False + 4 5.0 5.0 False False + + >>> agnostic_is_null(df_pl) + shape: (5, 4) + ┌──────┬──────┬───────────┬───────────┐ + │ a ┆ b ┆ a_is_null ┆ b_is_null │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ bool ┆ bool │ + ╞══════╪══════╪═══════════╪═══════════╡ + │ 2 ┆ 2.0 ┆ false ┆ false │ + │ 4 ┆ 4.0 ┆ false ┆ false │ + │ null ┆ null ┆ true ┆ true │ + │ 3 ┆ 3.0 ┆ false ┆ false │ + │ 5 ┆ 5.0 ┆ false ┆ false │ + └──────┴──────┴───────────┴───────────┘ + + >>> agnostic_is_null(df_pa) + pyarrow.Table + a: int64 + b: double + a_is_null: bool + b_is_null: bool + ---- + a: [[2,4,null,3,5]] + b: [[2,4,null,3,5]] + a_is_null: [[false,false,true,false,false]] + b_is_null: [[false,false,true,false,false]] + """, + "is_nan": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"orig": [0.0, None, 2.0]} + >>> df_pd = pd.DataFrame(data).astype({"orig": "Float64"}) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_self_div_is_nan(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... divided=nw.col("orig") / nw.col("orig"), + ... 
divided_is_nan=(nw.col("orig") / nw.col("orig")).is_nan(), + ... ).to_native() + + We can then pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_self_div_is_nan`: + + >>> print(agnostic_self_div_is_nan(df_pd)) + orig divided divided_is_nan + 0 0.0 NaN True + 1 <NA> <NA> <NA> + 2 2.0 1.0 False + + >>> print(agnostic_self_div_is_nan(df_pl)) + shape: (3, 3) + ┌──────┬─────────┬────────────────┐ + │ orig ┆ divided ┆ divided_is_nan │ + │ --- ┆ --- ┆ --- │ + │ f64 ┆ f64 ┆ bool │ + ╞══════╪═════════╪════════════════╡ + │ 0.0 ┆ NaN ┆ true │ + │ null ┆ null ┆ null │ + │ 2.0 ┆ 1.0 ┆ false │ + └──────┴─────────┴────────────────┘ + + >>> print(agnostic_self_div_is_nan(df_pa)) + pyarrow.Table + orig: double + divided: double + divided_is_nan: bool + ---- + orig: [[0,null,2]] + divided: [[nan,null,1]] + divided_is_nan: [[true,null,false]] + """, + "arg_true": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, None, None, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_arg_true(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a").is_null().arg_true()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_arg_true`: + + >>> agnostic_arg_true(df_pd) + a + 1 1 + 2 2 + + >>> agnostic_arg_true(df_pl) + shape: (2, 1) + ┌─────┐ + │ a │ + │ --- │ + │ u32 │ + ╞═════╡ + │ 1 │ + │ 2 │ + └─────┘ + + >>> agnostic_arg_true(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[1,2]] + """, + "fill_null": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> df_pd = pd.DataFrame( + ... { + ... 
"a": [2, 4, None, None, 3, 5], + ... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0], + ... } + ... ) + >>> data = { + ... "a": [2, 4, None, None, 3, 5], + ... "b": [2.0, 4.0, None, None, 3.0, 5.0], + ... } + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_fill_null(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns(nw.col("a", "b").fill_null(0)).to_native() + + We can then pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_fill_null`: + + >>> agnostic_fill_null(df_pd) + a b + 0 2.0 2.0 + 1 4.0 4.0 + 2 0.0 0.0 + 3 0.0 0.0 + 4 3.0 3.0 + 5 5.0 5.0 + + >>> agnostic_fill_null(df_pl) + shape: (6, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ f64 │ + ╞═════╪═════╡ + │ 2 ┆ 2.0 │ + │ 4 ┆ 4.0 │ + │ 0 ┆ 0.0 │ + │ 0 ┆ 0.0 │ + │ 3 ┆ 3.0 │ + │ 5 ┆ 5.0 │ + └─────┴─────┘ + + >>> agnostic_fill_null(df_pa) + pyarrow.Table + a: int64 + b: double + ---- + a: [[2,4,0,0,3,5]] + b: [[2,4,0,0,3,5]] + + Using a strategy: + + >>> def agnostic_fill_null_with_strategy(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("a", "b") + ... .fill_null(strategy="forward", limit=1) + ... .name.suffix("_filled") + ... 
).to_native() + + >>> agnostic_fill_null_with_strategy(df_pd) + a b a_filled b_filled + 0 2.0 2.0 2.0 2.0 + 1 4.0 4.0 4.0 4.0 + 2 NaN NaN 4.0 4.0 + 3 NaN NaN NaN NaN + 4 3.0 3.0 3.0 3.0 + 5 5.0 5.0 5.0 5.0 + + >>> agnostic_fill_null_with_strategy(df_pl) + shape: (6, 4) + ┌──────┬──────┬──────────┬──────────┐ + │ a ┆ b ┆ a_filled ┆ b_filled │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ i64 ┆ f64 │ + ╞══════╪══════╪══════════╪══════════╡ + │ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │ + │ 4 ┆ 4.0 ┆ 4 ┆ 4.0 │ + │ null ┆ null ┆ 4 ┆ 4.0 │ + │ null ┆ null ┆ null ┆ null │ + │ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │ + │ 5 ┆ 5.0 ┆ 5 ┆ 5.0 │ + └──────┴──────┴──────────┴──────────┘ + + >>> agnostic_fill_null_with_strategy(df_pa) + pyarrow.Table + a: int64 + b: double + a_filled: int64 + b_filled: double + ---- + a: [[2,4,null,null,3,5]] + b: [[2,4,null,null,3,5]] + a_filled: [[2,4,4,null,3,5]] + b_filled: [[2,4,4,null,3,5]] + """, + "drop_nulls": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]}) + >>> df_pl = pl.DataFrame({"a": [2.0, 4.0, None, 3.0, None, 5.0]}) + >>> df_pa = pa.table({"a": [2.0, 4.0, None, 3.0, None, 5.0]}) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a").drop_nulls()).to_native() + + We can then pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_drop_nulls`: + + >>> agnostic_drop_nulls(df_pd) + a + 0 2.0 + 1 4.0 + 3 3.0 + 5 5.0 + + >>> agnostic_drop_nulls(df_pl) + shape: (4, 1) + ┌─────┐ + │ a │ + │ --- │ + │ f64 │ + ╞═════╡ + │ 2.0 │ + │ 4.0 │ + │ 3.0 │ + │ 5.0 │ + └─────┘ + + >>> agnostic_drop_nulls(df_pa) + pyarrow.Table + a: double + ---- + a: [[2,4,3,5]] + """, + "sample": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_sample(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a").sample(fraction=1.0, with_replacement=True) + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_sample`: + + >>> agnostic_sample(df_pd) # doctest: +SKIP + a + 2 3 + 0 1 + 2 3 + + >>> agnostic_sample(df_pl) # doctest: +SKIP + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 2 │ + │ 3 │ + │ 3 │ + └─────┘ + + >>> agnostic_sample(df_pa) # doctest: +SKIP + pyarrow.Table + a: int64 + ---- + a: [[1,3,3]] + """, + "over": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3], "b": [1, 1, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_min_over_b(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... 
a_min_per_group=nw.col("a").min().over("b") + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_min_over_b`: + + >>> agnostic_min_over_b(df_pd) + a b a_min_per_group + 0 1 1 1 + 1 2 1 1 + 2 3 2 3 + + >>> agnostic_min_over_b(df_pl) + shape: (3, 3) + ┌─────┬─────┬─────────────────┐ + │ a ┆ b ┆ a_min_per_group │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════════════════╡ + │ 1 ┆ 1 ┆ 1 │ + │ 2 ┆ 1 ┆ 1 │ + │ 3 ┆ 2 ┆ 3 │ + └─────┴─────┴─────────────────┘ + + >>> agnostic_min_over_b(df_pa) + pyarrow.Table + a: int64 + b: int64 + a_min_per_group: int64 + ---- + a: [[1,2,3]] + b: [[1,1,2]] + a_min_per_group: [[1,1,3]] + + Cumulative operations are also supported, but (currently) only for + pandas and Polars: + + >>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns(c=nw.col("a").cum_sum().over("b")).to_native() + + >>> agnostic_cum_sum(df_pd) + a b c + 0 1 1 1 + 1 2 1 3 + 2 3 2 3 + + >>> agnostic_cum_sum(df_pl) + shape: (3, 3) + ┌─────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 1 ┆ 1 │ + │ 2 ┆ 1 ┆ 3 │ + │ 3 ┆ 2 ┆ 3 │ + └─────┴─────┴─────┘ + """, + "is_duplicated": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_duplicated(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.all().is_duplicated()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_duplicated`: + + >>> agnostic_is_duplicated(df_pd) + a b + 0 True True + 1 False True + 2 False False + 3 True False + + >>> agnostic_is_duplicated(df_pl) + shape: (4, 2) + ┌───────┬───────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞═══════╪═══════╡ + │ true ┆ true │ + │ false ┆ true │ + │ false ┆ false │ + │ true ┆ false │ + └───────┴───────┘ + + >>> agnostic_is_duplicated(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[true,false,false,true]] + b: [[true,true,false,false]] + """, + "is_unique": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_unique(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.all().is_unique()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_unique`: + + >>> agnostic_is_unique(df_pd) + a b + 0 False False + 1 True False + 2 True True + 3 False True + + >>> agnostic_is_unique(df_pl) + shape: (4, 2) + ┌───────┬───────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞═══════╪═══════╡ + │ false ┆ false │ + │ true ┆ false │ + │ true ┆ true │ + │ false ┆ true │ + └───────┴───────┘ + + >>> agnostic_is_unique(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[false,true,true,false]] + b: [[false,false,true,true]] + """, + "null_count": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.all().null_count()).to_native() + + We can then pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_null_count`: + + >>> agnostic_null_count(df_pd) + a b + 0 1 2 + + >>> agnostic_null_count(df_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ u32 ┆ u32 │ + ╞═════╪═════╡ + │ 1 ┆ 2 │ + └─────┴─────┘ + + >>> agnostic_null_count(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1]] + b: [[2]] + """, + "is_first_distinct": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_first_distinct(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.all().is_first_distinct()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_first_distinct`: + + >>> agnostic_is_first_distinct(df_pd) + a b + 0 True True + 1 True False + 2 True True + 3 False True + + >>> agnostic_is_first_distinct(df_pl) + shape: (4, 2) + ┌───────┬───────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞═══════╪═══════╡ + │ true ┆ true │ + │ true ┆ false │ + │ true ┆ true │ + │ false ┆ true │ + └───────┴───────┘ + + >>> agnostic_is_first_distinct(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[true,true,true,false]] + b: [[true,false,true,true]] + """, + "is_last_distinct": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's 
define a dataframe-agnostic function: + + >>> def agnostic_is_last_distinct(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.all().is_last_distinct()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_last_distinct`: + + >>> agnostic_is_last_distinct(df_pd) + a b + 0 False False + 1 True True + 2 True True + 3 True True + + >>> agnostic_is_last_distinct(df_pl) + shape: (4, 2) + ┌───────┬───────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞═══════╪═══════╡ + │ false ┆ false │ + │ true ┆ true │ + │ true ┆ true │ + │ true ┆ true │ + └───────┴───────┘ + + >>> agnostic_is_last_distinct(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[false,true,true,true]] + b: [[false,true,true,true]] + """, + "quantile": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": list(range(50)), "b": list(range(50, 100))} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_quantile(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a", "b").quantile(0.5, interpolation="linear") + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_quantile`: + + >>> agnostic_quantile(df_pd) + a b + 0 24.5 74.5 + + >>> agnostic_quantile(df_pl) + shape: (1, 2) + ┌──────┬──────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞══════╪══════╡ + │ 24.5 ┆ 74.5 │ + └──────┴──────┘ + + >>> agnostic_quantile(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[24.5]] + b: [[74.5]] + """, + "head": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": list(range(10))} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function that returns the first 3 rows: + + >>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a").head(3)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_head`: + + >>> agnostic_head(df_pd) + a + 0 0 + 1 1 + 2 2 + + >>> agnostic_head(df_pl) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 0 │ + │ 1 │ + │ 2 │ + └─────┘ + + >>> agnostic_head(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[0,1,2]] + """, + "tail": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": list(range(10))} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function that returns the last 3 rows: + + >>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a").tail(3)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_tail`: + + >>> agnostic_tail(df_pd) + a + 7 7 + 8 8 + 9 9 + + >>> agnostic_tail(df_pl) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 7 │ + │ 8 │ + │ 9 │ + └─────┘ + + >>> agnostic_tail(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[7,8,9]] + """, + "round": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1.12345, 2.56789, 3.901234]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function that rounds to the first decimal: + + >>> def agnostic_round(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a").round(1)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_round`: + + >>> agnostic_round(df_pd) + a + 0 1.1 + 1 2.6 + 2 3.9 + + >>> agnostic_round(df_pl) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ f64 │ + ╞═════╡ + │ 1.1 │ + │ 2.6 │ + │ 3.9 │ + └─────┘ + + >>> agnostic_round(df_pa) + pyarrow.Table + a: double + ---- + a: [[1.1,2.6,3.9]] + """, + "len": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function that computes the len over + different values of "b" column: + + >>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"), + ... 
nw.col("a").filter(nw.col("b") == 2).len().alias("a2"), + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_len`: + + >>> agnostic_len(df_pd) + a1 a2 + 0 2 1 + + >>> agnostic_len(df_pl) + shape: (1, 2) + ┌─────┬─────┐ + │ a1 ┆ a2 │ + │ --- ┆ --- │ + │ u32 ┆ u32 │ + ╞═════╪═════╡ + │ 2 ┆ 1 │ + └─────┴─────┘ + + >>> agnostic_len(df_pa) + pyarrow.Table + a1: int64 + a2: int64 + ---- + a1: [[2]] + a2: [[1]] + """, + "gather_every": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function which gathers every 2 rows, + starting from an offset of 1: + + >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a").gather_every(n=2, offset=1)).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_gather_every`: + + >>> agnostic_gather_every(df_pd) + a + 1 2 + 3 4 + + >>> agnostic_gather_every(df_pl) + shape: (2, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 2 │ + │ 4 │ + └─────┘ + + >>> agnostic_gather_every(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[2,4]] + """, + "clip": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_clip_lower(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a").clip(2)).to_native() + + We can then pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_clip_lower`: + + >>> agnostic_clip_lower(df_pd) + a + 0 2 + 1 2 + 2 3 + + >>> agnostic_clip_lower(df_pl) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 2 │ + │ 2 │ + │ 3 │ + └─────┘ + + >>> agnostic_clip_lower(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[2,2,3]] + + We define another library agnostic function: + + >>> def agnostic_clip_upper(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a").clip(upper_bound=2)).to_native() + + We can then pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_clip_upper`: + + >>> agnostic_clip_upper(df_pd) + a + 0 1 + 1 2 + 2 2 + + >>> agnostic_clip_upper(df_pl) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 1 │ + │ 2 │ + │ 2 │ + └─────┘ + + >>> agnostic_clip_upper(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[1,2,2]] + + We can have both at the same time + + >>> data = {"a": [-1, 1, -3, 3, -5, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_clip(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a").clip(-1, 3)).to_native() + + We can pass any supported library such as Pandas, Polars, or + PyArrow to `agnostic_clip`: + + >>> agnostic_clip(df_pd) + a + 0 -1 + 1 1 + 2 -1 + 3 3 + 4 -1 + 5 3 + + >>> agnostic_clip(df_pl) + shape: (6, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ -1 │ + │ 1 │ + │ -1 │ + │ 3 │ + │ -1 │ + │ 3 │ + └─────┘ + + >>> agnostic_clip(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[-1,1,-1,3,-1,3]] + """, + "mode": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [1, 1, 2, 3], + ... "b": [1, 1, 2, 2], + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_mode(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a").mode()).sort("a").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_mode`: + + >>> agnostic_mode(df_pd) + a + 0 1 + + >>> agnostic_mode(df_pl) + shape: (1, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 1 │ + └─────┘ + + >>> agnostic_mode(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[1]] + """, + "is_finite": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [float("nan"), float("inf"), 2.0, None]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_is_finite(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("a").is_finite()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_finite`: + + >>> agnostic_is_finite(df_pd) + a + 0 False + 1 False + 2 True + 3 False + + >>> agnostic_is_finite(df_pl) + shape: (4, 1) + ┌───────┐ + │ a │ + │ --- │ + │ bool │ + ╞═══════╡ + │ false │ + │ false │ + │ true │ + │ null │ + └───────┘ + + >>> agnostic_is_finite(df_pa) + pyarrow.Table + a: bool + ---- + a: [[false,false,true,null]] + """, + "cum_count": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": ["x", "k", None, "d"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_cum_count(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("a").cum_count().alias("cum_count"), + ... nw.col("a").cum_count(reverse=True).alias("cum_count_reverse"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_count`: + + >>> agnostic_cum_count(df_pd) + a cum_count cum_count_reverse + 0 x 1 3 + 1 k 2 2 + 2 None 2 1 + 3 d 3 1 + + >>> agnostic_cum_count(df_pl) + shape: (4, 3) + ┌──────┬───────────┬───────────────────┐ + │ a ┆ cum_count ┆ cum_count_reverse │ + │ --- ┆ --- ┆ --- │ + │ str ┆ u32 ┆ u32 │ + ╞══════╪═══════════╪═══════════════════╡ + │ x ┆ 1 ┆ 3 │ + │ k ┆ 2 ┆ 2 │ + │ null ┆ 2 ┆ 1 │ + │ d ┆ 3 ┆ 1 │ + └──────┴───────────┴───────────────────┘ + + >>> agnostic_cum_count(df_pa) + pyarrow.Table + a: string + cum_count: uint32 + cum_count_reverse: uint32 + ---- + a: [["x","k",null,"d"]] + cum_count: [[1,2,2,3]] + cum_count_reverse: [[3,2,1,1]] + """, + "cum_min": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [3, 1, None, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_cum_min(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("a").cum_min().alias("cum_min"), + ... nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_min`: + + >>> agnostic_cum_min(df_pd) + a cum_min cum_min_reverse + 0 3.0 3.0 1.0 + 1 1.0 1.0 1.0 + 2 NaN NaN NaN + 3 2.0 1.0 2.0 + + >>> agnostic_cum_min(df_pl) + shape: (4, 3) + ┌──────┬─────────┬─────────────────┐ + │ a ┆ cum_min ┆ cum_min_reverse │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞══════╪═════════╪═════════════════╡ + │ 3 ┆ 3 ┆ 1 │ + │ 1 ┆ 1 ┆ 1 │ + │ null ┆ null ┆ null │ + │ 2 ┆ 1 ┆ 2 │ + └──────┴─────────┴─────────────────┘ + + >>> agnostic_cum_min(df_pa) + pyarrow.Table + a: int64 + cum_min: int64 + cum_min_reverse: int64 + ---- + a: [[3,1,null,2]] + cum_min: [[3,1,null,1]] + cum_min_reverse: [[1,1,null,2]] + """, + "cum_max": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 3, None, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_cum_max(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("a").cum_max().alias("cum_max"), + ... nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_max`: + + >>> agnostic_cum_max(df_pd) + a cum_max cum_max_reverse + 0 1.0 1.0 3.0 + 1 3.0 3.0 3.0 + 2 NaN NaN NaN + 3 2.0 3.0 2.0 + + >>> agnostic_cum_max(df_pl) + shape: (4, 3) + ┌──────┬─────────┬─────────────────┐ + │ a ┆ cum_max ┆ cum_max_reverse │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞══════╪═════════╪═════════════════╡ + │ 1 ┆ 1 ┆ 3 │ + │ 3 ┆ 3 ┆ 3 │ + │ null ┆ null ┆ null │ + │ 2 ┆ 3 ┆ 2 │ + └──────┴─────────┴─────────────────┘ + + >>> agnostic_cum_max(df_pa) + pyarrow.Table + a: int64 + cum_max: int64 + cum_max_reverse: int64 + ---- + a: [[1,3,null,2]] + cum_max: [[1,3,null,3]] + cum_max_reverse: [[3,3,null,2]] + """, + "cum_prod": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 3, None, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_cum_prod(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("a").cum_prod().alias("cum_prod"), + ... nw.col("a").cum_prod(reverse=True).alias("cum_prod_reverse"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_prod`: + + >>> agnostic_cum_prod(df_pd) + a cum_prod cum_prod_reverse + 0 1.0 1.0 6.0 + 1 3.0 3.0 6.0 + 2 NaN NaN NaN + 3 2.0 6.0 2.0 + + >>> agnostic_cum_prod(df_pl) + shape: (4, 3) + ┌──────┬──────────┬──────────────────┐ + │ a ┆ cum_prod ┆ cum_prod_reverse │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞══════╪══════════╪══════════════════╡ + │ 1 ┆ 1 ┆ 6 │ + │ 3 ┆ 3 ┆ 6 │ + │ null ┆ null ┆ null │ + │ 2 ┆ 6 ┆ 2 │ + └──────┴──────────┴──────────────────┘ + + >>> agnostic_cum_prod(df_pa) + pyarrow.Table + a: int64 + cum_prod: int64 + cum_prod_reverse: int64 + ---- + a: [[1,3,null,2]] + cum_prod: [[1,3,null,6]] + cum_prod_reverse: [[6,6,null,2]] + """, + "rolling_sum": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1.0, 2.0, None, 4.0]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_rolling_sum(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... b=nw.col("a").rolling_sum(window_size=3, min_periods=1) + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_sum`: + + >>> agnostic_rolling_sum(df_pd) + a b + 0 1.0 1.0 + 1 2.0 3.0 + 2 NaN 3.0 + 3 4.0 6.0 + + >>> agnostic_rolling_sum(df_pl) + shape: (4, 2) + ┌──────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞══════╪═════╡ + │ 1.0 ┆ 1.0 │ + │ 2.0 ┆ 3.0 │ + │ null ┆ 3.0 │ + │ 4.0 ┆ 6.0 │ + └──────┴─────┘ + + >>> agnostic_rolling_sum(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[1,2,null,4]] + b: [[1,3,3,6]] + """, + "rolling_mean": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1.0, 2.0, None, 4.0]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_rolling_mean(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... b=nw.col("a").rolling_mean(window_size=3, min_periods=1) + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_mean`: + + >>> agnostic_rolling_mean(df_pd) + a b + 0 1.0 1.0 + 1 2.0 1.5 + 2 NaN 1.5 + 3 4.0 3.0 + + >>> agnostic_rolling_mean(df_pl) + shape: (4, 2) + ┌──────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞══════╪═════╡ + │ 1.0 ┆ 1.0 │ + │ 2.0 ┆ 1.5 │ + │ null ┆ 1.5 │ + │ 4.0 ┆ 3.0 │ + └──────┴─────┘ + + >>> agnostic_rolling_mean(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[1,2,null,4]] + b: [[1,1.5,1.5,3]] + """, + "rolling_var": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1.0, 2.0, None, 4.0]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_rolling_var(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... b=nw.col("a").rolling_var(window_size=3, min_periods=1) + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_var`: + + >>> agnostic_rolling_var(df_pd) + a b + 0 1.0 NaN + 1 2.0 0.5 + 2 NaN 0.5 + 3 4.0 2.0 + + >>> agnostic_rolling_var(df_pl) # doctest:+SKIP + shape: (4, 2) + ┌──────┬──────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞══════╪══════╡ + │ 1.0 ┆ null │ + │ 2.0 ┆ 0.5 │ + │ null ┆ 0.5 │ + │ 4.0 ┆ 2.0 │ + └──────┴──────┘ + + >>> agnostic_rolling_var(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[1,2,null,4]] + b: [[nan,0.5,0.5,2]] + """, + "rolling_std": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1.0, 2.0, None, 4.0]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_rolling_std(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... b=nw.col("a").rolling_std(window_size=3, min_periods=1) + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_std`: + + >>> agnostic_rolling_std(df_pd) + a b + 0 1.0 NaN + 1 2.0 0.707107 + 2 NaN 0.707107 + 3 4.0 1.414214 + + >>> agnostic_rolling_std(df_pl) # doctest:+SKIP + shape: (4, 2) + ┌──────┬──────────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞══════╪══════════╡ + │ 1.0 ┆ null │ + │ 2.0 ┆ 0.707107 │ + │ null ┆ 0.707107 │ + │ 4.0 ┆ 1.414214 │ + └──────┴──────────┘ + + >>> agnostic_rolling_std(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[1,2,null,4]] + b: [[nan,0.7071067811865476,0.7071067811865476,1.4142135623730951]] + """, + "rank": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [3, 6, 1, 1, 6]} + + We define a dataframe-agnostic function that computes the dense rank for + the data: + + >>> def agnostic_dense_rank(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... result = df.with_columns(rnk=nw.col("a").rank(method="dense")) + ... 
return result.to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dense_rank`: + + >>> agnostic_dense_rank(pd.DataFrame(data)) + a rnk + 0 3 2.0 + 1 6 3.0 + 2 1 1.0 + 3 1 1.0 + 4 6 3.0 + + >>> agnostic_dense_rank(pl.DataFrame(data)) + shape: (5, 2) + ┌─────┬─────┐ + │ a ┆ rnk │ + │ --- ┆ --- │ + │ i64 ┆ u32 │ + ╞═════╪═════╡ + │ 3 ┆ 2 │ + │ 6 ┆ 3 │ + │ 1 ┆ 1 │ + │ 1 ┆ 1 │ + │ 6 ┆ 3 │ + └─────┴─────┘ + + >>> agnostic_dense_rank(pa.table(data)) + pyarrow.Table + a: int64 + rnk: uint64 + ---- + a: [[3,6,1,1,6]] + rnk: [[2,3,1,1,3]] + """, +} diff --git a/docs/docstring_examples/expr_cat.py b/docs/docstring_examples/expr_cat.py new file mode 100644 index 000000000..0031a3bb9 --- /dev/null +++ b/docs/docstring_examples/expr_cat.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +EXAMPLES = { + "get_categories": """ + Let's create some dataframes: + + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"fruits": ["apple", "mango", "mango"]} + >>> df_pd = pd.DataFrame(data, dtype="category") + >>> df_pl = pl.DataFrame(data, schema={"fruits": pl.Categorical}) + + We define a dataframe-agnostic function to get unique categories + from column 'fruits': + + >>> def agnostic_cat_get_categories(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("fruits").cat.get_categories()).to_native() + + We can then pass any supported library such as pandas or Polars to + `agnostic_cat_get_categories`: + + >>> agnostic_cat_get_categories(df_pd) + fruits + 0 apple + 1 mango + + >>> agnostic_cat_get_categories(df_pl) + shape: (2, 1) + ┌────────┐ + │ fruits │ + │ --- │ + │ str │ + ╞════════╡ + │ apple │ + │ mango │ + └────────┘ + """, +} diff --git a/docs/docstring_examples/expr_dt.py b/docs/docstring_examples/expr_dt.py new file mode 100644 index 000000000..23f6ff28a --- /dev/null +++ b/docs/docstring_examples/expr_dt.py @@ -0,0 +1,1097 @@ +from __future__ import annotations + +EXAMPLES = { + "date": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]} + >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_dt_date(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a").dt.date()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_date`: + + >>> agnostic_dt_date(df_pd) + a + 0 2012-01-07 + 1 2023-03-10 + + >>> agnostic_dt_date(df_pl) + shape: (2, 1) + ┌────────────┐ + │ a │ + │ --- │ + │ date │ + ╞════════════╡ + │ 2012-01-07 │ + │ 2023-03-10 │ + └────────────┘ + + >>> agnostic_dt_date(df_pa) + pyarrow.Table + a: date32[day] + ---- + a: [[2012-01-07,2023-03-10]] + """, + "year": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "datetime": [ + ... 
datetime(1978, 6, 1), + ... datetime(2024, 12, 13), + ... datetime(2065, 1, 1), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_year(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("datetime").dt.year().alias("year") + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_year`: + + >>> agnostic_dt_year(df_pd) + datetime year + 0 1978-06-01 1978 + 1 2024-12-13 2024 + 2 2065-01-01 2065 + + >>> agnostic_dt_year(df_pl) + shape: (3, 2) + ┌─────────────────────┬──────┐ + │ datetime ┆ year │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i32 │ + ╞═════════════════════╪══════╡ + │ 1978-06-01 00:00:00 ┆ 1978 │ + │ 2024-12-13 00:00:00 ┆ 2024 │ + │ 2065-01-01 00:00:00 ┆ 2065 │ + └─────────────────────┴──────┘ + + >>> agnostic_dt_year(df_pa) + pyarrow.Table + datetime: timestamp[us] + year: int64 + ---- + datetime: [[1978-06-01 00:00:00.000000,2024-12-13 00:00:00.000000,2065-01-01 00:00:00.000000]] + year: [[1978,2024,2065]] + """, + "month": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "datetime": [ + ... datetime(1978, 6, 1), + ... datetime(2024, 12, 13), + ... datetime(2065, 1, 1), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_month(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("datetime").dt.month().alias("month"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_month`: + + >>> agnostic_dt_month(df_pd) + datetime month + 0 1978-06-01 6 + 1 2024-12-13 12 + 2 2065-01-01 1 + + >>> agnostic_dt_month(df_pl) + shape: (3, 2) + ┌─────────────────────┬───────┐ + │ datetime ┆ month │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i8 │ + ╞═════════════════════╪═══════╡ + │ 1978-06-01 00:00:00 ┆ 6 │ + │ 2024-12-13 00:00:00 ┆ 12 │ + │ 2065-01-01 00:00:00 ┆ 1 │ + └─────────────────────┴───────┘ + + >>> agnostic_dt_month(df_pa) + pyarrow.Table + datetime: timestamp[us] + month: int64 + ---- + datetime: [[1978-06-01 00:00:00.000000,2024-12-13 00:00:00.000000,2065-01-01 00:00:00.000000]] + month: [[6,12,1]] + """, + "day": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "datetime": [ + ... datetime(1978, 6, 1), + ... datetime(2024, 12, 13), + ... datetime(2065, 1, 1), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_day(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("datetime").dt.day().alias("day"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_day`: + + >>> agnostic_dt_day(df_pd) + datetime day + 0 1978-06-01 1 + 1 2024-12-13 13 + 2 2065-01-01 1 + + >>> agnostic_dt_day(df_pl) + shape: (3, 2) + ┌─────────────────────┬─────┐ + │ datetime ┆ day │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i8 │ + ╞═════════════════════╪═════╡ + │ 1978-06-01 00:00:00 ┆ 1 │ + │ 2024-12-13 00:00:00 ┆ 13 │ + │ 2065-01-01 00:00:00 ┆ 1 │ + └─────────────────────┴─────┘ + + >>> agnostic_dt_day(df_pa) + pyarrow.Table + datetime: timestamp[us] + day: int64 + ---- + datetime: [[1978-06-01 00:00:00.000000,2024-12-13 00:00:00.000000,2065-01-01 00:00:00.000000]] + day: [[1,13,1]] + """, + "hour": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "datetime": [ + ... datetime(1978, 1, 1, 1), + ... datetime(2024, 10, 13, 5), + ... datetime(2065, 1, 1, 10), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_hour(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("datetime").dt.hour().alias("hour") + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_hour`: + + >>> agnostic_dt_hour(df_pd) + datetime hour + 0 1978-01-01 01:00:00 1 + 1 2024-10-13 05:00:00 5 + 2 2065-01-01 10:00:00 10 + + >>> agnostic_dt_hour(df_pl) + shape: (3, 2) + ┌─────────────────────┬──────┐ + │ datetime ┆ hour │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i8 │ + ╞═════════════════════╪══════╡ + │ 1978-01-01 01:00:00 ┆ 1 │ + │ 2024-10-13 05:00:00 ┆ 5 │ + │ 2065-01-01 10:00:00 ┆ 10 │ + └─────────────────────┴──────┘ + + >>> agnostic_dt_hour(df_pa) + pyarrow.Table + datetime: timestamp[us] + hour: int64 + ---- + datetime: [[1978-01-01 01:00:00.000000,2024-10-13 05:00:00.000000,2065-01-01 10:00:00.000000]] + hour: [[1,5,10]] + """, + "minute": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "datetime": [ + ... datetime(1978, 1, 1, 1, 1), + ... datetime(2024, 10, 13, 5, 30), + ... datetime(2065, 1, 1, 10, 20), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_minute(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("datetime").dt.minute().alias("minute"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_minute`: + + >>> agnostic_dt_minute(df_pd) + datetime minute + 0 1978-01-01 01:01:00 1 + 1 2024-10-13 05:30:00 30 + 2 2065-01-01 10:20:00 20 + + >>> agnostic_dt_minute(df_pl) + shape: (3, 2) + ┌─────────────────────┬────────┐ + │ datetime ┆ minute │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i8 │ + ╞═════════════════════╪════════╡ + │ 1978-01-01 01:01:00 ┆ 1 │ + │ 2024-10-13 05:30:00 ┆ 30 │ + │ 2065-01-01 10:20:00 ┆ 20 │ + └─────────────────────┴────────┘ + + >>> agnostic_dt_minute(df_pa) + pyarrow.Table + datetime: timestamp[us] + minute: int64 + ---- + datetime: [[1978-01-01 01:01:00.000000,2024-10-13 05:30:00.000000,2065-01-01 10:20:00.000000]] + minute: [[1,30,20]] + """, + "second": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "datetime": [ + ... datetime(1978, 1, 1, 1, 1, 1), + ... datetime(2024, 10, 13, 5, 30, 14), + ... datetime(2065, 1, 1, 10, 20, 30), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_second(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("datetime").dt.second().alias("second"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_second`: + + >>> agnostic_dt_second(df_pd) + datetime second + 0 1978-01-01 01:01:01 1 + 1 2024-10-13 05:30:14 14 + 2 2065-01-01 10:20:30 30 + + >>> agnostic_dt_second(df_pl) + shape: (3, 2) + ┌─────────────────────┬────────┐ + │ datetime ┆ second │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i8 │ + ╞═════════════════════╪════════╡ + │ 1978-01-01 01:01:01 ┆ 1 │ + │ 2024-10-13 05:30:14 ┆ 14 │ + │ 2065-01-01 10:20:30 ┆ 30 │ + └─────────────────────┴────────┘ + + >>> agnostic_dt_second(df_pa) + pyarrow.Table + datetime: timestamp[us] + second: int64 + ---- + datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.000000,2065-01-01 10:20:30.000000]] + second: [[1,14,30]] + """, + "millisecond": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "datetime": [ + ... datetime(1978, 1, 1, 1, 1, 1, 0), + ... datetime(2024, 10, 13, 5, 30, 14, 505000), + ... datetime(2065, 1, 1, 10, 20, 30, 67000), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_millisecond(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("datetime").dt.millisecond().alias("millisecond"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_millisecond`: + + >>> agnostic_dt_millisecond(df_pd) + datetime millisecond + 0 1978-01-01 01:01:01.000 0 + 1 2024-10-13 05:30:14.505 505 + 2 2065-01-01 10:20:30.067 67 + + >>> agnostic_dt_millisecond(df_pl) + shape: (3, 2) + ┌─────────────────────────┬─────────────┐ + │ datetime ┆ millisecond │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i32 │ + ╞═════════════════════════╪═════════════╡ + │ 1978-01-01 01:01:01 ┆ 0 │ + │ 2024-10-13 05:30:14.505 ┆ 505 │ + │ 2065-01-01 10:20:30.067 ┆ 67 │ + └─────────────────────────┴─────────────┘ + + >>> agnostic_dt_millisecond(df_pa) + pyarrow.Table + datetime: timestamp[us] + millisecond: int64 + ---- + datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.505000,2065-01-01 10:20:30.067000]] + millisecond: [[0,505,67]] + """, + "microsecond": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "datetime": [ + ... datetime(1978, 1, 1, 1, 1, 1, 0), + ... datetime(2024, 10, 13, 5, 30, 14, 505000), + ... datetime(2065, 1, 1, 10, 20, 30, 67000), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_microsecond(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("datetime").dt.microsecond().alias("microsecond"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_microsecond`: + + >>> agnostic_dt_microsecond(df_pd) + datetime microsecond + 0 1978-01-01 01:01:01.000 0 + 1 2024-10-13 05:30:14.505 505000 + 2 2065-01-01 10:20:30.067 67000 + + >>> agnostic_dt_microsecond(df_pl) + shape: (3, 2) + ┌─────────────────────────┬─────────────┐ + │ datetime ┆ microsecond │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i32 │ + ╞═════════════════════════╪═════════════╡ + │ 1978-01-01 01:01:01 ┆ 0 │ + │ 2024-10-13 05:30:14.505 ┆ 505000 │ + │ 2065-01-01 10:20:30.067 ┆ 67000 │ + └─────────────────────────┴─────────────┘ + + >>> agnostic_dt_microsecond(df_pa) + pyarrow.Table + datetime: timestamp[us] + microsecond: int64 + ---- + datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.505000,2065-01-01 10:20:30.067000]] + microsecond: [[0,505000,67000]] + """, + "nanosecond": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "datetime": [ + ... datetime(1978, 1, 1, 1, 1, 1, 0), + ... datetime(2024, 10, 13, 5, 30, 14, 500000), + ... datetime(2065, 1, 1, 10, 20, 30, 60000), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_nanosecond(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("datetime").dt.nanosecond().alias("nanosecond"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_nanosecond`: + + >>> agnostic_dt_nanosecond(df_pd) + datetime nanosecond + 0 1978-01-01 01:01:01.000 0 + 1 2024-10-13 05:30:14.500 500000000 + 2 2065-01-01 10:20:30.060 60000000 + + >>> agnostic_dt_nanosecond(df_pl) + shape: (3, 2) + ┌─────────────────────────┬────────────┐ + │ datetime ┆ nanosecond │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i32 │ + ╞═════════════════════════╪════════════╡ + │ 1978-01-01 01:01:01 ┆ 0 │ + │ 2024-10-13 05:30:14.500 ┆ 500000000 │ + │ 2065-01-01 10:20:30.060 ┆ 60000000 │ + └─────────────────────────┴────────────┘ + + >>> agnostic_dt_nanosecond(df_pa) + pyarrow.Table + datetime: timestamp[us] + nanosecond: int64 + ---- + datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.500000,2065-01-01 10:20:30.060000]] + nanosecond: [[0,500000000,60000000]] + """, + "ordinal_day": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_ordinal_day(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... a_ordinal_day=nw.col("a").dt.ordinal_day() + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_ordinal_day`: + + >>> agnostic_dt_ordinal_day(df_pd) + a a_ordinal_day + 0 2020-01-01 1 + 1 2020-08-03 216 + + >>> agnostic_dt_ordinal_day(df_pl) + shape: (2, 2) + ┌─────────────────────┬───────────────┐ + │ a ┆ a_ordinal_day │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i16 │ + ╞═════════════════════╪═══════════════╡ + │ 2020-01-01 00:00:00 ┆ 1 │ + │ 2020-08-03 00:00:00 ┆ 216 │ + └─────────────────────┴───────────────┘ + + >>> agnostic_dt_ordinal_day(df_pa) + pyarrow.Table + a: timestamp[us] + a_ordinal_day: int64 + ---- + a: [[2020-01-01 00:00:00.000000,2020-08-03 00:00:00.000000]] + a_ordinal_day: [[1,216]] + """, + "weekday": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_weekday(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.with_columns(a_weekday=nw.col("a").dt.weekday()).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_weekday`: + + >>> agnostic_dt_weekday(df_pd) + a a_weekday + 0 2020-01-01 3 + 1 2020-08-03 1 + + >>> agnostic_dt_weekday(df_pl) + shape: (2, 2) + ┌─────────────────────┬───────────┐ + │ a ┆ a_weekday │ + │ --- ┆ --- │ + │ datetime[μs] ┆ i8 │ + ╞═════════════════════╪═══════════╡ + │ 2020-01-01 00:00:00 ┆ 3 │ + │ 2020-08-03 00:00:00 ┆ 1 │ + └─────────────────────┴───────────┘ + + >>> agnostic_dt_weekday(df_pa) + pyarrow.Table + a: timestamp[us] + a_weekday: int64 + ---- + a: [[2020-01-01 00:00:00.000000,2020-08-03 00:00:00.000000]] + a_weekday: [[3,1]] + """, + "total_minutes": """ + >>> from datetime import timedelta + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_total_minutes(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... a_total_minutes=nw.col("a").dt.total_minutes() + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_total_minutes`: + + >>> agnostic_dt_total_minutes(df_pd) + a a_total_minutes + 0 0 days 00:10:00 10 + 1 0 days 00:20:40 20 + + >>> agnostic_dt_total_minutes(df_pl) + shape: (2, 2) + ┌──────────────┬─────────────────┐ + │ a ┆ a_total_minutes │ + │ --- ┆ --- │ + │ duration[μs] ┆ i64 │ + ╞══════════════╪═════════════════╡ + │ 10m ┆ 10 │ + │ 20m 40s ┆ 20 │ + └──────────────┴─────────────────┘ + + >>> agnostic_dt_total_minutes(df_pa) + pyarrow.Table + a: duration[us] + a_total_minutes: int64 + ---- + a: [[600000000,1240000000]] + a_total_minutes: [[10,20]] + """, + "total_seconds": """ + >>> from datetime import timedelta + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_total_seconds(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... a_total_seconds=nw.col("a").dt.total_seconds() + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_total_seconds`: + + >>> agnostic_dt_total_seconds(df_pd) + a a_total_seconds + 0 0 days 00:00:10 10 + 1 0 days 00:00:20.040000 20 + + >>> agnostic_dt_total_seconds(df_pl) + shape: (2, 2) + ┌──────────────┬─────────────────┐ + │ a ┆ a_total_seconds │ + │ --- ┆ --- │ + │ duration[μs] ┆ i64 │ + ╞══════════════╪═════════════════╡ + │ 10s ┆ 10 │ + │ 20s 40ms ┆ 20 │ + └──────────────┴─────────────────┘ + + >>> agnostic_dt_total_seconds(df_pa) + pyarrow.Table + a: duration[us] + a_total_seconds: int64 + ---- + a: [[10000000,20040000]] + a_total_seconds: [[10,20]] + """, + "total_milliseconds": """ + >>> from datetime import timedelta + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [ + ... timedelta(milliseconds=10), + ... timedelta(milliseconds=20, microseconds=40), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_total_milliseconds(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... a_total_milliseconds=nw.col("a").dt.total_milliseconds() + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_total_milliseconds`: + + >>> agnostic_dt_total_milliseconds(df_pd) + a a_total_milliseconds + 0 0 days 00:00:00.010000 10 + 1 0 days 00:00:00.020040 20 + + >>> agnostic_dt_total_milliseconds(df_pl) + shape: (2, 2) + ┌──────────────┬──────────────────────┐ + │ a ┆ a_total_milliseconds │ + │ --- ┆ --- │ + │ duration[μs] ┆ i64 │ + ╞══════════════╪══════════════════════╡ + │ 10ms ┆ 10 │ + │ 20040µs ┆ 20 │ + └──────────────┴──────────────────────┘ + + >>> agnostic_dt_total_milliseconds(df_pa) + pyarrow.Table + a: duration[us] + a_total_milliseconds: int64 + ---- + a: [[10000,20040]] + a_total_milliseconds: [[10,20]] + """, + "total_microseconds": """ + >>> from datetime import timedelta + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [ + ... timedelta(microseconds=10), + ... timedelta(milliseconds=1, microseconds=200), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_total_microseconds(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... a_total_microseconds=nw.col("a").dt.total_microseconds() + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_total_microseconds`: + + >>> agnostic_dt_total_microseconds(df_pd) + a a_total_microseconds + 0 0 days 00:00:00.000010 10 + 1 0 days 00:00:00.001200 1200 + + >>> agnostic_dt_total_microseconds(df_pl) + shape: (2, 2) + ┌──────────────┬──────────────────────┐ + │ a ┆ a_total_microseconds │ + │ --- ┆ --- │ + │ duration[μs] ┆ i64 │ + ╞══════════════╪══════════════════════╡ + │ 10µs ┆ 10 │ + │ 1200µs ┆ 1200 │ + └──────────────┴──────────────────────┘ + + >>> agnostic_dt_total_microseconds(df_pa) + pyarrow.Table + a: duration[us] + a_total_microseconds: int64 + ---- + a: [[10,1200]] + a_total_microseconds: [[10,1200]] + """, + "total_nanoseconds": """ + >>> from datetime import timedelta + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"] + >>> df_pd = pd.DataFrame({"a": pd.to_datetime(data)}) + >>> df_pl = pl.DataFrame({"a": data}).with_columns( + ... pl.col("a").str.to_datetime(time_unit="ns") + ... ) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_total_nanoseconds(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... a_diff_total_nanoseconds=nw.col("a").diff().dt.total_nanoseconds() + ... 
).to_native() + + We can then pass any supported library such as pandas or Polars to + `agnostic_dt_total_nanoseconds`: + + >>> agnostic_dt_total_nanoseconds(df_pd) + a a_diff_total_nanoseconds + 0 2024-01-01 00:00:00.000000001 NaN + 1 2024-01-01 00:00:00.000000002 1.0 + + >>> agnostic_dt_total_nanoseconds(df_pl) + shape: (2, 2) + ┌───────────────────────────────┬──────────────────────────┐ + │ a ┆ a_diff_total_nanoseconds │ + │ --- ┆ --- │ + │ datetime[ns] ┆ i64 │ + ╞═══════════════════════════════╪══════════════════════════╡ + │ 2024-01-01 00:00:00.000000001 ┆ null │ + │ 2024-01-01 00:00:00.000000002 ┆ 1 │ + └───────────────────────────────┴──────────────────────────┘ + """, + "to_string": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [ + ... datetime(2020, 3, 1), + ... datetime(2020, 4, 1), + ... datetime(2020, 5, 1), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_dt_to_string(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a").dt.to_string("%Y/%m/%d %H:%M:%S") + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_to_string`: + + >>> agnostic_dt_to_string(df_pd) + a + 0 2020/03/01 00:00:00 + 1 2020/04/01 00:00:00 + 2 2020/05/01 00:00:00 + + >>> agnostic_dt_to_string(df_pl) + shape: (3, 1) + ┌─────────────────────┐ + │ a │ + │ --- │ + │ str │ + ╞═════════════════════╡ + │ 2020/03/01 00:00:00 │ + │ 2020/04/01 00:00:00 │ + │ 2020/05/01 00:00:00 │ + └─────────────────────┘ + + >>> agnostic_dt_to_string(df_pa) + pyarrow.Table + a: string + ---- + a: [["2020/03/01 00:00:00.000000","2020/04/01 00:00:00.000000","2020/05/01 00:00:00.000000"]] + """, + "replace_time_zone": """ + >>> from datetime import datetime, timezone + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [ + ... datetime(2024, 1, 1, tzinfo=timezone.utc), + ... datetime(2024, 1, 2, tzinfo=timezone.utc), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_dt_replace_time_zone(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a").dt.replace_time_zone("Asia/Kathmandu") + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_replace_time_zone`: + + >>> agnostic_dt_replace_time_zone(df_pd) + a + 0 2024-01-01 00:00:00+05:45 + 1 2024-01-02 00:00:00+05:45 + + >>> agnostic_dt_replace_time_zone(df_pl) + shape: (2, 1) + ┌──────────────────────────────┐ + │ a │ + │ --- │ + │ datetime[μs, Asia/Kathmandu] │ + ╞══════════════════════════════╡ + │ 2024-01-01 00:00:00 +0545 │ + │ 2024-01-02 00:00:00 +0545 │ + └──────────────────────────────┘ + + >>> agnostic_dt_replace_time_zone(df_pa) + pyarrow.Table + a: timestamp[us, tz=Asia/Kathmandu] + ---- + a: [[2023-12-31 18:15:00.000000Z,2024-01-01 18:15:00.000000Z]] + """, + "convert_time_zone": """ + >>> from datetime import datetime, timezone + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [ + ... datetime(2024, 1, 1, tzinfo=timezone.utc), + ... datetime(2024, 1, 2, tzinfo=timezone.utc), + ... ] + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_dt_convert_time_zone(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a").dt.convert_time_zone("Asia/Kathmandu") + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_convert_time_zone`: + + >>> agnostic_dt_convert_time_zone(df_pd) + a + 0 2024-01-01 05:45:00+05:45 + 1 2024-01-02 05:45:00+05:45 + + >>> agnostic_dt_convert_time_zone(df_pl) + shape: (2, 1) + ┌──────────────────────────────┐ + │ a │ + │ --- │ + │ datetime[μs, Asia/Kathmandu] │ + ╞══════════════════════════════╡ + │ 2024-01-01 05:45:00 +0545 │ + │ 2024-01-02 05:45:00 +0545 │ + └──────────────────────────────┘ + + >>> agnostic_dt_convert_time_zone(df_pa) + pyarrow.Table + a: timestamp[us, tz=Asia/Kathmandu] + ---- + a: [[2024-01-01 00:00:00.000000Z,2024-01-02 00:00:00.000000Z]] + """, + "timestamp": """ + >>> from datetime import date + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"date": [date(2001, 1, 1), None, date(2001, 1, 3)]} + >>> df_pd = pd.DataFrame(data, dtype="datetime64[ns]") + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_dt_timestamp(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.col("date").dt.timestamp().alias("timestamp_us"), + ... nw.col("date").dt.timestamp("ms").alias("timestamp_ms"), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dt_timestamp`: + + >>> agnostic_dt_timestamp(df_pd) + date timestamp_us timestamp_ms + 0 2001-01-01 9.783072e+14 9.783072e+11 + 1 NaT NaN NaN + 2 2001-01-03 9.784800e+14 9.784800e+11 + + >>> agnostic_dt_timestamp(df_pl) + shape: (3, 3) + ┌────────────┬─────────────────┬──────────────┐ + │ date ┆ timestamp_us ┆ timestamp_ms │ + │ --- ┆ --- ┆ --- │ + │ date ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════╪══════════════╡ + │ 2001-01-01 ┆ 978307200000000 ┆ 978307200000 │ + │ null ┆ null ┆ null │ + │ 2001-01-03 ┆ 978480000000000 ┆ 978480000000 │ + └────────────┴─────────────────┴──────────────┘ + + >>> agnostic_dt_timestamp(df_pa) + pyarrow.Table + date: date32[day] + timestamp_us: int64 + timestamp_ms: int64 + ---- + date: [[2001-01-01,null,2001-01-03]] + timestamp_us: [[978307200000000,null,978480000000000]] + timestamp_ms: [[978307200000,null,978480000000]] + """, +} diff --git a/docs/docstring_examples/expr_list.py b/docs/docstring_examples/expr_list.py new file mode 100644 index 000000000..92e3e952d --- /dev/null +++ b/docs/docstring_examples/expr_list.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +EXAMPLES = { + "len": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [[1, 2], [3, 4, None], None, []]} + + Let's define a dataframe-agnostic function: + + >>> def agnostic_list_len(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns(a_len=nw.col("a").list.len()).to_native() + + We can then pass pandas / PyArrow / Polars / any other supported library: + + >>> agnostic_list_len( + ... pd.DataFrame(data).astype({"a": pd.ArrowDtype(pa.list_(pa.int64()))}) + ... ) # doctest: +SKIP + a a_len + 0 [1. 2.] 2 + 1 [ 3. 4. 
nan] 3 + 2 <NA> <NA> + 3 [] 0 + + >>> agnostic_list_len(pl.DataFrame(data)) + shape: (4, 2) + ┌──────────────┬───────┐ + │ a ┆ a_len │ + │ --- ┆ --- │ + │ list[i64] ┆ u32 │ + ╞══════════════╪═══════╡ + │ [1, 2] ┆ 2 │ + │ [3, 4, null] ┆ 3 │ + │ null ┆ null │ + │ [] ┆ 0 │ + └──────────────┴───────┘ + + >>> agnostic_list_len(pa.table(data)) + pyarrow.Table + a: list<item: int64> + child 0, item: int64 + a_len: uint32 + ---- + a: [[[1,2],[3,4,null],null,[]]] + a_len: [[2,3,null,0]] + """, +} diff --git a/docs/docstring_examples/expr_name.py b/docs/docstring_examples/expr_name.py new file mode 100644 index 000000000..795ebd0c6 --- /dev/null +++ b/docs/docstring_examples/expr_name.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +EXAMPLES = { + "keep": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> + >>> data = {"foo": [1, 2], "BAR": [4, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_name_keep(df_native: IntoFrame) -> list[str]: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("foo").alias("alias_for_foo").name.keep()).columns + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_name_keep`: + + >>> agnostic_name_keep(df_pd) + ['foo'] + + >>> agnostic_name_keep(df_pl) + ['foo'] + + >>> agnostic_name_keep(df_pa) + ['foo'] + """, + "map": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> + >>> data = {"foo": [1, 2], "BAR": [4, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> renaming_func = lambda s: s[::-1] # reverse column name + >>> def agnostic_name_map(df_native: IntoFrame) -> list[str]: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("foo", "BAR").name.map(renaming_func)).columns + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_name_map`: + + >>> agnostic_name_map(df_pd) + ['oof', 'RAB'] + + >>> agnostic_name_map(df_pl) + ['oof', 'RAB'] + + >>> agnostic_name_map(df_pa) + ['oof', 'RAB'] + """, + "prefix": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> + >>> data = {"foo": [1, 2], "BAR": [4, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_name_prefix(df_native: IntoFrame, prefix: str) -> list[str]: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("foo", "BAR").name.prefix(prefix)).columns + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_name_prefix`: + + >>> agnostic_name_prefix(df_pd, "with_prefix_") + ['with_prefix_foo', 'with_prefix_BAR'] + + >>> agnostic_name_prefix(df_pl, "with_prefix_") + ['with_prefix_foo', 'with_prefix_BAR'] + + >>> agnostic_name_prefix(df_pa, "with_prefix_") + ['with_prefix_foo', 'with_prefix_BAR'] + """, + "suffix": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> + >>> data = {"foo": [1, 2], "BAR": [4, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_name_suffix(df_native: IntoFrame, suffix: str) -> list[str]: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("foo", "BAR").name.suffix(suffix)).columns + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_name_suffix`: + + >>> agnostic_name_suffix(df_pd, "_with_suffix") + ['foo_with_suffix', 'BAR_with_suffix'] + + >>> agnostic_name_suffix(df_pl, "_with_suffix") + ['foo_with_suffix', 'BAR_with_suffix'] + + >>> agnostic_name_suffix(df_pa, "_with_suffix") + ['foo_with_suffix', 'BAR_with_suffix'] + """, + "to_lowercase": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> + >>> data = {"foo": [1, 2], "BAR": [4, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_name_to_lowercase(df_native: IntoFrame) -> list[str]: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.col("foo", "BAR").name.to_lowercase()).columns + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_name_to_lowercase`: + + >>> agnostic_name_to_lowercase(df_pd) + ['foo', 'bar'] + + >>> agnostic_name_to_lowercase(df_pl) + ['foo', 'bar'] + + >>> agnostic_name_to_lowercase(df_pa) + ['foo', 'bar'] + """, + "to_uppercase": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> + >>> data = {"foo": [1, 2], "BAR": [4, 5]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_name_to_uppercase(df_native: IntoFrame) -> list[str]: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("foo", "BAR").name.to_uppercase()).columns + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_name_to_uppercase`: + + >>> agnostic_name_to_uppercase(df_pd) + ['FOO', 'BAR'] + + >>> agnostic_name_to_uppercase(df_pl) + ['FOO', 'BAR'] + + >>> agnostic_name_to_uppercase(df_pa) + ['FOO', 'BAR'] + """, +} diff --git a/docs/docstring_examples/expr_str.py b/docs/docstring_examples/expr_str.py new file mode 100644 index 000000000..4cad7dafd --- /dev/null +++ b/docs/docstring_examples/expr_str.py @@ -0,0 +1,670 @@ +from __future__ import annotations + +EXAMPLES = { + "len_chars": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"words": ["foo", "Café", "345", "東京", None]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_len_chars(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... 
words_len=nw.col("words").str.len_chars() + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_len_chars`: + + >>> agnostic_str_len_chars(df_pd) + words words_len + 0 foo 3.0 + 1 Café 4.0 + 2 345 3.0 + 3 東京 2.0 + 4 None NaN + + >>> agnostic_str_len_chars(df_pl) + shape: (5, 2) + ┌───────┬───────────┐ + │ words ┆ words_len │ + │ --- ┆ --- │ + │ str ┆ u32 │ + ╞═══════╪═══════════╡ + │ foo ┆ 3 │ + │ Café ┆ 4 │ + │ 345 ┆ 3 │ + │ 東京 ┆ 2 │ + │ null ┆ null │ + └───────┴───────────┘ + + >>> agnostic_str_len_chars(df_pa) + pyarrow.Table + words: string + words_len: int32 + ---- + words: [["foo","Café","345","東京",null]] + words_len: [[3,4,3,2,null]] + """, + "replace": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"foo": ["123abc", "abc abc123"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_replace(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... df = df.with_columns(replaced=nw.col("foo").str.replace("abc", "")) + ... 
return df.to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_replace`: + + >>> agnostic_str_replace(df_pd) + foo replaced + 0 123abc 123 + 1 abc abc123 abc123 + + >>> agnostic_str_replace(df_pl) + shape: (2, 2) + ┌────────────┬──────────┐ + │ foo ┆ replaced │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞════════════╪══════════╡ + │ 123abc ┆ 123 │ + │ abc abc123 ┆ abc123 │ + └────────────┴──────────┘ + + >>> agnostic_str_replace(df_pa) + pyarrow.Table + foo: string + replaced: string + ---- + foo: [["123abc","abc abc123"]] + replaced: [["123"," abc123"]] + """, + "replace_all": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"foo": ["123abc", "abc abc123"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_replace_all(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... df = df.with_columns(replaced=nw.col("foo").str.replace_all("abc", "")) + ... 
return df.to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_replace_all`: + + >>> agnostic_str_replace_all(df_pd) + foo replaced + 0 123abc 123 + 1 abc abc123 123 + + >>> agnostic_str_replace_all(df_pl) + shape: (2, 2) + ┌────────────┬──────────┐ + │ foo ┆ replaced │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞════════════╪══════════╡ + │ 123abc ┆ 123 │ + │ abc abc123 ┆ 123 │ + └────────────┴──────────┘ + + >>> agnostic_str_replace_all(df_pa) + pyarrow.Table + foo: string + replaced: string + ---- + foo: [["123abc","abc abc123"]] + replaced: [["123"," 123"]] + """, + "strip_chars": """ + >>> from typing import Any + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> + >>> data = {"fruits": ["apple", "\\nmango"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_strip_chars(df_native: IntoFrame) -> dict[str, Any]: + ... df = nw.from_native(df_native) + ... df = df.with_columns(stripped=nw.col("fruits").str.strip_chars()) + ... 
return df.to_dict(as_series=False) + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_strip_chars`: + + >>> agnostic_str_strip_chars(df_pd) + {'fruits': ['apple', '\\nmango'], 'stripped': ['apple', 'mango']} + + >>> agnostic_str_strip_chars(df_pl) + {'fruits': ['apple', '\\nmango'], 'stripped': ['apple', 'mango']} + + >>> agnostic_str_strip_chars(df_pa) + {'fruits': ['apple', '\\nmango'], 'stripped': ['apple', 'mango']} + """, + "starts_with": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"fruits": ["apple", "mango", None]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_starts_with(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... has_prefix=nw.col("fruits").str.starts_with("app") + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_starts_with`: + + >>> agnostic_str_starts_with(df_pd) + fruits has_prefix + 0 apple True + 1 mango False + 2 None None + + >>> agnostic_str_starts_with(df_pl) + shape: (3, 2) + ┌────────┬────────────┐ + │ fruits ┆ has_prefix │ + │ --- ┆ --- │ + │ str ┆ bool │ + ╞════════╪════════════╡ + │ apple ┆ true │ + │ mango ┆ false │ + │ null ┆ null │ + └────────┴────────────┘ + + >>> agnostic_str_starts_with(df_pa) + pyarrow.Table + fruits: string + has_prefix: bool + ---- + fruits: [["apple","mango",null]] + has_prefix: [[true,false,null]] + """, + "ends_with": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"fruits": ["apple", "mango", None]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_ends_with(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... has_suffix=nw.col("fruits").str.ends_with("ngo") + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_ends_with`: + + >>> agnostic_str_ends_with(df_pd) + fruits has_suffix + 0 apple False + 1 mango True + 2 None None + + >>> agnostic_str_ends_with(df_pl) + shape: (3, 2) + ┌────────┬────────────┐ + │ fruits ┆ has_suffix │ + │ --- ┆ --- │ + │ str ┆ bool │ + ╞════════╪════════════╡ + │ apple ┆ false │ + │ mango ┆ true │ + │ null ┆ null │ + └────────┴────────────┘ + + >>> agnostic_str_ends_with(df_pa) + pyarrow.Table + fruits: string + has_suffix: bool + ---- + fruits: [["apple","mango",null]] + has_suffix: [[false,true,null]] + """, + "contains": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", None]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_contains(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... default_match=nw.col("pets").str.contains("parrot|Dove"), + ... case_insensitive_match=nw.col("pets").str.contains("(?i)parrot|Dove"), + ... literal_match=nw.col("pets").str.contains( + ... "parrot|Dove", literal=True + ... ), + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_contains`: + + >>> agnostic_str_contains(df_pd) + pets default_match case_insensitive_match literal_match + 0 cat False False False + 1 dog False False False + 2 rabbit and parrot True True False + 3 dove False True False + 4 None None None None + + >>> agnostic_str_contains(df_pl) + shape: (5, 4) + ┌───────────────────┬───────────────┬────────────────────────┬───────────────┐ + │ pets ┆ default_match ┆ case_insensitive_match ┆ literal_match │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ bool ┆ bool ┆ bool │ + ╞═══════════════════╪═══════════════╪════════════════════════╪═══════════════╡ + │ cat ┆ false ┆ false ┆ false │ + │ dog ┆ false ┆ false ┆ false │ + │ rabbit and parrot ┆ true ┆ true ┆ false │ + │ dove ┆ false ┆ true ┆ false │ + │ null ┆ null ┆ null ┆ null │ + └───────────────────┴───────────────┴────────────────────────┴───────────────┘ + + >>> agnostic_str_contains(df_pa) + pyarrow.Table + pets: string + default_match: bool + case_insensitive_match: bool + literal_match: bool + ---- + pets: [["cat","dog","rabbit and parrot","dove",null]] + default_match: [[false,false,true,false,null]] + case_insensitive_match: [[false,false,true,true,null]] + literal_match: [[false,false,false,false,null]] + """, + "slice": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"s": ["pear", None, "papaya", "dragonfruit"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_slice(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... s_sliced=nw.col("s").str.slice(4, length=3) + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_slice`: + + >>> agnostic_str_slice(df_pd) # doctest: +NORMALIZE_WHITESPACE + s s_sliced + 0 pear + 1 None None + 2 papaya ya + 3 dragonfruit onf + + >>> agnostic_str_slice(df_pl) + shape: (4, 2) + ┌─────────────┬──────────┐ + │ s ┆ s_sliced │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞═════════════╪══════════╡ + │ pear ┆ │ + │ null ┆ null │ + │ papaya ┆ ya │ + │ dragonfruit ┆ onf │ + └─────────────┴──────────┘ + + >>> agnostic_str_slice(df_pa) + pyarrow.Table + s: string + s_sliced: string + ---- + s: [["pear",null,"papaya","dragonfruit"]] + s_sliced: [["",null,"ya","onf"]] + + Using negative indexes: + + >>> def agnostic_str_slice_negative(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns(s_sliced=nw.col("s").str.slice(-3)).to_native() + + >>> agnostic_str_slice_negative(df_pd) + s s_sliced + 0 pear ear + 1 None None + 2 papaya aya + 3 dragonfruit uit + + >>> agnostic_str_slice_negative(df_pl) + shape: (4, 2) + ┌─────────────┬──────────┐ + │ s ┆ s_sliced │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞═════════════╪══════════╡ + │ pear ┆ ear │ + │ null ┆ null │ + │ papaya ┆ aya │ + │ dragonfruit ┆ uit │ + └─────────────┴──────────┘ + + >>> agnostic_str_slice_negative(df_pa) + pyarrow.Table + s: string + s_sliced: string + ---- + s: [["pear",null,"papaya","dragonfruit"]] + s_sliced: [["ear",null,"aya","uit"]] + """, + "head": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_head(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.with_columns( + ... lyrics_head=nw.col("lyrics").str.head() + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_head`: + + >>> agnostic_str_head(df_pd) + lyrics lyrics_head + 0 Atatata Atata + 1 taata taata + 2 taatatata taata + 3 zukkyun zukky + + >>> agnostic_str_head(df_pl) + shape: (4, 2) + ┌───────────┬─────────────┐ + │ lyrics ┆ lyrics_head │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞═══════════╪═════════════╡ + │ Atatata ┆ Atata │ + │ taata ┆ taata │ + │ taatatata ┆ taata │ + │ zukkyun ┆ zukky │ + └───────────┴─────────────┘ + + >>> agnostic_str_head(df_pa) + pyarrow.Table + lyrics: string + lyrics_head: string + ---- + lyrics: [["Atatata","taata","taatatata","zukkyun"]] + lyrics_head: [["Atata","taata","taata","zukky"]] + """, + "tail": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_tail(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... lyrics_tail=nw.col("lyrics").str.tail() + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_tail`: + + >>> agnostic_str_tail(df_pd) + lyrics lyrics_tail + 0 Atatata atata + 1 taata taata + 2 taatatata atata + 3 zukkyun kkyun + + >>> agnostic_str_tail(df_pl) + shape: (4, 2) + ┌───────────┬─────────────┐ + │ lyrics ┆ lyrics_tail │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞═══════════╪═════════════╡ + │ Atatata ┆ atata │ + │ taata ┆ taata │ + │ taatatata ┆ atata │ + │ zukkyun ┆ kkyun │ + └───────────┴─────────────┘ + + >>> agnostic_str_tail(df_pa) + pyarrow.Table + lyrics: string + lyrics_tail: string + ---- + lyrics: [["Atatata","taata","taatatata","zukkyun"]] + lyrics_tail: [["atata","taata","atata","kkyun"]] + """, + "to_datetime": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = ["2020-01-01", "2020-01-02"] + >>> df_pd = pd.DataFrame({"a": data}) + >>> df_pl = pl.DataFrame({"a": data}) + >>> df_pa = pa.table({"a": data}) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_to_datetime(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.col("a").str.to_datetime(format="%Y-%m-%d") + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_to_datetime`: + + >>> agnostic_str_to_datetime(df_pd) + a + 0 2020-01-01 + 1 2020-01-02 + + >>> agnostic_str_to_datetime(df_pl) + shape: (2, 1) + ┌─────────────────────┐ + │ a │ + │ --- │ + │ datetime[μs] │ + ╞═════════════════════╡ + │ 2020-01-01 00:00:00 │ + │ 2020-01-02 00:00:00 │ + └─────────────────────┘ + + >>> agnostic_str_to_datetime(df_pa) + pyarrow.Table + a: timestamp[us] + ---- + a: [[2020-01-01 00:00:00.000000,2020-01-02 00:00:00.000000]] + """, + "to_uppercase": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"fruits": ["apple", "mango", None]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_to_uppercase(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... upper_col=nw.col("fruits").str.to_uppercase() + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_to_uppercase`: + + >>> agnostic_str_to_uppercase(df_pd) + fruits upper_col + 0 apple APPLE + 1 mango MANGO + 2 None None + + >>> agnostic_str_to_uppercase(df_pl) + shape: (3, 2) + ┌────────┬───────────┐ + │ fruits ┆ upper_col │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞════════╪═══════════╡ + │ apple ┆ APPLE │ + │ mango ┆ MANGO │ + │ null ┆ null │ + └────────┴───────────┘ + + >>> agnostic_str_to_uppercase(df_pa) + pyarrow.Table + fruits: string + upper_col: string + ---- + fruits: [["apple","mango",null]] + upper_col: [["APPLE","MANGO",null]] + """, + "to_lowercase": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"fruits": ["APPLE", "MANGO", None]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_str_to_lowercase(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... lower_col=nw.col("fruits").str.to_lowercase() + ... 
).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_str_to_lowercase`: + + >>> agnostic_str_to_lowercase(df_pd) + fruits lower_col + 0 APPLE apple + 1 MANGO mango + 2 None None + + >>> agnostic_str_to_lowercase(df_pl) + shape: (3, 2) + ┌────────┬───────────┐ + │ fruits ┆ lower_col │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞════════╪═══════════╡ + │ APPLE ┆ apple │ + │ MANGO ┆ mango │ + │ null ┆ null │ + └────────┴───────────┘ + + >>> agnostic_str_to_lowercase(df_pa) + pyarrow.Table + fruits: string + lower_col: string + ---- + fruits: [["APPLE","MANGO",null]] + lower_col: [["apple","mango",null]] + """, +} diff --git a/docs/docstring_examples/functions.py b/docs/docstring_examples/functions.py new file mode 100644 index 000000000..a387ad404 --- /dev/null +++ b/docs/docstring_examples/functions.py @@ -0,0 +1,1339 @@ +from __future__ import annotations + +EXAMPLES = { + "concat": """ + Let's take an example of vertical concatenation: + + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} + >>> data_2 = {"a": [5, 2], "b": [1, 4]} + + >>> df_pd_1 = pd.DataFrame(data_1) + >>> df_pd_2 = pd.DataFrame(data_2) + >>> df_pl_1 = pl.DataFrame(data_1) + >>> df_pl_2 = pl.DataFrame(data_2) + + Let's define a dataframe-agnostic function: + + >>> @nw.narwhalify + ... def agnostic_vertical_concat(df1, df2): + ... 
return nw.concat([df1, df2], how="vertical") + + >>> agnostic_vertical_concat(df_pd_1, df_pd_2) + a b + 0 1 4 + 1 2 5 + 2 3 6 + 0 5 1 + 1 2 4 + >>> agnostic_vertical_concat(df_pl_1, df_pl_2) + shape: (5, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + │ 3 ┆ 6 │ + │ 5 ┆ 1 │ + │ 2 ┆ 4 │ + └─────┴─────┘ + + Let's look at the case of horizontal concatenation: + + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} + >>> data_2 = {"c": [5, 2], "d": [1, 4]} + + >>> df_pd_1 = pd.DataFrame(data_1) + >>> df_pd_2 = pd.DataFrame(data_2) + >>> df_pl_1 = pl.DataFrame(data_1) + >>> df_pl_2 = pl.DataFrame(data_2) + + Defining a dataframe-agnostic function: + + >>> @nw.narwhalify + ... def agnostic_horizontal_concat(df1, df2): + ... return nw.concat([df1, df2], how="horizontal") + + >>> agnostic_horizontal_concat(df_pd_1, df_pd_2) + a b c d + 0 1 4 5.0 1.0 + 1 2 5 2.0 4.0 + 2 3 6 NaN NaN + + >>> agnostic_horizontal_concat(df_pl_1, df_pl_2) + shape: (3, 4) + ┌─────┬─────┬──────┬──────┐ + │ a ┆ b ┆ c ┆ d │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪══════╪══════╡ + │ 1 ┆ 4 ┆ 5 ┆ 1 │ + │ 2 ┆ 5 ┆ 2 ┆ 4 │ + │ 3 ┆ 6 ┆ null ┆ null │ + └─────┴─────┴──────┴──────┘ + + Let's look at the case of diagonal concatenation: + + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> data_1 = {"a": [1, 2], "b": [3.5, 4.5]} + >>> data_2 = {"a": [3, 4], "z": ["x", "y"]} + + >>> df_pd_1 = pd.DataFrame(data_1) + >>> df_pd_2 = pd.DataFrame(data_2) + >>> df_pl_1 = pl.DataFrame(data_1) + >>> df_pl_2 = pl.DataFrame(data_2) + + Defining a dataframe-agnostic function: + + >>> @nw.narwhalify + ... def agnostic_diagonal_concat(df1, df2): + ... 
return nw.concat([df1, df2], how="diagonal") + + >>> agnostic_diagonal_concat(df_pd_1, df_pd_2) + a b z + 0 1 3.5 NaN + 1 2 4.5 NaN + 0 3 NaN x + 1 4 NaN y + + >>> agnostic_diagonal_concat(df_pl_1, df_pl_2) + shape: (4, 3) + ┌─────┬──────┬──────┐ + │ a ┆ b ┆ z │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str │ + ╞═════╪══════╪══════╡ + │ 1 ┆ 3.5 ┆ null │ + │ 2 ┆ 4.5 ┆ null │ + │ 3 ┆ null ┆ x │ + │ 4 ┆ null ┆ y │ + └─────┴──────┴──────┘ + """, + "new_series": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT, IntoSeriesT + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + + Let's define a dataframe-agnostic function: + + >>> def agnostic_new_series(df_native: IntoFrameT) -> IntoSeriesT: + ... values = [4, 1, 2, 3] + ... native_namespace = nw.get_native_namespace(df_native) + ... return nw.new_series( + ... name="a", + ... values=values, + ... dtype=nw.Int32, + ... native_namespace=native_namespace, + ... ).to_native() + + We can then pass any supported eager library, such as pandas / Polars / PyArrow: + + >>> agnostic_new_series(pd.DataFrame(data)) + 0 4 + 1 1 + 2 2 + 3 3 + Name: a, dtype: int32 + >>> agnostic_new_series(pl.DataFrame(data)) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: 'a' [i32] + [ + 4 + 1 + 2 + 3 + ] + >>> agnostic_new_series(pa.table(data)) + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 4, + 1, + 2, + 3 + ] + ] + """, + "from_dict": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + + Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data: + + >>> def agnostic_from_dict(df_native: IntoFrameT) -> IntoFrameT: + ... new_data = {"c": [5, 2], "d": [1, 4]} + ... native_namespace = nw.get_native_namespace(df_native) + ... 
return nw.from_dict(new_data, native_namespace=native_namespace).to_native() + + Let's see what happens when passing pandas, Polars or PyArrow input: + + >>> agnostic_from_dict(pd.DataFrame(data)) + c d + 0 5 1 + 1 2 4 + >>> agnostic_from_dict(pl.DataFrame(data)) + shape: (2, 2) + ┌─────┬─────┐ + │ c ┆ d │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 5 ┆ 1 │ + │ 2 ┆ 4 │ + └─────┴─────┘ + >>> agnostic_from_dict(pa.table(data)) + pyarrow.Table + c: int64 + d: int64 + ---- + c: [[5,2]] + d: [[1,4]] + """, + "from_numpy": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> import numpy as np + >>> from narwhals.typing import IntoFrameT + >>> data = {"a": [1, 2], "b": [3, 4]} + + Let's create a new dataframe of the same class as the dataframe we started with, from a NumPy ndarray of new data: + + >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: + ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) + ... df = nw.from_native(df_native) + ... native_namespace = nw.get_native_namespace(df) + ... return nw.from_numpy(new_data, native_namespace=native_namespace).to_native() + + Let's see what happens when passing pandas, Polars or PyArrow input: + + >>> agnostic_from_numpy(pd.DataFrame(data)) + column_0 column_1 column_2 + 0 5 2 1 + 1 1 4 3 + >>> agnostic_from_numpy(pl.DataFrame(data)) + shape: (2, 3) + ┌──────────┬──────────┬──────────┐ + │ column_0 ┆ column_1 ┆ column_2 │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞══════════╪══════════╪══════════╡ + │ 5 ┆ 2 ┆ 1 │ + │ 1 ┆ 4 ┆ 3 │ + └──────────┴──────────┴──────────┘ + >>> agnostic_from_numpy(pa.table(data)) + pyarrow.Table + column_0: int64 + column_1: int64 + column_2: int64 + ---- + column_0: [[5,1]] + column_1: [[2,4]] + column_2: [[1,3]] + + Let's specify the column names: + + >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: + ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) + ... schema = ["c", "d", "e"] + ... 
df = nw.from_native(df_native) + ... native_namespace = nw.get_native_namespace(df) + ... return nw.from_numpy( + ... new_data, native_namespace=native_namespace, schema=schema + ... ).to_native() + + Let's see the modified outputs: + + >>> agnostic_from_numpy(pd.DataFrame(data)) + c d e + 0 5 2 1 + 1 1 4 3 + >>> agnostic_from_numpy(pl.DataFrame(data)) + shape: (2, 3) + ┌─────┬─────┬─────┐ + │ c ┆ d ┆ e │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╡ + │ 5 ┆ 2 ┆ 1 │ + │ 1 ┆ 4 ┆ 3 │ + └─────┴─────┴─────┘ + >>> agnostic_from_numpy(pa.table(data)) + pyarrow.Table + c: int64 + d: int64 + e: int64 + ---- + c: [[5,1]] + d: [[2,4]] + e: [[1,3]] + + Let's modify the function so that it specifies the schema: + + >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: + ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) + ... schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()} + ... df = nw.from_native(df_native) + ... native_namespace = nw.get_native_namespace(df) + ... return nw.from_numpy( + ... new_data, native_namespace=native_namespace, schema=schema + ... ).to_native() + + Let's see the outputs: + + >>> agnostic_from_numpy(pd.DataFrame(data)) + c d e + 0 5 2.0 1 + 1 1 4.0 3 + >>> agnostic_from_numpy(pl.DataFrame(data)) + shape: (2, 3) + ┌─────┬─────┬─────┐ + │ c ┆ d ┆ e │ + │ --- ┆ --- ┆ --- │ + │ i16 ┆ f32 ┆ i8 │ + ╞═════╪═════╪═════╡ + │ 5 ┆ 2.0 ┆ 1 │ + │ 1 ┆ 4.0 ┆ 3 │ + └─────┴─────┴─────┘ + >>> agnostic_from_numpy(pa.table(data)) + pyarrow.Table + c: int16 + d: float + e: int8 + ---- + c: [[5,1]] + d: [[2,4]] + e: [[1,3]] + """, + "from_arrow": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + + Let's define a dataframe-agnostic function which creates a PyArrow + Table. + + >>> def agnostic_to_arrow(df_native: IntoFrameT) -> IntoFrameT: + ... 
df = nw.from_native(df_native) + ... return nw.from_arrow(df, native_namespace=pa).to_native() + + Let's see what happens when passing pandas / Polars input: + + >>> agnostic_to_arrow(pd.DataFrame(data)) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + >>> agnostic_to_arrow(pl.DataFrame(data)) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + """, + "show_versions": """ + >>> from narwhals import show_versions + >>> show_versions() # doctest: +SKIP + """, + "read_csv": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from types import ModuleType + + Let's create an agnostic function that reads a csv file with a specified native namespace: + + >>> def agnostic_read_csv(native_namespace: ModuleType) -> IntoDataFrame: + ... return nw.read_csv("file.csv", native_namespace=native_namespace).to_native() + + Then we can read the file by passing pandas, Polars or PyArrow namespaces: + + >>> agnostic_read_csv(native_namespace=pd) # doctest:+SKIP + a b + 0 1 4 + 1 2 5 + 2 3 6 + >>> agnostic_read_csv(native_namespace=pl) # doctest:+SKIP + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + │ 3 ┆ 6 │ + └─────┴─────┘ + >>> agnostic_read_csv(native_namespace=pa) # doctest:+SKIP + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + """, + "scan_csv": """ + >>> import dask.dataframe as dd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> from types import ModuleType + + Let's create an agnostic function that lazily reads a csv file with a specified native namespace: + + >>> def agnostic_scan_csv(native_namespace: ModuleType) -> IntoFrame: + ... 
return nw.scan_csv("file.csv", native_namespace=native_namespace).to_native() + + Then we can read the file by passing, for example, Polars or Dask namespaces: + + >>> agnostic_scan_csv(native_namespace=pl).collect() # doctest:+SKIP + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + │ 3 ┆ 6 │ + └─────┴─────┘ + >>> agnostic_scan_csv(native_namespace=dd).compute() # doctest:+SKIP + a b + 0 1 4 + 1 2 5 + 2 3 6 + """, + "read_parquet": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from types import ModuleType + + Let's create an agnostic function that reads a parquet file with a specified native namespace: + + >>> def agnostic_read_parquet(native_namespace: ModuleType) -> IntoDataFrame: + ... return nw.read_parquet( + ... "file.parquet", native_namespace=native_namespace + ... ).to_native() + + Then we can read the file by passing pandas, Polars or PyArrow namespaces: + + >>> agnostic_read_parquet(native_namespace=pd) # doctest:+SKIP + a b + 0 1 4 + 1 2 5 + 2 3 6 + >>> agnostic_read_parquet(native_namespace=pl) # doctest:+SKIP + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + │ 3 ┆ 6 │ + └─────┴─────┘ + >>> agnostic_read_parquet(native_namespace=pa) # doctest:+SKIP + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + """, + "scan_parquet": """ + >>> import dask.dataframe as dd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> from types import ModuleType + + Let's create an agnostic function that lazily reads a parquet file with a specified native namespace: + + >>> def agnostic_scan_parquet(native_namespace: ModuleType) -> IntoFrame: + ... return nw.scan_parquet( + ... 
"file.parquet", native_namespace=native_namespace + ... ).to_native() + + Then we can read the file by passing, for example, Polars or Dask namespaces: + + >>> agnostic_scan_parquet(native_namespace=pl).collect() # doctest:+SKIP + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + │ 3 ┆ 6 │ + └─────┴─────┘ + >>> agnostic_scan_parquet(native_namespace=dd).compute() # doctest:+SKIP + a b + 0 1 4 + 1 2 5 + 2 3 6 + """, + "col": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2], "b": [3, 4]} + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_col(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.col("a") * nw.col("b")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_col`: + + >>> agnostic_col(df_pd) + a + 0 3 + 1 8 + + >>> agnostic_col(df_pl) + shape: (2, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 3 │ + │ 8 │ + └─────┘ + + >>> agnostic_col(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[3,8]] + """, + "nth": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2], "b": [3, 4]} + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_nth(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.nth(0) * 2).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_nth`: + + >>> agnostic_nth(df_pd) + a + 0 2 + 1 4 + + >>> agnostic_nth(df_pl) + shape: (2, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 2 │ + │ 4 │ + └─────┘ + + >>> agnostic_nth(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[2,4]] + """, + "all_": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.all() * 2).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_all`: + + >>> agnostic_all(df_pd) + a b + 0 2 8 + 1 4 10 + 2 6 12 + + >>> agnostic_all(df_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 2 ┆ 8 │ + │ 4 ┆ 10 │ + │ 6 ┆ 12 │ + └─────┴─────┘ + + >>> agnostic_all(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[2,4,6]] + b: [[8,10,12]] + """, + "len_": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2], "b": [5, 10]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.len()).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_len`: + + >>> agnostic_len(df_pd) + len + 0 2 + >>> agnostic_len(df_pl) + shape: (1, 1) + ┌─────┐ + │ len │ + │ --- │ + │ u32 │ + ╞═════╡ + │ 2 │ + └─────┘ + >>> agnostic_len(df_pa) + pyarrow.Table + len: int64 + ---- + len: [[2]] + """, + "sum": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2]} + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.sum("a")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_sum`: + + >>> agnostic_sum(df_pd) + a + 0 3 + + >>> agnostic_sum(df_pl) + shape: (1, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 3 │ + └─────┘ + + >>> agnostic_sum(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[3]] + """, + "mean": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 8, 3]} + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe agnostic function: + + >>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.mean("a")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_mean`: + + >>> agnostic_mean(df_pd) + a + 0 4.0 + + >>> agnostic_mean(df_pl) + shape: (1, 1) + ┌─────┐ + │ a │ + │ --- │ + │ f64 │ + ╞═════╡ + │ 4.0 │ + └─────┘ + + >>> agnostic_mean(df_pa) + pyarrow.Table + a: double + ---- + a: [[4]] + """, + "median": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [4, 5, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe agnostic function: + + >>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.median("a")).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_median`: + + >>> agnostic_median(df_pd) + a + 0 4.0 + + >>> agnostic_median(df_pl) + shape: (1, 1) + ┌─────┐ + │ a │ + │ --- │ + │ f64 │ + ╞═════╡ + │ 4.0 │ + └─────┘ + + >>> agnostic_median(df_pa) + pyarrow.Table + a: double + ---- + a: [[4]] + """, + "min": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2], "b": [5, 10]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.min("b")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_min`: + + >>> agnostic_min(df_pd) + b + 0 5 + + >>> agnostic_min(df_pl) + shape: (1, 1) + ┌─────┐ + │ b │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 5 │ + └─────┘ + + >>> agnostic_min(df_pa) + pyarrow.Table + b: int64 + ---- + b: [[5]] + """, + "max": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2], "b": [5, 10]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.max("a")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_max`: + + >>> agnostic_max(df_pd) + a + 0 2 + + >>> agnostic_max(df_pl) + shape: (1, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 2 │ + └─────┘ + + >>> agnostic_max(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[2]] + """, + "sum_horizontal": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3], "b": [5, 10, None]} + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_sum_horizontal(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.sum_horizontal("a", "b")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_sum_horizontal`: + + >>> agnostic_sum_horizontal(df_pd) + a + 0 6.0 + 1 12.0 + 2 3.0 + + >>> agnostic_sum_horizontal(df_pl) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 6 │ + │ 12 │ + │ 3 │ + └─────┘ + + >>> agnostic_sum_horizontal(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[6,12,3]] + """, + "min_horizontal": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [1, 8, 3], + ... "b": [4, 5, None], + ... "c": ["x", "y", "z"], + ... } + + We define a dataframe-agnostic function that computes the horizontal min of "a" + and "b" columns: + + >>> def agnostic_min_horizontal(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select(nw.min_horizontal("a", "b")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_min_horizontal`: + + >>> agnostic_min_horizontal(pd.DataFrame(data)) + a + 0 1.0 + 1 5.0 + 2 3.0 + + >>> agnostic_min_horizontal(pl.DataFrame(data)) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 1 │ + │ 5 │ + │ 3 │ + └─────┘ + + >>> agnostic_min_horizontal(pa.table(data)) + pyarrow.Table + a: int64 + ---- + a: [[1,5,3]] + """, + "max_horizontal": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [1, 8, 3], + ... "b": [4, 5, None], + ... "c": ["x", "y", "z"], + ... } + + We define a dataframe-agnostic function that computes the horizontal max of "a" + and "b" columns: + + >>> def agnostic_max_horizontal(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.max_horizontal("a", "b")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_max_horizontal`: + + >>> agnostic_max_horizontal(pd.DataFrame(data)) + a + 0 4.0 + 1 8.0 + 2 3.0 + + >>> agnostic_max_horizontal(pl.DataFrame(data)) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 4 │ + │ 8 │ + │ 3 │ + └─────┘ + + >>> agnostic_max_horizontal(pa.table(data)) + pyarrow.Table + a: int64 + ---- + a: [[4,8,3]] + """, + "when": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2, 3], "b": [5, 10, 15]} + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_when_then_otherwise(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns( + ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") + ... ).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_when_then_otherwise`: + + >>> agnostic_when_then_otherwise(df_pd) + a b a_when + 0 1 5 5 + 1 2 10 5 + 2 3 15 6 + + >>> agnostic_when_then_otherwise(df_pl) + shape: (3, 3) + ┌─────┬─────┬────────┐ + │ a ┆ b ┆ a_when │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i32 │ + ╞═════╪═════╪════════╡ + │ 1 ┆ 5 ┆ 5 │ + │ 2 ┆ 10 ┆ 5 │ + │ 3 ┆ 15 ┆ 6 │ + └─────┴─────┴────────┘ + + >>> agnostic_when_then_otherwise(df_pa) + pyarrow.Table + a: int64 + b: int64 + a_when: int64 + ---- + a: [[1,2,3]] + b: [[5,10,15]] + a_when: [[5,5,6]] + """, + "all_horizontal": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [False, False, True, True, False, None], + ... 
"b": [False, True, True, None, None, None], + ... } + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_all_horizontal(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select("a", "b", all=nw.all_horizontal("a", "b")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_all_horizontal`: + + >>> agnostic_all_horizontal(df_pd) + a b all + 0 False False False + 1 False True False + 2 True True True + 3 True + 4 False False + 5 + + >>> agnostic_all_horizontal(df_pl) + shape: (6, 3) + ┌───────┬───────┬───────┐ + │ a ┆ b ┆ all │ + │ --- ┆ --- ┆ --- │ + │ bool ┆ bool ┆ bool │ + ╞═══════╪═══════╪═══════╡ + │ false ┆ false ┆ false │ + │ false ┆ true ┆ false │ + │ true ┆ true ┆ true │ + │ true ┆ null ┆ null │ + │ false ┆ null ┆ false │ + │ null ┆ null ┆ null │ + └───────┴───────┴───────┘ + + >>> agnostic_all_horizontal(df_pa) + pyarrow.Table + a: bool + b: bool + all: bool + ---- + a: [[false,false,true,true,false,null]] + b: [[false,true,true,null,null,null]] + all: [[false,false,true,null,false,null]] + """, + "lit": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = {"a": [1, 2]} + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_lit(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.with_columns(nw.lit(3)).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_lit`: + + >>> agnostic_lit(df_pd) + a literal + 0 1 3 + 1 2 3 + + >>> agnostic_lit(df_pl) + shape: (2, 2) + ┌─────┬─────────┐ + │ a ┆ literal │ + │ --- ┆ --- │ + │ i64 ┆ i32 │ + ╞═════╪═════════╡ + │ 1 ┆ 3 │ + │ 2 ┆ 3 │ + └─────┴─────────┘ + + >>> agnostic_lit(df_pa) + pyarrow.Table + a: int64 + literal: int64 + ---- + a: [[1,2]] + literal: [[3,3]] + """, + "any_horizontal": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [False, False, True, True, False, None], + ... "b": [False, True, True, None, None, None], + ... } + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function: + + >>> def agnostic_any_horizontal(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select("a", "b", any=nw.any_horizontal("a", "b")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_any_horizontal`: + + >>> agnostic_any_horizontal(df_pd) + a b any + 0 False False False + 1 False True True + 2 True True True + 3 True <NA> True + 4 False <NA> <NA> + 5 <NA> <NA> <NA> + + >>> agnostic_any_horizontal(df_pl) + shape: (6, 3) + ┌───────┬───────┬───────┐ + │ a ┆ b ┆ any │ + │ --- ┆ --- ┆ --- │ + │ bool ┆ bool ┆ bool │ + ╞═══════╪═══════╪═══════╡ + │ false ┆ false ┆ false │ + │ false ┆ true ┆ true │ + │ true ┆ true ┆ true │ + │ true ┆ null ┆ true │ + │ false ┆ null ┆ null │ + │ null ┆ null ┆ null │ + └───────┴───────┴───────┘ + + >>> agnostic_any_horizontal(df_pa) + pyarrow.Table + a: bool + b: bool + any: bool + ---- + a: [[false,false,true,true,false,null]] + b: [[false,true,true,null,null,null]] + any: [[false,true,true,true,null,null]] + """, + "mean_horizontal": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [1, 8, 3], + ... "b": [4, 5, None], + ... "c": ["x", "y", "z"], + ... } + >>> df_pl = pl.DataFrame(data) + >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a dataframe-agnostic function that computes the horizontal mean of "a" + and "b" columns: + + >>> def agnostic_mean_horizontal(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.select(nw.mean_horizontal("a", "b")).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow to + `agnostic_mean_horizontal`: + + >>> agnostic_mean_horizontal(df_pd) + a + 0 2.5 + 1 6.5 + 2 3.0 + + >>> agnostic_mean_horizontal(df_pl) + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ f64 │ + ╞═════╡ + │ 2.5 │ + │ 6.5 │ + │ 3.0 │ + └─────┘ + + >>> agnostic_mean_horizontal(df_pa) + pyarrow.Table + a: double + ---- + a: [[2.5,6.5,3]] + """, + "concat_str": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> + >>> data = { + ... "a": [1, 2, 3], + ... "b": ["dogs", "cats", None], + ... "c": ["play", "swim", "walk"], + ... } + + We define a dataframe-agnostic function that computes the horizontal string + concatenation of different columns + + >>> def agnostic_concat_str(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.select( + ... nw.concat_str( + ... [ + ... nw.col("a") * 2, + ... nw.col("b"), + ... nw.col("c"), + ... ], + ... separator=" ", + ... ).alias("full_sentence") + ... 
).to_native() + + We can pass any supported library such as Pandas, Polars, or PyArrow + to `agnostic_concat_str`: + + >>> agnostic_concat_str(pd.DataFrame(data)) + full_sentence + 0 2 dogs play + 1 4 cats swim + 2 None + + >>> agnostic_concat_str(pl.DataFrame(data)) + shape: (3, 1) + ┌───────────────┐ + │ full_sentence │ + │ --- │ + │ str │ + ╞═══════════════╡ + │ 2 dogs play │ + │ 4 cats swim │ + │ null │ + └───────────────┘ + + >>> agnostic_concat_str(pa.table(data)) + pyarrow.Table + full_sentence: string + ---- + full_sentence: [["2 dogs play","4 cats swim",null]] + """, +} diff --git a/docs/docstring_examples/group_by.py b/docs/docstring_examples/group_by.py new file mode 100644 index 000000000..7a290b0eb --- /dev/null +++ b/docs/docstring_examples/group_by.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +EXAMPLES = { + "agg": """ + Group by one column or by multiple columns and call `agg` to compute + the grouped sum of another column. + + >>> import polars as pl + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> lf_pl = pl.LazyFrame( + ... { + ... "a": ["a", "b", "a", "b", "c"], + ... "b": [1, 2, 1, 3, 3], + ... "c": [5, 4, 3, 2, 1], + ... } + ... ) + + We define library agnostic functions: + + >>> def agnostic_func_one_col(lf_native: IntoFrameT) -> IntoFrameT: + ... lf = nw.from_native(lf_native) + ... return nw.to_native(lf.group_by("a").agg(nw.col("b").sum()).sort("a")) + + >>> def agnostic_func_mult_col(lf_native: IntoFrameT) -> IntoFrameT: + ... lf = nw.from_native(lf_native) + ... 
return nw.to_native(lf.group_by("a", "b").agg(nw.sum("c")).sort("a", "b")) + + We can then pass a lazy frame and materialise it with `collect`: + + >>> agnostic_func_one_col(lf_pl).collect() + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ str ┆ i64 │ + ╞═════╪═════╡ + │ a ┆ 2 │ + │ b ┆ 5 │ + │ c ┆ 3 │ + └─────┴─────┘ + >>> agnostic_func_mult_col(lf_pl).collect() + shape: (4, 3) + ┌─────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ str ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╡ + │ a ┆ 1 ┆ 8 │ + │ b ┆ 2 ┆ 4 │ + │ b ┆ 3 ┆ 2 │ + │ c ┆ 3 ┆ 1 │ + └─────┴─────┴─────┘ + """, +} diff --git a/docs/docstring_examples/schema.py b/docs/docstring_examples/schema.py new file mode 100644 index 000000000..9923dd9ff --- /dev/null +++ b/docs/docstring_examples/schema.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +EXAMPLES = { + "Schema": """ + Define a schema by passing *instantiated* data types. + + >>> import narwhals as nw + >>> schema = nw.Schema({"foo": nw.Int8(), "bar": nw.String()}) + >>> schema + Schema({'foo': Int8, 'bar': String}) + + Access the data type associated with a specific column name. + + >>> schema["foo"] + Int8 + + Access various schema properties using the `names`, `dtypes`, and `len` methods. 
+ + >>> schema.names() + ['foo', 'bar'] + >>> schema.dtypes() + [Int8, String] + >>> schema.len() + 2 + """, +} diff --git a/docs/docstring_examples/selectors.py b/docs/docstring_examples/selectors.py new file mode 100644 index 000000000..baa10ceeb --- /dev/null +++ b/docs/docstring_examples/selectors.py @@ -0,0 +1,208 @@ +from __future__ import annotations + +EXAMPLES = { + "by_dtype": """ + >>> import narwhals as nw + >>> import narwhals.selectors as ncs + >>> import pandas as pd + >>> import polars as pl + >>> + >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + Let's define a dataframe-agnostic function to select int64 and float64 + dtypes and multiplies each value by 2: + + >>> @nw.narwhalify + ... def func(df): + ... return df.select(ncs.by_dtype(nw.Int64, nw.Float64) * 2) + + We can then pass either pandas or Polars dataframes: + + >>> func(df_pd) + a c + 0 2 8.2 + 1 4 4.6 + >>> func(df_pl) + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ c │ + │ --- ┆ --- │ + │ i64 ┆ f64 │ + ╞═════╪═════╡ + │ 2 ┆ 8.2 │ + │ 4 ┆ 4.6 │ + └─────┴─────┘ + """, + "numeric": """ + >>> import narwhals as nw + >>> import narwhals.selectors as ncs + >>> import pandas as pd + >>> import polars as pl + >>> + >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + Let's define a dataframe-agnostic function to select numeric + dtypes and multiplies each value by 2: + + >>> @nw.narwhalify + ... def func(df): + ... 
return df.select(ncs.numeric() * 2) + + We can then pass either pandas or Polars dataframes: + + >>> func(df_pd) + a c + 0 2 8.2 + 1 4 4.6 + >>> func(df_pl) + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ c │ + │ --- ┆ --- │ + │ i64 ┆ f64 │ + ╞═════╪═════╡ + │ 2 ┆ 8.2 │ + │ 4 ┆ 4.6 │ + └─────┴─────┘ + """, + "boolean": """ + >>> import narwhals as nw + >>> import narwhals.selectors as ncs + >>> import pandas as pd + >>> import polars as pl + >>> + >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + Let's define a dataframe-agnostic function to select boolean + dtypes: + + >>> @nw.narwhalify + ... def func(df): + ... return df.select(ncs.boolean()) + + We can then pass either pandas or Polars dataframes: + + >>> func(df_pd) + c + 0 False + 1 True + >>> func(df_pl) + shape: (2, 1) + ┌───────┐ + │ c │ + │ --- │ + │ bool │ + ╞═══════╡ + │ false │ + │ true │ + └───────┘ + """, + "string": """ + >>> import narwhals as nw + >>> import narwhals.selectors as ncs + >>> import pandas as pd + >>> import polars as pl + >>> + >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + Let's define a dataframe-agnostic function to select string + dtypes: + + >>> @nw.narwhalify + ... def func(df): + ... 
return df.select(ncs.string()) + + We can then pass either pandas or Polars dataframes: + + >>> func(df_pd) + b + 0 x + 1 y + >>> func(df_pl) + shape: (2, 1) + ┌─────┐ + │ b │ + │ --- │ + │ str │ + ╞═════╡ + │ x │ + │ y │ + └─────┘ + """, + "categorical": """ + >>> import narwhals as nw + >>> import narwhals.selectors as ncs + >>> import pandas as pd + >>> import polars as pl + >>> + >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]} + >>> df_pd = pd.DataFrame(data).astype({"b": "category"}) + >>> df_pl = pl.DataFrame(data, schema_overrides={"b": pl.Categorical}) + + Let's define a dataframe-agnostic function to select categorical + dtypes: + + >>> @nw.narwhalify + ... def func(df): + ... return df.select(ncs.categorical()) + + We can then pass either pandas or Polars dataframes: + + >>> func(df_pd) + b + 0 x + 1 y + >>> func(df_pl) + shape: (2, 1) + ┌─────┐ + │ b │ + │ --- │ + │ cat │ + ╞═════╡ + │ x │ + │ y │ + └─────┘ + """, + "all": """ + >>> import narwhals as nw + >>> import narwhals.selectors as ncs + >>> import pandas as pd + >>> import polars as pl + >>> + >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]} + >>> df_pd = pd.DataFrame(data).astype({"b": "category"}) + >>> df_pl = pl.DataFrame(data, schema_overrides={"b": pl.Categorical}) + + Let's define a dataframe-agnostic function to select all + dtypes: + + >>> @nw.narwhalify + ... def func(df): + ... 
return df.select(ncs.all()) + + We can then pass either pandas or Polars dataframes: + + >>> func(df_pd) + a b c + 0 1 x False + 1 2 y True + >>> func(df_pl) + shape: (2, 3) + ┌─────┬─────┬───────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ cat ┆ bool │ + ╞═════╪═════╪═══════╡ + │ 1 ┆ x ┆ false │ + │ 2 ┆ y ┆ true │ + └─────┴─────┴───────┘ + """, +} diff --git a/docs/docstring_examples/series.py b/docs/docstring_examples/series.py new file mode 100644 index 000000000..23fd21b24 --- /dev/null +++ b/docs/docstring_examples/series.py @@ -0,0 +1,3321 @@ +from __future__ import annotations + +EXAMPLES = { + "implementation": """ + >>> import narwhals as nw + >>> import pandas as pd + + >>> s_native = pd.Series([1, 2, 3]) + >>> s = nw.from_native(s_native, series_only=True) + + >>> s.implementation + <Implementation.PANDAS: 1> + + >>> s.implementation.is_pandas() + True + + >>> s.implementation.is_pandas_like() + True + + >>> s.implementation.is_polars() + False + """, + "__getitem__": """ + >>> from typing import Any + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_get_first_item(s_native: IntoSeriesT) -> Any: + ... s = nw.from_native(s_native, series_only=True) + ... return s[0] + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_get_first_item`: + + >>> agnostic_get_first_item(s_pd) + np.int64(1) + + >>> agnostic_get_first_item(s_pl) + 1 + + >>> agnostic_get_first_item(s_pa) + 1 + + We can also make a function to slice the Series: + + >>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s[:2].to_native() + + >>> agnostic_slice(s_pd) + 0 1 + 1 2 + dtype: int64 + + >>> agnostic_slice(s_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 1 + 2 + ] + + >>> agnostic_slice(s_pa) # doctest:+ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 1, + 2 + ] + ] + """, + "to_native": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_to_native(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_native`: + + >>> agnostic_to_native(s_pd) + 0 1 + 1 2 + 2 3 + dtype: int64 + + >>> agnostic_to_native(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 1 + 2 + 3 + ] + + >>> agnostic_to_native(s_pa) # doctest:+ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 1, + 2, + 3 + ] + ] + """, + "scatter": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + We define a library agnostic function: + + >>> def agnostic_scatter(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... 
return df.with_columns(df["a"].scatter([0, 1], [999, 888])).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_scatter`: + + >>> agnostic_scatter(df_pd) + a b + 0 999 4 + 1 888 5 + 2 3 6 + + >>> agnostic_scatter(df_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 999 ┆ 4 │ + │ 888 ┆ 5 │ + │ 3 ┆ 6 │ + └─────┴─────┘ + + >>> agnostic_scatter(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[999,888,3]] + b: [[4,5,6]] + """, + "shape": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_shape(s_native: IntoSeries) -> tuple[int]: + ... s = nw.from_native(s_native, series_only=True) + ... return s.shape + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_shape`: + + >>> agnostic_shape(s_pd) + (3,) + + >>> agnostic_shape(s_pl) + (3,) + + >>> agnostic_shape(s_pa) + (3,) + """, + "pipe": """ + >>> import polars as pl + >>> import pandas as pd + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a function to pipe into: + + >>> def agnostic_pipe(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.pipe(lambda x: x + 2).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_pipe`: + + >>> agnostic_pipe(s_pd) + 0 3 + 1 4 + 2 5 + dtype: int64 + + >>> agnostic_pipe(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 3 + 4 + 5 + ] + + >>> agnostic_pipe(s_pa) # doctest: +ELLIPSIS + + [ + [ + 3, + 4, + 5 + ] + ] + """, + "len": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function that computes the len of the series: + + >>> def agnostic_len(s_native: IntoSeries) -> int: + ... s = nw.from_native(s_native, series_only=True) + ... return s.len() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_len`: + + >>> agnostic_len(s_pd) + 3 + + >>> agnostic_len(s_pl) + 3 + + >>> agnostic_len(s_pa) + 3 + """, + "dtype": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_dtype(s_native: IntoSeriesT) -> nw.dtypes.DType: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dtype + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dtype`: + + >>> agnostic_dtype(s_pd) + Int64 + + >>> agnostic_dtype(s_pl) + Int64 + + >>> agnostic_dtype(s_pa) + Int64 + """, + "name": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="foo") + >>> s_pl = pl.Series("foo", data) + + We define a library agnostic function: + + >>> def agnostic_name(s_native: IntoSeries) -> str: + ... s = nw.from_native(s_native, series_only=True) + ... return s.name + + We can then pass any supported library such as pandas or Polars + to `agnostic_name`: + + >>> agnostic_name(s_pd) + 'foo' + + >>> agnostic_name(s_pl) + 'foo' + """, + "ewm_mean": """ + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(name="a", data=data) + >>> s_pl = pl.Series(name="a", values=data) + + We define a library agnostic function: + + >>> def agnostic_ewm_mean(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.ewm_mean(com=1, ignore_nulls=False).to_native() + + We can then pass any supported library such as pandas or Polars + to `agnostic_ewm_mean`: + + >>> agnostic_ewm_mean(s_pd) + 0 1.000000 + 1 1.666667 + 2 2.428571 + Name: a, dtype: float64 + + >>> agnostic_ewm_mean(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: 'a' [f64] + [ + 1.0 + 1.666667 + 2.428571 + ] + """, + "cast": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [True, False, True] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_cast(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.cast(nw.Int64).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cast`: + + >>> agnostic_cast(s_pd) + 0 1 + 1 0 + 2 1 + dtype: int64 + + >>> agnostic_cast(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 1 + 0 + 1 + ] + + >>> agnostic_cast(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 0, + 1 + ] + ] + """, + "to_frame": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2] + >>> s_pd = pd.Series(data, name="a") + >>> s_pl = pl.Series("a", data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_to_frame(s_native: IntoSeries) -> IntoDataFrame: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.to_frame().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_frame`: + + >>> agnostic_to_frame(s_pd) + a + 0 1 + 1 2 + + >>> agnostic_to_frame(s_pl) + shape: (2, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 1 │ + │ 2 │ + └─────┘ + + >>> agnostic_to_frame(s_pa) + pyarrow.Table + : int64 + ---- + : [[1,2]] + """, + "to_list": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_to_list(s_native: IntoSeries): + ... s = nw.from_native(s_native, series_only=True) + ... return s.to_list() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_list`: + + >>> agnostic_to_list(s_pd) + [1, 2, 3] + + >>> agnostic_to_list(s_pl) + [1, 2, 3] + + >>> agnostic_to_list(s_pa) + [1, 2, 3] + """, + "mean": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_mean(s_native: IntoSeries) -> float: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.mean() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_mean`: + + >>> agnostic_mean(s_pd) + np.float64(2.0) + + >>> agnostic_mean(s_pl) + 2.0 + + >>> agnostic_mean(s_pa) + 2.0 + """, + "median": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [5, 3, 8] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a library agnostic function: + + >>> def agnostic_median(s_native: IntoSeries) -> float: + ... s = nw.from_native(s_native, series_only=True) + ... return s.median() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_median`: + + >>> agnostic_median(s_pd) + np.float64(5.0) + + >>> agnostic_median(s_pl) + 5.0 + + >>> agnostic_median(s_pa) + 5.0 + """, + "skew": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 1, 2, 10, 100] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_skew(s_native: IntoSeries) -> float: + ... s = nw.from_native(s_native, series_only=True) + ... return s.skew() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_skew`: + + >>> agnostic_skew(s_pd) + np.float64(1.4724267269058975) + + >>> agnostic_skew(s_pl) + 1.4724267269058975 + + >>> agnostic_skew(s_pa) + 1.4724267269058975 + + Notes: + The skewness is a measure of the asymmetry of the probability distribution. + A perfectly symmetric distribution has a skewness of 0. 
+ """, + "count": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_count(s_native: IntoSeries) -> int: + ... s = nw.from_native(s_native, series_only=True) + ... return s.count() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_count`: + + >>> agnostic_count(s_pd) + np.int64(3) + + >>> agnostic_count(s_pl) + 3 + + >>> agnostic_count(s_pa) + 3 + """, + "any": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [False, True, False] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_any(s_native: IntoSeries) -> bool: + ... s = nw.from_native(s_native, series_only=True) + ... return s.any() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_any`: + + >>> agnostic_any(s_pd) + np.True_ + + >>> agnostic_any(s_pl) + True + + >>> agnostic_any(s_pa) + True + """, + "all": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [False, True, False] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_all(s_native: IntoSeries) -> bool: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.all() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_all`: + + >>> agnostic_all(s_pd) + np.False_ + + >>> agnostic_all(s_pl) + False + + >>> agnostic_all(s_pa) + False + """, + "min": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_min(s_native: IntoSeries): + ... s = nw.from_native(s_native, series_only=True) + ... return s.min() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_min`: + + >>> agnostic_min(s_pd) + np.int64(1) + + >>> agnostic_min(s_pl) + 1 + + >>> agnostic_min(s_pa) + 1 + """, + "max": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_max(s_native: IntoSeries): + ... s = nw.from_native(s_native, series_only=True) + ... return s.max() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_max`: + + >>> agnostic_max(s_pd) + np.int64(3) + + >>> agnostic_max(s_pl) + 3 + + >>> agnostic_max(s_pa) + 3 + """, + "arg_min": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_arg_min(s_native: IntoSeries): + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.arg_min() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_arg_min`: + + >>> agnostic_arg_min(s_pd) + np.int64(0) + + >>> agnostic_arg_min(s_pl) + 0 + + >>> agnostic_arg_min(s_pa) + 0 + """, + "arg_max": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_arg_max(s_native: IntoSeries): + ... s = nw.from_native(s_native, series_only=True) + ... return s.arg_max() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_arg_max`: + + >>> agnostic_arg_max(s_pd) + np.int64(2) + + >>> agnostic_arg_max(s_pl) + 2 + + >>> agnostic_arg_max(s_pa) + 2 + """, + "sum": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_sum(s_native: IntoSeries): + ... s = nw.from_native(s_native, series_only=True) + ... return s.sum() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_sum`: + + >>> agnostic_sum(s_pd) + np.int64(6) + + >>> agnostic_sum(s_pl) + 6 + + >>> agnostic_sum(s_pa) + 6 + """, + "std": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_std(s_native: IntoSeries) -> float: + ... 
s = nw.from_native(s_native, series_only=True) + ... return s.std() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_std`: + + >>> agnostic_std(s_pd) + np.float64(1.0) + + >>> agnostic_std(s_pl) + 1.0 + + >>> agnostic_std(s_pa) + 1.0 + """, + "var": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_var(s_native: IntoSeries) -> float: + ... s = nw.from_native(s_native, series_only=True) + ... return s.var() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_var`: + + >>> agnostic_var(s_pd) + np.float64(1.0) + + >>> agnostic_var(s_pl) + 1.0 + + >>> agnostic_var(s_pa) + 1.0 + """, + "clip": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_clip_lower(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.clip(2).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_clip_lower`: + + >>> agnostic_clip_lower(s_pd) + 0 2 + 1 2 + 2 3 + dtype: int64 + + >>> agnostic_clip_lower(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 2 + 2 + 3 + ] + + >>> agnostic_clip_lower(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 2, + 3 + ] + ] + + We define another library agnostic function: + + >>> def agnostic_clip_upper(s_native: IntoSeriesT) -> IntoSeriesT: + ... 
s = nw.from_native(s_native, series_only=True) + ... return s.clip(upper_bound=2).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_clip_upper`: + + >>> agnostic_clip_upper(s_pd) + 0 1 + 1 2 + 2 2 + dtype: int64 + + >>> agnostic_clip_upper(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 1 + 2 + 2 + ] + + >>> agnostic_clip_upper(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 2, + 2 + ] + ] + + We can have both at the same time: + + >>> data = [-1, 1, -3, 3, -5, 5] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_clip(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.clip(-1, 3).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_clip`: + + >>> agnostic_clip(s_pd) + 0 -1 + 1 1 + 2 -1 + 3 3 + 4 -1 + 5 3 + dtype: int64 + + >>> agnostic_clip(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (6,) + Series: '' [i64] + [ + -1 + 1 + -1 + 3 + -1 + 3 + ] + + >>> agnostic_clip(s_pa) # doctest: +ELLIPSIS + + [ + [ + -1, + 1, + -1, + 3, + -1, + 3 + ] + ] + """, + "is_in": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_is_in(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.is_in([3, 2, 8]).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_in`: + + >>> agnostic_is_in(s_pd) + 0 False + 1 True + 2 True + dtype: bool + + >>> agnostic_is_in(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [bool] + [ + false + true + true + ] + + >>> agnostic_is_in(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + true, + true + ] + ] + """, + "arg_true": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, None, None, 2] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_arg_true(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.is_null().arg_true().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_arg_true`: + + >>> agnostic_arg_true(s_pd) + 1 1 + 2 2 + dtype: int64 + + >>> agnostic_arg_true(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [u32] + [ + 1 + 2 + ] + + >>> agnostic_arg_true(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 2 + ] + ] + """, + "drop_nulls": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [2, 4, None, 3, 5] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_drop_nulls(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.drop_nulls().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_drop_nulls`: + + >>> agnostic_drop_nulls(s_pd) + 0 2.0 + 1 4.0 + 3 3.0 + 4 5.0 + dtype: float64 + + >>> agnostic_drop_nulls(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [i64] + [ + 2 + 4 + 3 + 5 + ] + + >>> agnostic_drop_nulls(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 4, + 3, + 5 + ] + ] + """, + "abs": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [2, -4, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.abs().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_abs`: + + >>> agnostic_abs(s_pd) + 0 2 + 1 4 + 2 3 + dtype: int64 + + >>> agnostic_abs(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 2 + 4 + 3 + ] + + >>> agnostic_abs(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 4, + 3 + ] + ] + """, + "cum_sum": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [2, 4, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_cum_sum(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.cum_sum().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_sum`: + + >>> agnostic_cum_sum(s_pd) + 0 2 + 1 6 + 2 9 + dtype: int64 + + >>> agnostic_cum_sum(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 2 + 6 + 9 + ] + + >>> agnostic_cum_sum(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 6, + 9 + ] + ] + """, + "unique": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [2, 4, 4, 6] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_unique(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.unique(maintain_order=True).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_unique`: + + >>> agnostic_unique(s_pd) + 0 2 + 1 4 + 2 6 + dtype: int64 + + >>> agnostic_unique(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 2 + 4 + 6 + ] + + >>> agnostic_unique(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 4, + 6 + ] + ] + """, + "diff": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [2, 4, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_diff(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.diff().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_diff`: + + >>> agnostic_diff(s_pd) + 0 NaN + 1 2.0 + 2 -1.0 + dtype: float64 + + >>> agnostic_diff(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + null + 2 + -1 + ] + + >>> agnostic_diff(s_pa) # doctest: +ELLIPSIS + + [ + [ + null, + 2, + -1 + ] + ] + """, + "shift": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [2, 4, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_shift(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.shift(1).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_shift`: + + >>> agnostic_shift(s_pd) + 0 NaN + 1 2.0 + 2 4.0 + dtype: float64 + + >>> agnostic_shift(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + null + 2 + 4 + ] + + >>> agnostic_shift(s_pa) # doctest: +ELLIPSIS + + [ + [ + null, + 2, + 4 + ] + ] + """, + "sample": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3, 4] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_sample(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.sample(fraction=1.0, with_replacement=True).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_sample`: + + >>> agnostic_sample(s_pd) # doctest: +SKIP + 2 3 + 1 2 + 3 4 + 3 4 + dtype: int64 + + >>> agnostic_sample(s_pl) # doctest: +SKIP + shape: (4,) + Series: '' [i64] + [ + 1 + 4 + 3 + 4 + ] + + >>> agnostic_sample(s_pa) # doctest: +SKIP + + [ + [ + 1, + 4, + 3, + 4 + ] + ] + """, + "alias": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="foo") + >>> s_pl = pl.Series("foo", data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_alias(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.alias("bar").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_alias`: + + >>> agnostic_alias(s_pd) + 0 1 + 1 2 + 2 3 + Name: bar, dtype: int64 + + >>> agnostic_alias(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: 'bar' [i64] + [ + 1 + 2 + 3 + ] + + >>> agnostic_alias(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 2, + 3 + ] + ] + """, + "rename": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="foo") + >>> s_pl = pl.Series("foo", data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_rename(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.rename("bar").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rename`: + + >>> agnostic_rename(s_pd) + 0 1 + 1 2 + 2 3 + Name: bar, dtype: int64 + + >>> agnostic_rename(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: 'bar' [i64] + [ + 1 + 2 + 3 + ] + + >>> agnostic_rename(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 2, + 3 + ] + ] + """, + "replace_strict": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = {"a": [3, 0, 1, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define dataframe-agnostic functions: + + >>> def agnostic_replace_strict(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.replace_strict( + ... [0, 1, 2, 3], ["zero", "one", "two", "three"], return_dtype=nw.String + ... ).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_replace_strict`: + + >>> agnostic_replace_strict(df_pd["a"]) + 0 three + 1 zero + 2 one + 3 two + Name: a, dtype: object + + >>> agnostic_replace_strict(df_pl["a"]) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: 'a' [str] + [ + "three" + "zero" + "one" + "two" + ] + + >>> agnostic_replace_strict(df_pa["a"]) + + [ + [ + "three", + "zero", + "one", + "two" + ] + ] + """, + "sort": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [5, None, 1, 2] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define library agnostic functions: + + >>> def agnostic_sort(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.sort().to_native() + + >>> def agnostic_sort_descending(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.sort(descending=True).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_sort` and `agnostic_sort_descending`: + + >>> agnostic_sort(s_pd) + 1 NaN + 2 1.0 + 3 2.0 + 0 5.0 + dtype: float64 + + >>> agnostic_sort(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [i64] + [ + null + 1 + 2 + 5 + ] + + >>> agnostic_sort(s_pa) # doctest: +ELLIPSIS + + [ + [ + null, + 1, + 2, + 5 + ] + ] + + >>> agnostic_sort_descending(s_pd) + 1 NaN + 0 5.0 + 3 2.0 + 2 1.0 + dtype: float64 + + >>> agnostic_sort_descending(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [i64] + [ + null + 5 + 2 + 1 + ] + + >>> agnostic_sort_descending(s_pa) # doctest: +ELLIPSIS + + [ + [ + null, + 5, + 2, + 1 + ] + ] + """, + "is_null": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_null(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.is_null().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_null`: + + >>> agnostic_is_null(s_pd) + 0 False + 1 False + 2 True + dtype: bool + + >>> agnostic_is_null(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [bool] + [ + false + false + true + ] + + >>> agnostic_is_null(s_pa) # doctest:+ELLIPSIS + + [ + [ + false, + false, + true + ] + ] + """, + "is_nan": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [0.0, None, 2.0] + >>> s_pd = pd.Series(data, dtype="Float64") + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data], type=pa.float64()) + + >>> def agnostic_self_div_is_nan(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.is_nan().to_native() + + >>> print(agnostic_self_div_is_nan(s_pd)) + 0 False + 1 + 2 False + dtype: boolean + + >>> print(agnostic_self_div_is_nan(s_pl)) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [bool] + [ + false + null + false + ] + + >>> print(agnostic_self_div_is_nan(s_pa)) # doctest: +NORMALIZE_WHITESPACE + [ + [ + false, + null, + false + ] + ] + """, + "fill_null": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_fill_null(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.fill_null(5).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_fill_null`: + + >>> agnostic_fill_null(s_pd) + 0 1.0 + 1 2.0 + 2 5.0 + dtype: float64 + + >>> agnostic_fill_null(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 1 + 2 + 5 + ] + + >>> agnostic_fill_null(s_pa) # doctest:+ELLIPSIS + + [ + [ + 1, + 2, + 5 + ] + ] + + Using a strategy: + + >>> def agnostic_fill_null_with_strategy(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.fill_null(strategy="forward", limit=1).to_native() + + >>> agnostic_fill_null_with_strategy(s_pd) + 0 1.0 + 1 2.0 + 2 2.0 + dtype: float64 + + >>> agnostic_fill_null_with_strategy(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 1 + 2 + 2 + ] + + >>> agnostic_fill_null_with_strategy(s_pa) # doctest:+ELLIPSIS + + [ + [ + 1, + 2, + 2 + ] + ] + """, + "is_between": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3, 4, 5] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_is_between(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.is_between(2, 4, "right").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_between`: + + >>> agnostic_is_between(s_pd) + 0 False + 1 False + 2 True + 3 True + 4 False + dtype: bool + + >>> agnostic_is_between(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (5,) + Series: '' [bool] + [ + false + false + true + true + false + ] + + >>> agnostic_is_between(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + false, + true, + true, + false + ] + ] + """, + "n_unique": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_n_unique(s_native: IntoSeries) -> int: + ... s = nw.from_native(s_native, series_only=True) + ... return s.n_unique() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_n_unique`: + + >>> agnostic_n_unique(s_pd) + 3 + + >>> agnostic_n_unique(s_pl) + 3 + + >>> agnostic_n_unique(s_pa) + 3 + """, + "to_numpy": """ + >>> import numpy as np + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="a") + >>> s_pl = pl.Series("a", data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_to_numpy(s_native: IntoSeries) -> np.ndarray: + ... s = nw.from_native(s_native, series_only=True) + ... return s.to_numpy() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_numpy`: + + >>> agnostic_to_numpy(s_pd) + array([1, 2, 3]...) + + >>> agnostic_to_numpy(s_pl) + array([1, 2, 3]...) + + >>> agnostic_to_numpy(s_pa) + array([1, 2, 3]...) 
+ """, + "to_pandas": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="a") + >>> s_pl = pl.Series("a", data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_to_pandas(s_native: IntoSeries) -> pd.Series: + ... s = nw.from_native(s_native, series_only=True) + ... return s.to_pandas() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_pandas`: + + >>> agnostic_to_pandas(s_pd) + 0 1 + 1 2 + 2 3 + Name: a, dtype: int64 + + >>> agnostic_to_pandas(s_pl) + 0 1 + 1 2 + 2 3 + Name: a, dtype: int64 + + >>> agnostic_to_pandas(s_pa) + 0 1 + 1 2 + 2 3 + Name: , dtype: int64 + """, + "filter": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [4, 10, 15, 34, 50] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_filter(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.filter(s > 10).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_filter`: + + >>> agnostic_filter(s_pd) + 2 15 + 3 34 + 4 50 + dtype: int64 + + >>> agnostic_filter(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 15 + 34 + 50 + ] + + >>> agnostic_filter(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 15, + 34, + 50 + ] + ] + """, + "is_duplicated": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3, 1] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_duplicated(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.is_duplicated().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_duplicated`: + + >>> agnostic_is_duplicated(s_pd) + 0 True + 1 False + 2 False + 3 True + dtype: bool + + >>> agnostic_is_duplicated(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [bool] + [ + true + false + false + true + ] + + >>> agnostic_is_duplicated(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + true, + false, + false, + true + ] + ] + """, + "is_empty": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + Let's define a dataframe-agnostic function that keeps only the values + greater than 10, and then checks if the result is empty or not: + + >>> def agnostic_is_empty(s_native: IntoSeries) -> bool: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.filter(s > 10).is_empty() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_empty`: + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + >>> agnostic_is_empty(s_pd), agnostic_is_empty(s_pl), agnostic_is_empty(s_pa) + (True, True, True) + + >>> data = [100, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + >>> agnostic_is_empty(s_pd), agnostic_is_empty(s_pl), agnostic_is_empty(s_pa) + (False, False, False) + """, + "is_unique": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3, 1] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_unique(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.is_unique().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_unique`: + + >>> agnostic_is_unique(s_pd) + 0 False + 1 True + 2 True + 3 False + dtype: bool + + >>> agnostic_is_unique(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [bool] + [ + false + true + true + false + ] + >>> agnostic_is_unique(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + true, + true, + false + ] + ] + """, + "null_count": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, None, None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function that returns the null count of + the series: + + >>> def agnostic_null_count(s_native: IntoSeries) -> int: + ... 
s = nw.from_native(s_native, series_only=True) + ... return s.null_count() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_null_count`: + + >>> agnostic_null_count(s_pd) + np.int64(2) + + >>> agnostic_null_count(s_pl) + 2 + + >>> agnostic_null_count(s_pa) + 2 + """, + "is_first_distinct": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 1, 2, 3, 2] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_first_distinct(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.is_first_distinct().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_first_distinct`: + + >>> agnostic_is_first_distinct(s_pd) + 0 True + 1 False + 2 True + 3 True + 4 False + dtype: bool + + >>> agnostic_is_first_distinct(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (5,) + Series: '' [bool] + [ + true + false + true + true + false + ] + + >>> agnostic_is_first_distinct(s_pa) # doctest: +ELLIPSIS + + [ + [ + true, + false, + true, + true, + false + ] + ] + """, + "is_last_distinct": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 1, 2, 3, 2] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_last_distinct(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.is_last_distinct().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_last_distinct`: + + >>> agnostic_is_last_distinct(s_pd) + 0 False + 1 True + 2 False + 3 True + 4 True + dtype: bool + + >>> agnostic_is_last_distinct(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (5,) + Series: '' [bool] + [ + false + true + false + true + true + ] + + >>> agnostic_is_last_distinct(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + true, + false, + true, + true + ] + ] + """, + "is_sorted": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> unsorted_data = [1, 3, 2] + >>> sorted_data = [3, 2, 1] + + Let's define a dataframe-agnostic function: + + >>> def agnostic_is_sorted(s_native: IntoSeries, descending: bool = False): + ... s = nw.from_native(s_native, series_only=True) + ... return s.is_sorted(descending=descending) + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_sorted`: + + >>> agnostic_is_sorted(pd.Series(unsorted_data)) + False + + >>> agnostic_is_sorted(pd.Series(sorted_data), descending=True) + True + + >>> agnostic_is_sorted(pl.Series(unsorted_data)) + False + + >>> agnostic_is_sorted(pl.Series(sorted_data), descending=True) + True + + >>> agnostic_is_sorted(pa.chunked_array([unsorted_data])) + False + + >>> agnostic_is_sorted(pa.chunked_array([sorted_data]), descending=True) + True + """, + "value_counts": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 1, 2, 3, 2] + >>> s_pd = pd.Series(data, name="s") + >>> s_pl = pl.Series(values=data, name="s") + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def 
agnostic_value_counts(s_native: IntoSeries) -> IntoDataFrame: + ... s = nw.from_native(s_native, series_only=True) + ... return s.value_counts(sort=True).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_value_counts`: + + >>> agnostic_value_counts(s_pd) + s count + 0 1 2 + 1 2 2 + 2 3 1 + + >>> agnostic_value_counts(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3, 2) + ┌─────┬───────┐ + │ s ┆ count │ + │ --- ┆ --- │ + │ i64 ┆ u32 │ + ╞═════╪═══════╡ + │ 1 ┆ 2 │ + │ 2 ┆ 2 │ + │ 3 ┆ 1 │ + └─────┴───────┘ + + >>> agnostic_value_counts(s_pa) + pyarrow.Table + : int64 + count: int64 + ---- + : [[1,2,3]] + count: [[2,2,1]] + """, + "quantile": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = list(range(50)) + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_quantile(s_native: IntoSeries) -> list[float]: + ... s = nw.from_native(s_native, series_only=True) + ... return [ + ... s.quantile(quantile=q, interpolation="nearest") + ... for q in (0.1, 0.25, 0.5, 0.75, 0.9) + ... ] + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_quantile`: + + >>> agnostic_quantile(s_pd) + [np.int64(5), np.int64(12), np.int64(24), np.int64(37), np.int64(44)] + + >>> agnostic_quantile(s_pl) + [5.0, 12.0, 25.0, 37.0, 44.0] + + >>> agnostic_quantile(s_pa) + [5, 12, 24, 37, 44] + """, + "zip_with": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3, 4, 5] + >>> other = [5, 4, 3, 2, 1] + >>> mask = [True, False, True, False, True] + + Let's define a dataframe-agnostic function: + + >>> def agnostic_zip_with( + ... 
s1_native: IntoSeriesT, mask_native: IntoSeriesT, s2_native: IntoSeriesT + ... ) -> IntoSeriesT: + ... s1 = nw.from_native(s1_native, series_only=True) + ... mask = nw.from_native(mask_native, series_only=True) + ... s2 = nw.from_native(s2_native, series_only=True) + ... return s1.zip_with(mask, s2).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_zip_with`: + + >>> agnostic_zip_with( + ... s1_native=pl.Series(data), + ... mask_native=pl.Series(mask), + ... s2_native=pl.Series(other), + ... ) # doctest: +NORMALIZE_WHITESPACE + shape: (5,) + Series: '' [i64] + [ + 1 + 4 + 3 + 2 + 5 + ] + + >>> agnostic_zip_with( + ... s1_native=pd.Series(data), + ... mask_native=pd.Series(mask), + ... s2_native=pd.Series(other), + ... ) + 0 1 + 1 4 + 2 3 + 3 2 + 4 5 + dtype: int64 + + >>> agnostic_zip_with( + ... s1_native=pa.chunked_array([data]), + ... mask_native=pa.chunked_array([mask]), + ... s2_native=pa.chunked_array([other]), + ... ) # doctest: +ELLIPSIS + + [ + [ + 1, + 4, + 3, + 2, + 5 + ] + ] + """, + "item": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + Let's define a dataframe-agnostic function that returns item at given index + + >>> def agnostic_item(s_native: IntoSeries, index=None): + ... s = nw.from_native(s_native, series_only=True) + ... return s.item(index) + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_item`: + + >>> ( + ... agnostic_item(pl.Series("a", [1]), None), + ... agnostic_item(pd.Series([1]), None), + ... agnostic_item(pa.chunked_array([[1]]), None), + ... ) + (1, np.int64(1), 1) + + >>> ( + ... agnostic_item(pl.Series("a", [9, 8, 7]), -1), + ... agnostic_item(pl.Series([9, 8, 7]), -2), + ... agnostic_item(pa.chunked_array([[9, 8, 7]]), -3), + ... 
) + (7, 8, 9) + """, + "head": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = list(range(10)) + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function that returns the first 3 rows: + + >>> def agnostic_head(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.head(3).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_head`: + + >>> agnostic_head(s_pd) + 0 0 + 1 1 + 2 2 + dtype: int64 + + >>> agnostic_head(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 0 + 1 + 2 + ] + + >>> agnostic_head(s_pa) # doctest: +ELLIPSIS + + [ + [ + 0, + 1, + 2 + ] + ] + """, + "tail": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = list(range(10)) + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function that returns the last 3 rows: + + >>> def agnostic_tail(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.tail(3).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_tail`: + + >>> agnostic_tail(s_pd) + 7 7 + 8 8 + 9 9 + dtype: int64 + + >>> agnostic_tail(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 7 + 8 + 9 + ] + + >>> agnostic_tail(s_pa) # doctest: +ELLIPSIS + + [ + [ + 7, + 8, + 9 + ] + ] + """, + "round": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1.12345, 2.56789, 3.901234] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function that rounds to the first decimal: + + >>> def agnostic_round(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.round(1).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_round`: + + >>> agnostic_round(s_pd) + 0 1.1 + 1 2.6 + 2 3.9 + dtype: float64 + + >>> agnostic_round(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [f64] + [ + 1.1 + 2.6 + 3.9 + ] + + >>> agnostic_round(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1.1, + 2.6, + 3.9 + ] + ] + """, + "to_dummies": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="a") + >>> s_pl = pl.Series("a", data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_to_dummies( + ... s_native: IntoSeries, drop_first: bool = False + ... ) -> IntoDataFrame: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.to_dummies(drop_first=drop_first).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_dummies`: + + >>> agnostic_to_dummies(s_pd) + a_1 a_2 a_3 + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + + >>> agnostic_to_dummies(s_pd, drop_first=True) + a_2 a_3 + 0 0 0 + 1 1 0 + 2 0 1 + + >>> agnostic_to_dummies(s_pl) + shape: (3, 3) + ┌─────┬─────┬─────┐ + │ a_1 ┆ a_2 ┆ a_3 │ + │ --- ┆ --- ┆ --- │ + │ i8 ┆ i8 ┆ i8 │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 0 ┆ 0 │ + │ 0 ┆ 1 ┆ 0 │ + │ 0 ┆ 0 ┆ 1 │ + └─────┴─────┴─────┘ + + >>> agnostic_to_dummies(s_pl, drop_first=True) + shape: (3, 2) + ┌─────┬─────┐ + │ a_2 ┆ a_3 │ + │ --- ┆ --- │ + │ i8 ┆ i8 │ + ╞═════╪═════╡ + │ 0 ┆ 0 │ + │ 1 ┆ 0 │ + │ 0 ┆ 1 │ + └─────┴─────┘ + + >>> agnostic_to_dummies(s_pa) + pyarrow.Table + _1: int8 + _2: int8 + _3: int8 + ---- + _1: [[1,0,0]] + _2: [[0,1,0]] + _3: [[0,0,1]] + >>> agnostic_to_dummies(s_pa, drop_first=True) + pyarrow.Table + _2: int8 + _3: int8 + ---- + _2: [[0,1,0]] + _3: [[0,0,1]] + """, + "gather_every": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3, 4] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function that gathers every 2nd row, + starting from an offset of 1: + + >>> def agnostic_gather_every(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.gather_every(n=2, offset=1).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_gather_every`: + + >>> agnostic_gather_every(s_pd) + 1 2 + 3 4 + dtype: int64 + + >>> agnostic_gather_every(s_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 2 + 4 + ] + + >>> agnostic_gather_every(s_pa) # doctest:+ELLIPSIS + + [ + [ + 2, + 4 + ] + ] + """, + "to_arrow": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2, 3, 4] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function that converts to arrow: + + >>> def agnostic_to_arrow(s_native: IntoSeries) -> pa.Array: + ... s = nw.from_native(s_native, series_only=True) + ... return s.to_arrow() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_arrow`: + + >>> agnostic_to_arrow(s_pd) # doctest:+NORMALIZE_WHITESPACE + + [ + 1, + 2, + 3, + 4 + ] + + >>> agnostic_to_arrow(s_pl) # doctest:+NORMALIZE_WHITESPACE + + [ + 1, + 2, + 3, + 4 + ] + + >>> agnostic_to_arrow(s_pa) # doctest:+NORMALIZE_WHITESPACE + + [ + 1, + 2, + 3, + 4 + ] + """, + "mode": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 1, 2, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_mode(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.mode().sort().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_mode`: + + >>> agnostic_mode(s_pd) + 0 1 + 1 2 + dtype: int64 + + >>> agnostic_mode(s_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 1 + 2 + ] + + >>> agnostic_mode(s_pa) # doctest:+ELLIPSIS + + [ + [ + 1, + 2 + ] + ] + """, + "is_finite": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [float("nan"), float("inf"), 2.0, None] + + We define a library agnostic function: + + >>> def agnostic_is_finite(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.is_finite().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_finite`: + + >>> agnostic_is_finite(pd.Series(data)) + 0 False + 1 False + 2 True + 3 False + dtype: bool + + >>> agnostic_is_finite(pl.Series(data)) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [bool] + [ + false + false + true + null + ] + + >>> agnostic_is_finite(pa.chunked_array([data])) # doctest: +ELLIPSIS + + [ + [ + false, + false, + true, + null + ] + ] + """, + "cum_count": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["x", "k", None, "d"] + + We define a library agnostic function: + + >>> def agnostic_cum_count(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.cum_count(reverse=True).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_count`: + + >>> agnostic_cum_count(pd.Series(data)) + 0 3 + 1 2 + 2 1 + 3 1 + dtype: int64 + + >>> agnostic_cum_count(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [u32] + [ + 3 + 2 + 1 + 1 + ] + + >>> agnostic_cum_count(pa.chunked_array([data])) # doctest:+ELLIPSIS + + [ + [ + 3, + 2, + 1, + 1 + ] + ] + """, + "cum_min": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [3, 1, None, 2] + + We define a library agnostic function: + + >>> def agnostic_cum_min(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.cum_min().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_min`: + + >>> agnostic_cum_min(pd.Series(data)) + 0 3.0 + 1 1.0 + 2 NaN + 3 1.0 + dtype: float64 + + >>> agnostic_cum_min(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [i64] + [ + 3 + 1 + null + 1 + ] + + >>> agnostic_cum_min(pa.chunked_array([data])) # doctest:+ELLIPSIS + + [ + [ + 3, + 1, + null, + 1 + ] + ] + """, + "cum_max": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 3, None, 2] + + We define a library agnostic function: + + >>> def agnostic_cum_max(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.cum_max().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_max`: + + >>> agnostic_cum_max(pd.Series(data)) + 0 1.0 + 1 3.0 + 2 NaN + 3 3.0 + dtype: float64 + + >>> agnostic_cum_max(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [i64] + [ + 1 + 3 + null + 3 + ] + + >>> agnostic_cum_max(pa.chunked_array([data])) # doctest:+ELLIPSIS + + [ + [ + 1, + 3, + null, + 3 + ] + ] + """, + "cum_prod": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 3, None, 2] + + We define a library agnostic function: + + >>> def agnostic_cum_prod(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.cum_prod().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_prod`: + + >>> agnostic_cum_prod(pd.Series(data)) + 0 1.0 + 1 3.0 + 2 NaN + 3 6.0 + dtype: float64 + + >>> agnostic_cum_prod(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [i64] + [ + 1 + 3 + null + 6 + ] + + >>> agnostic_cum_prod(pa.chunked_array([data])) # doctest:+ELLIPSIS + + [ + [ + 1, + 3, + null, + 6 + ] + ] + """, + "rolling_sum": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1.0, 2.0, 3.0, 4.0] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_rolling_sum(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.rolling_sum(window_size=2).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_sum`: + + >>> agnostic_rolling_sum(s_pd) + 0 NaN + 1 3.0 + 2 5.0 + 3 7.0 + dtype: float64 + + >>> agnostic_rolling_sum(s_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [f64] + [ + null + 3.0 + 5.0 + 7.0 + ] + + >>> agnostic_rolling_sum(s_pa) # doctest:+ELLIPSIS + + [ + [ + null, + 3, + 5, + 7 + ] + ] + """, + "rolling_mean": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1.0, 2.0, 3.0, 4.0] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_rolling_mean(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.rolling_mean(window_size=2).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_mean`: + + >>> agnostic_rolling_mean(s_pd) + 0 NaN + 1 1.5 + 2 2.5 + 3 3.5 + dtype: float64 + + >>> agnostic_rolling_mean(s_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [f64] + [ + null + 1.5 + 2.5 + 3.5 + ] + + >>> agnostic_rolling_mean(s_pa) # doctest:+ELLIPSIS + + [ + [ + null, + 1.5, + 2.5, + 3.5 + ] + ] + """, + "rolling_var": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1.0, 3.0, 1.0, 4.0] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_rolling_var(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.rolling_var(window_size=2, min_periods=1).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_var`: + + >>> agnostic_rolling_var(s_pd) + 0 NaN + 1 2.0 + 2 2.0 + 3 4.5 + dtype: float64 + + >>> agnostic_rolling_var(s_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [f64] + [ + null + 2.0 + 2.0 + 4.5 + ] + + >>> agnostic_rolling_var(s_pa) # doctest:+ELLIPSIS + + [ + [ + nan, + 2, + 2, + 4.5 + ] + ] + """, + "rolling_std": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1.0, 3.0, 1.0, 4.0] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_rolling_std(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.rolling_std(window_size=2, min_periods=1).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_std`: + + >>> agnostic_rolling_std(s_pd) + 0 NaN + 1 1.414214 + 2 1.414214 + 3 2.121320 + dtype: float64 + + >>> agnostic_rolling_std(s_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [f64] + [ + null + 1.414214 + 1.414214 + 2.12132 + ] + + >>> agnostic_rolling_std(s_pa) # doctest:+ELLIPSIS + + [ + [ + nan, + 1.4142135623730951, + 1.4142135623730951, + 2.1213203435596424 + ] + ] + """, + "rank": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> + >>> data = [3, 6, 1, 1, 6] + + We define a dataframe-agnostic function that computes the dense rank for + the data: + + >>> def agnostic_dense_rank(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.rank(method="dense").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dense_rank`: + + >>> agnostic_dense_rank(pd.Series(data)) + 0 2.0 + 1 3.0 + 2 1.0 + 3 1.0 + 4 3.0 + dtype: float64 + + >>> agnostic_dense_rank(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE + shape: (5,) + Series: '' [u32] + [ + 2 + 3 + 1 + 1 + 3 + ] + + >>> agnostic_dense_rank(pa.chunked_array([data])) # doctest:+ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 2, + 3, + 1, + 1, + 3 + ] + ] + """, +} diff --git a/docs/docstring_examples/series_cat.py b/docs/docstring_examples/series_cat.py new file mode 100644 index 000000000..2bfa0181f --- /dev/null +++ b/docs/docstring_examples/series_cat.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +EXAMPLES = { + "get_categories": """ + Let's create some series: + + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["apple", "mango", "mango"] + >>> s_pd = pd.Series(data, dtype="category") + >>> s_pl = pl.Series(data, dtype=pl.Categorical) + >>> s_pa = pa.chunked_array([data]).dictionary_encode() + + We define a dataframe-agnostic function to get the unique categories + of a categorical series: + + >>> def agnostic_get_categories(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.cat.get_categories().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_get_categories`: + + >>> agnostic_get_categories(s_pd) + 0 apple + 1 mango + dtype: object + + >>> agnostic_get_categories(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [str] + [ + "apple" + "mango" + ] + + >>> agnostic_get_categories(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + "apple", + "mango" + ] + ] + """, +} diff --git a/docs/docstring_examples/series_dt.py b/docs/docstring_examples/series_dt.py new file mode 100644 index 000000000..9fc504750 --- /dev/null +++ b/docs/docstring_examples/series_dt.py @@ -0,0 +1,969 @@ +from __future__ import annotations + +EXAMPLES = { + "date": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)] + >>> s_pd = pd.Series(dates).convert_dtypes(dtype_backend="pyarrow") + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_date(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dt.date().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_date`: + + >>> agnostic_date(s_pd) + 0 2012-01-07 + 1 2023-03-10 + dtype: date32[day][pyarrow] + + >>> agnostic_date(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [date] + [ + 2012-01-07 + 2023-03-10 + ] + + >>> agnostic_date(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2012-01-07, + 2023-03-10 + ] + ] + """, + "year": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [datetime(2012, 1, 7), datetime(2023, 3, 10)] + >>> s_pd = pd.Series(dates) + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_year(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.year().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_year`: + + >>> agnostic_year(s_pd) + 0 2012 + 1 2023 + dtype: int... + + >>> agnostic_year(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i32] + [ + 2012 + 2023 + ] + + >>> agnostic_year(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2012, + 2023 + ] + ] + """, + "month": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [datetime(2023, 2, 1), datetime(2023, 8, 3)] + >>> s_pd = pd.Series(dates) + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_month(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dt.month().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_month`: + + >>> agnostic_month(s_pd) + 0 2 + 1 8 + dtype: int... + >>> agnostic_month(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i8] + [ + 2 + 8 + ] + + >>> agnostic_month(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 8 + ] + ] + """, + "day": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [datetime(2022, 1, 1), datetime(2022, 1, 5)] + >>> s_pd = pd.Series(dates) + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_day(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.day().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_day`: + + >>> agnostic_day(s_pd) + 0 1 + 1 5 + dtype: int... + + >>> agnostic_day(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i8] + [ + 1 + 5 + ] + + >>> agnostic_day(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 5 + ] + ] + """, + "hour": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)] + >>> s_pd = pd.Series(dates) + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_hour(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dt.hour().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_hour`: + + >>> agnostic_hour(s_pd) + 0 5 + 1 9 + dtype: int... + + >>> agnostic_hour(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i8] + [ + 5 + 9 + ] + + >>> agnostic_hour(s_pa) # doctest: +ELLIPSIS + + [ + [ + 5, + 9 + ] + ] + """, + "minute": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)] + >>> s_pd = pd.Series(dates) + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_minute(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.minute().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_minute`: + + >>> agnostic_minute(s_pd) + 0 3 + 1 12 + dtype: int... + + >>> agnostic_minute(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i8] + [ + 3 + 12 + ] + + >>> agnostic_minute(s_pa) # doctest: +ELLIPSIS + + [ + [ + 3, + 12 + ] + ] + """, + "second": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [datetime(2022, 1, 1, 5, 3, 10), datetime(2022, 1, 5, 9, 12, 4)] + >>> s_pd = pd.Series(dates) + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_second(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dt.second().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_second`: + + >>> agnostic_second(s_pd) + 0 10 + 1 4 + dtype: int... + + >>> agnostic_second(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i8] + [ + 10 + 4 + ] + + >>> agnostic_second(s_pa) # doctest: +ELLIPSIS + + [ + [ + 10, + 4 + ] + ] + """, + "millisecond": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [ + ... datetime(2023, 5, 21, 12, 55, 10, 400000), + ... datetime(2023, 5, 21, 12, 55, 10, 600000), + ... datetime(2023, 5, 21, 12, 55, 10, 800000), + ... datetime(2023, 5, 21, 12, 55, 11, 0), + ... datetime(2023, 5, 21, 12, 55, 11, 200000), + ... ] + >>> s_pd = pd.Series(dates) + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_millisecond(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.millisecond().alias("datetime").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_millisecond`: + + >>> agnostic_millisecond(s_pd) + 0 400 + 1 600 + 2 800 + 3 0 + 4 200 + Name: datetime, dtype: int... + + >>> agnostic_millisecond(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (5,) + Series: 'datetime' [i32] + [ + 400 + 600 + 800 + 0 + 200 + ] + + >>> agnostic_millisecond(s_pa) # doctest: +ELLIPSIS + + [ + [ + 400, + 600, + 800, + 0, + 200 + ] + ] + """, + "microsecond": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [ + ... datetime(2023, 5, 21, 12, 55, 10, 400000), + ... datetime(2023, 5, 21, 12, 55, 10, 600000), + ... 
datetime(2023, 5, 21, 12, 55, 10, 800000), + ... datetime(2023, 5, 21, 12, 55, 11, 0), + ... datetime(2023, 5, 21, 12, 55, 11, 200000), + ... ] + >>> s_pd = pd.Series(dates) + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_microsecond(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.microsecond().alias("datetime").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_microsecond`: + + >>> agnostic_microsecond(s_pd) + 0 400000 + 1 600000 + 2 800000 + 3 0 + 4 200000 + Name: datetime, dtype: int... + + >>> agnostic_microsecond(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (5,) + Series: 'datetime' [i32] + [ + 400000 + 600000 + 800000 + 0 + 200000 + ] + + >>> agnostic_microsecond(s_pa) # doctest: +ELLIPSIS + + [ + [ + 400000, + 600000, + 800000, + 0, + 200000 + ] + ] + """, + "nanosecond": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> dates = [ + ... datetime(2022, 1, 1, 5, 3, 10, 500000), + ... datetime(2022, 1, 5, 9, 12, 4, 60000), + ... ] + >>> s_pd = pd.Series(dates) + >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) + + We define a library agnostic function: + + >>> def agnostic_nanosecond(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.nanosecond().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_nanosecond`: + + >>> agnostic_nanosecond(s_pd) + 0 500000000 + 1 60000000 + dtype: int... 
+ + >>> agnostic_nanosecond(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i32] + [ + 500000000 + 60000000 + ] + + >>> agnostic_nanosecond(s_pa) # doctest: +ELLIPSIS + + [ + [ + 500000000, + 60000000 + ] + ] + """, + "ordinal_day": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_ordinal_day(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.ordinal_day().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_ordinal_day`: + + >>> agnostic_ordinal_day(s_pd) + 0 1 + 1 216 + dtype: int32 + + >>> agnostic_ordinal_day(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i16] + [ + 1 + 216 + ] + + + >>> agnostic_ordinal_day(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 216 + ] + ] + """, + "weekday": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_weekday(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dt.weekday().to_native() + + We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_weekday`: + + >>> agnostic_weekday(s_pd) + 0 3 + 1 1 + dtype: int32 + >>> agnostic_weekday(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i8] + [ + 3 + 1 + ] + >>> agnostic_weekday(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 3, + 1 + ] + ] + """, + "total_minutes": """ + >>> from datetime import timedelta + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [timedelta(minutes=10), timedelta(minutes=20, seconds=40)] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_total_minutes(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.total_minutes().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_total_minutes`: + + >>> agnostic_total_minutes(s_pd) + 0 10 + 1 20 + dtype: int... + + >>> agnostic_total_minutes(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 10 + 20 + ] + + >>> agnostic_total_minutes(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 10, + 20 + ] + ] + """, + "total_seconds": """ + >>> from datetime import timedelta + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_total_seconds(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dt.total_seconds().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_total_seconds`: + + >>> agnostic_total_seconds(s_pd) + 0 10 + 1 20 + dtype: int... + + >>> agnostic_total_seconds(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 10 + 20 + ] + + >>> agnostic_total_seconds(s_pa) # doctest: +ELLIPSIS + + [ + [ + 10, + 20 + ] + ] + """, + "total_milliseconds": """ + >>> from datetime import timedelta + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [ + ... timedelta(milliseconds=10), + ... timedelta(milliseconds=20, microseconds=40), + ... ] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_total_milliseconds(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.total_milliseconds().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_total_milliseconds`: + + >>> agnostic_total_milliseconds(s_pd) + 0 10 + 1 20 + dtype: int... + + >>> agnostic_total_milliseconds(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 10 + 20 + ] + + >>> agnostic_total_milliseconds(s_pa) # doctest: +ELLIPSIS + + [ + [ + 10, + 20 + ] + ] + """, + "total_microseconds": """ + >>> from datetime import timedelta + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [ + ... timedelta(microseconds=10), + ... timedelta(milliseconds=1, microseconds=200), + ... 
] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a library agnostic function: + + >>> def agnostic_total_microseconds(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.total_microseconds().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_total_microseconds`: + + >>> agnostic_total_microseconds(s_pd) + 0 10 + 1 1200 + dtype: int... + + >>> agnostic_total_microseconds(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + 10 + 1200 + ] + + >>> agnostic_total_microseconds(s_pa) # doctest: +ELLIPSIS + + [ + [ + 10, + 1200 + ] + ] + """, + "total_nanoseconds": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"] + >>> s_pd = pd.to_datetime(pd.Series(data)) + >>> s_pl = pl.Series(data).str.to_datetime(time_unit="ns") + + We define a library agnostic function: + + >>> def agnostic_total_nanoseconds(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.diff().dt.total_nanoseconds().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_total_nanoseconds`: + + >>> agnostic_total_nanoseconds(s_pd) + 0 NaN + 1 1.0 + dtype: float64 + + >>> agnostic_total_nanoseconds(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [i64] + [ + null + 1 + ] + """, + "to_string": """ + >>> from datetime import datetime + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [ + ... datetime(2020, 3, 1), + ... datetime(2020, 4, 1), + ... 
datetime(2020, 5, 1), + ... ] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_to_string(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.dt.to_string("%Y/%m/%d").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_string`: + + >>> agnostic_to_string(s_pd) + 0 2020/03/01 + 1 2020/04/01 + 2 2020/05/01 + dtype: object + + >>> agnostic_to_string(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [str] + [ + "2020/03/01" + "2020/04/01" + "2020/05/01" + ] + + >>> agnostic_to_string(s_pa) # doctest: +ELLIPSIS + + [ + [ + "2020/03/01", + "2020/04/01", + "2020/05/01" + ] + ] + """, + "replace_time_zone": """ + >>> from datetime import datetime, timezone + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [ + ... datetime(2024, 1, 1, tzinfo=timezone.utc), + ... datetime(2024, 1, 2, tzinfo=timezone.utc), + ... ] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_replace_time_zone(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dt.replace_time_zone("Asia/Kathmandu").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_replace_time_zone`: + + >>> agnostic_replace_time_zone(s_pd) + 0 2024-01-01 00:00:00+05:45 + 1 2024-01-02 00:00:00+05:45 + dtype: datetime64[ns, Asia/Kathmandu] + + >>> agnostic_replace_time_zone(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [datetime[μs, Asia/Kathmandu]] + [ + 2024-01-01 00:00:00 +0545 + 2024-01-02 00:00:00 +0545 + ] + + >>> agnostic_replace_time_zone(s_pa) + + [ + [ + 2023-12-31 18:15:00.000000Z, + 2024-01-01 18:15:00.000000Z + ] + ] + """, + "convert_time_zone": """ + >>> from datetime import datetime, timezone + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [ + ... datetime(2024, 1, 1, tzinfo=timezone.utc), + ... datetime(2024, 1, 2, tzinfo=timezone.utc), + ... ] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_convert_time_zone(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dt.convert_time_zone("Asia/Kathmandu").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_convert_time_zone`: + + >>> agnostic_convert_time_zone(s_pd) + 0 2024-01-01 05:45:00+05:45 + 1 2024-01-02 05:45:00+05:45 + dtype: datetime64[ns, Asia/Kathmandu] + + >>> agnostic_convert_time_zone(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [datetime[μs, Asia/Kathmandu]] + [ + 2024-01-01 05:45:00 +0545 + 2024-01-02 05:45:00 +0545 + ] + + >>> agnostic_convert_time_zone(s_pa) + + [ + [ + 2024-01-01 00:00:00.000000Z, + 2024-01-02 00:00:00.000000Z + ] + ] + """, + "timestamp": """ + >>> from datetime import date + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [date(2001, 1, 1), None, date(2001, 1, 3)] + >>> s_pd = pd.Series(data, dtype="datetime64[ns]") + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> def agnostic_timestamp(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.dt.timestamp("ms").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_timestamp`: + + >>> agnostic_timestamp(s_pd) + 0 9.783072e+11 + 1 NaN + 2 9.784800e+11 + dtype: float64 + + >>> agnostic_timestamp(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 978307200000 + null + 978480000000 + ] + + >>> agnostic_timestamp(s_pa) + + [ + [ + 978307200000, + null, + 978480000000 + ] + ] + """, +} diff --git a/docs/docstring_examples/series_list.py b/docs/docstring_examples/series_list.py new file mode 100644 index 000000000..e5bf2fc77 --- /dev/null +++ b/docs/docstring_examples/series_list.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +EXAMPLES = { + "len": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [[1, 2], [3, 4, None], None, []] + + Let's define a dataframe-agnostic function: + + >>> def agnostic_list_len(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.list.len().to_native() + + We can then pass pandas / PyArrow / Polars / any other supported library: + + >>> agnostic_list_len( + ... pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + ... 
) # doctest: +SKIP + 0 2 + 1 3 + 2 + 3 0 + dtype: int32[pyarrow] + + >>> agnostic_list_len(pl.Series(data)) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [u32] + [ + 2 + 3 + null + 0 + ] + + >>> agnostic_list_len(pa.chunked_array([data])) # doctest: +ELLIPSIS + + [ + [ + 2, + 3, + null, + 0 + ] + ] + """, +} diff --git a/docs/docstring_examples/series_str.py b/docs/docstring_examples/series_str.py new file mode 100644 index 000000000..bb6b67272 --- /dev/null +++ b/docs/docstring_examples/series_str.py @@ -0,0 +1,647 @@ +from __future__ import annotations + +EXAMPLES = { + "len_chars": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["foo", "Café", "345", "東京", None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_len_chars(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.str.len_chars().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_len_chars`: + + >>> agnostic_len_chars(s_pd) + 0 3.0 + 1 4.0 + 2 3.0 + 3 2.0 + 4 NaN + dtype: float64 + + >>> agnostic_len_chars(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (5,) + Series: '' [u32] + [ + 3 + 4 + 3 + 2 + null + ] + + >>> agnostic_len_chars(s_pa) # doctest: +ELLIPSIS + + [ + [ + 3, + 4, + 3, + 2, + null + ] + ] + """, + "replace": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["123abc", "abc abc123"] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_replace(s_native: IntoSeriesT) -> IntoSeriesT: + ... 
s = nw.from_native(s_native, series_only=True) + ... s = s.str.replace("abc", "") + ... return s.to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_replace`: + + >>> agnostic_replace(s_pd) + 0 123 + 1 abc123 + dtype: object + + >>> agnostic_replace(s_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [str] + [ + "123" + " abc123" + ] + + >>> agnostic_replace(s_pa) # doctest: +ELLIPSIS + + [ + [ + "123", + " abc123" + ] + ] + """, + "replace_all": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["123abc", "abc abc123"] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_replace_all(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... s = s.str.replace_all("abc", "") + ... return s.to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_replace_all`: + + >>> agnostic_replace_all(s_pd) + 0 123 + 1 123 + dtype: object + + >>> agnostic_replace_all(s_pl) # doctest:+NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [str] + [ + "123" + " 123" + ] + + >>> agnostic_replace_all(s_pa) # doctest: +ELLIPSIS + + [ + [ + "123", + " 123" + ] + ] + """, + "strip_chars": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["apple", "\\nmango"] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_strip_chars(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... s = s.str.strip_chars() + ... 
return s.to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_strip_chars`: + + >>> agnostic_strip_chars(s_pd) + 0 apple + 1 mango + dtype: object + + >>> agnostic_strip_chars(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [str] + [ + "apple" + "mango" + ] + + >>> agnostic_strip_chars(s_pa) # doctest: +ELLIPSIS + + [ + [ + "apple", + "mango" + ] + ] + """, + "starts_with": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["apple", "mango", None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_starts_with(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.str.starts_with("app").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_starts_with`: + + >>> agnostic_starts_with(s_pd) + 0 True + 1 False + 2 None + dtype: object + + >>> agnostic_starts_with(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [bool] + [ + true + false + null + ] + + >>> agnostic_starts_with(s_pa) # doctest: +ELLIPSIS + + [ + [ + true, + false, + null + ] + ] + """, + "ends_with": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["apple", "mango", None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_ends_with(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.str.ends_with("ngo").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_ends_with`: + + >>> agnostic_ends_with(s_pd) + 0 False + 1 True + 2 None + dtype: object + + >>> agnostic_ends_with(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [bool] + [ + false + true + null + ] + + >>> agnostic_ends_with(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + true, + null + ] + ] + """, + "contains": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["cat", "dog", "rabbit and parrot", "dove", None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_contains(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.str.contains("parrot|dove").to_native() + + We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_contains`: + + >>> agnostic_contains(s_pd) + 0 False + 1 False + 2 True + 3 True + 4 None + dtype: object + + >>> agnostic_contains(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (5,) + Series: '' [bool] + [ + false + false + true + true + null + ] + + >>> agnostic_contains(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + false, + true, + true, + null + ] + ] + """, + "slice": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["pear", None, "papaya", "dragonfruit"] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.str.slice(4, length=3).to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_slice`: + + >>> agnostic_slice(s_pd) # doctest: +NORMALIZE_WHITESPACE + 0 + 1 None + 2 ya + 3 onf + dtype: object + + >>> agnostic_slice(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [str] + [ + "" + null + "ya" + "onf" + ] + + >>> agnostic_slice(s_pa) # doctest: +ELLIPSIS + + [ + [ + "", + null, + "ya", + "onf" + ] + ] + + Using negative indexes: + + >>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.str.slice(-3).to_native() + + >>> agnostic_slice(s_pd) # doctest: +NORMALIZE_WHITESPACE + 0 ear + 1 None + 2 aya + 3 uit + dtype: object + + >>> agnostic_slice(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [str] + [ + "ear" + null + "aya" + "uit" + ] + + >>> agnostic_slice(s_pa) # doctest: +ELLIPSIS + + [ + [ + "ear", + null, + "aya", + "uit" + ] + ] + """, + "head": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["Atatata", "taata", "taatatata", "zukkyun"] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_head(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.str.head().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_head`: + + >>> agnostic_head(s_pd) + 0 Atata + 1 taata + 2 taata + 3 zukky + dtype: object + + >>> agnostic_head(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [str] + [ + "Atata" + "taata" + "taata" + "zukky" + ] + + >>> agnostic_head(s_pa) # doctest: +ELLIPSIS + + [ + [ + "Atata", + "taata", + "taata", + "zukky" + ] + ] + """, + "tail": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["Atatata", "taata", "taatatata", "zukkyun"] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_tail(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.str.tail().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_tail`: + + >>> agnostic_tail(s_pd) + 0 atata + 1 taata + 2 atata + 3 kkyun + dtype: object + + >>> agnostic_tail(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [str] + [ + "atata" + "taata" + "atata" + "kkyun" + ] + + >>> agnostic_tail(s_pa) # doctest: +ELLIPSIS + + [ + [ + "atata", + "taata", + "atata", + "kkyun" + ] + ] + """, + "to_uppercase": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["apple", "mango", None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_to_uppercase(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.str.to_uppercase().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_uppercase`: + + >>> agnostic_to_uppercase(s_pd) + 0 APPLE + 1 MANGO + 2 None + dtype: object + + >>> agnostic_to_uppercase(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [str] + [ + "APPLE" + "MANGO" + null + ] + + >>> agnostic_to_uppercase(s_pa) # doctest: +ELLIPSIS + + [ + [ + "APPLE", + "MANGO", + null + ] + ] + """, + "to_lowercase": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["APPLE", "MANGO", None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_to_lowercase(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.str.to_lowercase().to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_lowercase`: + + >>> agnostic_to_lowercase(s_pd) + 0 apple + 1 mango + 2 None + dtype: object + + >>> agnostic_to_lowercase(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [str] + [ + "apple" + "mango" + null + ] + + >>> agnostic_to_lowercase(s_pa) # doctest: +ELLIPSIS + + [ + [ + "apple", + "mango", + null + ] + ] + """, + "to_datetime": """ + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["2020-01-01", "2020-01-02"] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + We define a dataframe-agnostic function: + + >>> def agnostic_to_datetime(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.str.to_datetime(format="%Y-%m-%d").to_native() + + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_datetime`: + + >>> agnostic_to_datetime(s_pd) + 0 2020-01-01 + 1 2020-01-02 + dtype: datetime64[ns] + + >>> agnostic_to_datetime(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (2,) + Series: '' [datetime[μs]] + [ + 2020-01-01 00:00:00 + 2020-01-02 00:00:00 + ] + + >>> agnostic_to_datetime(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2020-01-01 00:00:00.000000, + 2020-01-02 00:00:00.000000 + ] + ] + """, +} diff --git a/docs/docstring_examples/translate.py b/docs/docstring_examples/translate.py new file mode 100644 index 000000000..1a4d13235 --- /dev/null +++ b/docs/docstring_examples/translate.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +EXAMPLES = { + "get_native_namespace": """ + >>> import polars as pl + >>> import pandas as pd + >>> import narwhals as nw + >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]})) + >>> nw.get_native_namespace(df) + + >>> df = nw.from_native(pl.DataFrame({"a": [1, 2, 3]})) + >>> nw.get_native_namespace(df) + + """, + "narwhalify": """ + Instead of writing + + >>> import narwhals as nw + >>> def agnostic_group_by_sum(df): + ... df = nw.from_native(df, pass_through=True) + ... df = df.group_by("a").agg(nw.col("b").sum()) + ... return nw.to_native(df) + + you can just write + + >>> @nw.narwhalify + ... def agnostic_group_by_sum(df): + ... 
return df.group_by("a").agg(nw.col("b").sum()) + """, + "to_py_scalar": """ + >>> import narwhals as nw + >>> import pandas as pd + >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]})) + >>> nw.to_py_scalar(df["a"].item(0)) + 1 + >>> import pyarrow as pa + >>> df = nw.from_native(pa.table({"a": [1, 2, 3]})) + >>> nw.to_py_scalar(df["a"].item(0)) + 1 + >>> nw.to_py_scalar(1) + 1 + """, +} diff --git a/docs/docstring_examples/utils.py b/docs/docstring_examples/utils.py new file mode 100644 index 000000000..106acd2b4 --- /dev/null +++ b/docs/docstring_examples/utils.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +EXAMPLES = { + "is_pandas": """ + >>> import pandas as pd + >>> import narwhals as nw + >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_pandas() + True + """, + "is_pandas_like": """ + >>> import pandas as pd + >>> import narwhals as nw + >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_pandas_like() + True + """, + "is_polars": """ + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_polars() + True + """, + "is_cudf": """ + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_cudf() + False + """, + "is_modin": """ + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_modin() + False + """, + "is_pyspark": """ + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_pyspark() + False + """, + "is_pyarrow": """ + >>> import polars as pl + >>> import narwhals as nw + >>> df_native 
= pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_pyarrow() + False + """, + "is_dask": """ + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_dask() + False + """, + "is_duckdb": """ + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_duckdb() + False + """, + "is_ibis": """ + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_ibis() + False + """, + "maybe_align_index": """ + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4]) + >>> s_pd = pd.Series([6, 7], index=[4, 3]) + >>> df = nw.from_native(df_pd) + >>> s = nw.from_native(s_pd, series_only=True) + >>> nw.to_native(nw.maybe_align_index(df, s)) + a + 4 2 + 3 1 + """, + "maybe_get_index": """ + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) + >>> df = nw.from_native(df_pd) + >>> nw.maybe_get_index(df) + RangeIndex(start=0, stop=2, step=1) + >>> series_pd = pd.Series([1, 2]) + >>> series = nw.from_native(series_pd, series_only=True) + >>> nw.maybe_get_index(series) + RangeIndex(start=0, stop=2, step=1) + """, + "maybe_set_index": """ + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) + >>> df = nw.from_native(df_pd) + >>> nw.to_native(nw.maybe_set_index(df, "b")) # doctest: +NORMALIZE_WHITESPACE + a + b + 4 1 + 5 2 + """, + "maybe_reset_index": """ + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 
5]}, index=([6, 7])) + >>> df = nw.from_native(df_pd) + >>> nw.to_native(nw.maybe_reset_index(df)) + a b + 0 1 4 + 1 2 5 + >>> series_pd = pd.Series([1, 2]) + >>> series = nw.from_native(series_pd, series_only=True) + >>> nw.maybe_get_index(series) + RangeIndex(start=0, stop=2, step=1) + """, + "maybe_convert_dtypes": """ + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> import numpy as np + >>> df_pd = pd.DataFrame( + ... { + ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), + ... "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")), + ... } + ... ) + >>> df = nw.from_native(df_pd) + >>> nw.to_native(nw.maybe_convert_dtypes(df)).dtypes # doctest: +NORMALIZE_WHITESPACE + a Int32 + b boolean + dtype: object + """, + "is_ordered_categorical": """ + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data = ["x", "y"] + >>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True)) + >>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical")) + + Let's define a library-agnostic function: + + >>> @nw.narwhalify + ... def func(s): + ... return nw.is_ordered_categorical(s) + + Then, we can pass any supported library to `func`: + + >>> func(s_pd) + True + >>> func(s_pl) + True + """, + "generate_temporary_column_name": """ + >>> import narwhals as nw + >>> columns = ["abc", "xyz"] + >>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns + True + """, +} diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index dd786ef3d..13aeaf826 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -391,20 +391,6 @@ def implementation(self) -> Implementation: Returns: Implementation. 
- - Examples: - >>> import narwhals as nw - >>> import pandas as pd - >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation - - >>> df.implementation.is_pandas() - True - >>> df.implementation.is_pandas_like() - True - >>> df.implementation.is_polars() - False """ return self._compliant_frame._implementation # type: ignore[no-any-return] @@ -447,45 +433,6 @@ def lazy(self) -> LazyFrame[Any]: Returns: A new LazyFrame. - - Examples: - Construct pandas, Polars and PyArrow DataFrames: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_lazy(df_native: IntoFrame) -> IntoFrame: - ... df = nw.from_native(df_native) - ... return df.lazy().to_native() - - Note that then, pandas and pyarrow dataframe stay eager, but Polars DataFrame - becomes a Polars LazyFrame: - - >>> agnostic_lazy(df_pd) - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c - >>> agnostic_lazy(df_pl) - - >>> agnostic_lazy(df_pa) - pyarrow.Table - foo: int64 - bar: double - ham: string - ---- - foo: [[1,2,3]] - bar: [[6,7,8]] - ham: [["a","b","c"]] """ return self._lazyframe(self._compliant_frame.lazy(), level="lazy") @@ -494,44 +441,6 @@ def to_native(self) -> DataFrameT: Returns: Object of class that user started with. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Calling `to_native` on a Narwhals DataFrame returns the native object: - - >>> nw.from_native(df_pd).to_native() - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c - >>> nw.from_native(df_pl).to_native() - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6.0 ┆ a │ - │ 2 ┆ 7.0 ┆ b │ - │ 3 ┆ 8.0 ┆ c │ - └─────┴─────┴─────┘ - >>> nw.from_native(df_pa).to_native() - pyarrow.Table - foo: int64 - bar: double - ham: string - ---- - foo: [[1,2,3]] - bar: [[6,7,8]] - ham: [["a","b","c"]] """ return self._compliant_frame._native_frame # type: ignore[no-any-return] @@ -540,44 +449,6 @@ def to_pandas(self) -> pd.DataFrame: Returns: A pandas DataFrame. - - Examples: - Construct pandas, Polars (eager) and PyArrow DataFrames: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_to_pandas(df_native: IntoDataFrame) -> pd.DataFrame: - ... df = nw.from_native(df_native) - ... 
return df.to_pandas() - - We can then pass any supported library such as pandas, Polars (eager), or - PyArrow to `agnostic_to_pandas`: - - >>> agnostic_to_pandas(df_pd) - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c - >>> agnostic_to_pandas(df_pl) - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c - >>> agnostic_to_pandas(df_pa) - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c """ return self._compliant_frame.to_pandas() @@ -596,37 +467,6 @@ def write_csv(self, file: str | Path | BytesIO | None = None) -> str | None: Returns: String or None. - - Examples: - Construct pandas, Polars (eager) and PyArrow DataFrames: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_write_csv(df_native: IntoDataFrame) -> str: - ... df = nw.from_native(df_native) - ... return df.write_csv() - - We can pass any supported library such as pandas, Polars or PyArrow to `agnostic_write_csv`: - - >>> agnostic_write_csv(df_pd) - 'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n' - >>> agnostic_write_csv(df_pl) - 'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n' - >>> agnostic_write_csv(df_pa) - '"foo","bar","ham"\n1,6,"a"\n2,7,"b"\n3,8,"c"\n' - - If we had passed a file name to `write_csv`, it would have been - written to that file. """ return self._compliant_frame.write_csv(file) # type: ignore[no-any-return] @@ -639,31 +479,6 @@ def write_parquet(self, file: str | Path | BytesIO) -> None: Returns: None. 
- - Examples: - Construct pandas, Polars and PyArrow DataFrames: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_write_parquet(df_native: IntoDataFrame): - ... df = nw.from_native(df_native) - ... df.write_parquet("foo.parquet") - - We can then pass either pandas, Polars or PyArrow to `agnostic_write_parquet`: - - >>> agnostic_write_parquet(df_pd) # doctest:+SKIP - >>> agnostic_write_parquet(df_pl) # doctest:+SKIP - >>> agnostic_write_parquet(df_pa) # doctest:+SKIP """ self._compliant_frame.write_parquet(file) @@ -672,41 +487,6 @@ def to_numpy(self) -> np.ndarray: Returns: A NumPy ndarray array. - - Examples: - Construct pandas and polars DataFrames: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> import numpy as np - >>> from narwhals.typing import IntoDataFrame - >>> data = {"foo": [1, 2, 3], "bar": [6.5, 7.0, 8.5], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_to_numpy(df_native: IntoDataFrame) -> np.ndarray: - ... df = nw.from_native(df_native) - ... 
return df.to_numpy() - - We can then pass either pandas, Polars or PyArrow to `agnostic_to_numpy`: - - >>> agnostic_to_numpy(df_pd) - array([[1, 6.5, 'a'], - [2, 7.0, 'b'], - [3, 8.5, 'c']], dtype=object) - >>> agnostic_to_numpy(df_pl) - array([[1, 6.5, 'a'], - [2, 7.0, 'b'], - [3, 8.5, 'c']], dtype=object) - >>> agnostic_to_numpy(df_pa) - array([[1, 6.5, 'a'], - [2, 7.0, 'b'], - [3, 8.5, 'c']], dtype=object) """ return self._compliant_frame.to_numpy() @@ -716,34 +496,6 @@ def shape(self) -> tuple[int, int]: Returns: The shape of the dataframe as a tuple. - - Examples: - Construct pandas and polars DataFrames: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = {"foo": [1, 2, 3, 4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: - ... df = nw.from_native(df_native) - ... return df.shape - - We can then pass either pandas, Polars or PyArrow to `agnostic_shape`: - - >>> agnostic_shape(df_pd) - (5, 1) - >>> agnostic_shape(df_pl) - (5, 1) - >>> agnostic_shape(df_pa) - (5, 1) """ return self._compliant_frame.shape # type: ignore[no-any-return] @@ -762,47 +514,6 @@ def get_column(self, name: str) -> Series[Any]: `narwhals.DataFrame` is backed by a pandas dataframe with non-string columns. This function can only be used to extract a column by name, so there is no risk of ambiguity. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_get_column(df_native: IntoDataFrame) -> IntoSeries: - ... df = nw.from_native(df_native) - ... name = df.columns[0] - ... return df.get_column(name).to_native() - - We can then pass either pandas, Polars or PyArrow to `agnostic_get_column`: - - >>> agnostic_get_column(df_pd) - 0 1 - 1 2 - Name: a, dtype: int64 - >>> agnostic_get_column(df_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (2,) - Series: 'a' [i64] - [ - 1 - 2 - ] - >>> agnostic_get_column(df_pa) # doctest:+ELLIPSIS - - [ - [ - 1, - 2 - ] - ] """ return self._series( self._compliant_frame.get_column(name), @@ -820,36 +531,6 @@ def estimated_size(self, unit: SizeUnit = "b") -> int | float: Returns: Integer or Float. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrameT - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6.0, 7.0, 8.0], - ... "ham": ["a", "b", "c"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_estimated_size(df_native: IntoDataFrameT) -> int | float: - ... df = nw.from_native(df_native) - ... 
return df.estimated_size() - - We can then pass either pandas, Polars or PyArrow to `agnostic_estimated_size`: - - >>> agnostic_estimated_size(df_pd) - np.int64(330) - >>> agnostic_estimated_size(df_pl) - 51 - >>> agnostic_estimated_size(df_pa) - 63 """ return self._compliant_frame.estimated_size(unit=unit) # type: ignore[no-any-return] @@ -938,47 +619,6 @@ def __getitem__( If you don't know whether the column name you're trying to extract is definitely a string (e.g. `df[df.columns[0]]`) then you should use `DataFrame.get_column` instead. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_slice(df_native: IntoDataFrame) -> IntoSeries: - ... df = nw.from_native(df_native) - ... return df["a"].to_native() - - We can then pass either pandas, Polars or PyArrow to `agnostic_slice`: - - >>> agnostic_slice(df_pd) - 0 1 - 1 2 - Name: a, dtype: int64 - >>> agnostic_slice(df_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (2,) - Series: 'a' [i64] - [ - 1 - 2 - ] - >>> agnostic_slice(df_pa) # doctest:+ELLIPSIS - - [ - [ - 1, - 2 - ] - ] - """ if isinstance(item, int): item = [item] @@ -1040,40 +680,6 @@ def to_dict( Returns: A mapping from column name to values / Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = { - ... "A": [1, 2, 3, 4, 5], - ... "fruits": ["banana", "banana", "apple", "apple", "banana"], - ... "B": [5, 4, 3, 2, 1], - ... "animals": ["beetle", "fly", "beetle", "beetle", "beetle"], - ... "optional": [28, 300, None, 2, -30], - ... 
} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_to_dict( - ... df_native: IntoDataFrame, - ... ) -> dict[str, list[int | str | float | None]]: - ... df = nw.from_native(df_native) - ... return df.to_dict(as_series=False) - - We can then pass either pandas, Polars or PyArrow to `agnostic_to_dict`: - - >>> agnostic_to_dict(df_pd) - {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28.0, 300.0, nan, 2.0, -30.0]} - >>> agnostic_to_dict(df_pl) - {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]} - >>> agnostic_to_dict(df_pa) - {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]} """ if as_series: return { @@ -1103,32 +709,6 @@ def row(self, index: int) -> tuple[Any, ...]: Notes: cuDF doesn't support this method. - - Examples: - >>> import narwhals as nw - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> from narwhals.typing import IntoDataFrame - >>> from typing import Any - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a library-agnostic function to get the second row. - - >>> def agnostic_row(df_native: IntoDataFrame) -> tuple[Any, ...]: - ... 
return nw.from_native(df_native).row(1) - - We can then pass either pandas, Polars or PyArrow to `agnostic_row`: - - >>> agnostic_row(df_pd) - (2, 5) - >>> agnostic_row(df_pl) - (2, 5) - >>> agnostic_row(df_pa) - (, ) """ return self._compliant_frame.row(index) # type: ignore[no-any-return] @@ -1143,51 +723,6 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se Returns: The original object with the function applied. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.pipe( - ... lambda _df: _df.select( - ... [x for x in _df.columns if len(x) == 1] - ... ).to_native() - ... ) - - We can then pass either pandas, Polars or PyArrow to `agnostic_pipe`: - - >>> agnostic_pipe(df_pd) - a - 0 1 - 1 2 - 2 3 - >>> agnostic_pipe(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - │ 3 │ - └─────┘ - >>> agnostic_pipe(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[1,2,3]] """ return super().pipe(function, *args, **kwargs) @@ -1205,46 +740,6 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: pandas handles null values differently from Polars and PyArrow. See [null_handling](../pandas_like_concepts/null_handling.md) for reference. 
- - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.drop_nulls().to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_drop_nulls`: - - >>> agnostic_drop_nulls(df_pd) - a ba - 0 1.0 1.0 - >>> agnostic_drop_nulls(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ ba │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════╡ - │ 1.0 ┆ 1.0 │ - └─────┴─────┘ - >>> agnostic_drop_nulls(df_pa) - pyarrow.Table - a: double - ba: double - ---- - a: [[1]] - ba: [[1]] """ return super().drop_nulls(subset=subset) @@ -1256,54 +751,6 @@ def with_row_index(self, name: str = "index") -> Self: Returns: The original object with the column added. - - Examples: - Construct pandas as polars DataFrames: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_with_row_index(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.with_row_index().to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_with_row_index`: - - >>> agnostic_with_row_index(df_pd) - index a b - 0 0 1 4 - 1 1 2 5 - 2 2 3 6 - >>> agnostic_with_row_index(df_pl) - shape: (3, 3) - ┌───────┬─────┬─────┐ - │ index ┆ a ┆ b │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ i64 ┆ i64 │ - ╞═══════╪═════╪═════╡ - │ 0 ┆ 1 ┆ 4 │ - │ 1 ┆ 2 ┆ 5 │ - │ 2 ┆ 3 ┆ 6 │ - └───────┴─────┴─────┘ - >>> agnostic_with_row_index(df_pa) - pyarrow.Table - index: int64 - a: int64 - b: int64 - ---- - index: [[0,1,2]] - a: [[1,2,3]] - b: [[4,5,6]] """ return super().with_row_index(name) @@ -1313,38 +760,6 @@ def schema(self) -> Schema: Returns: A Narwhals Schema object that displays the mapping of column names. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.schema import Schema - >>> from narwhals.typing import IntoFrame - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6.0, 7.0, 8.0], - ... "ham": ["a", "b", "c"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_schema(df_native: IntoFrame) -> Schema: - ... df = nw.from_native(df_native) - ... return df.schema - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_schema`: - - >>> agnostic_schema(df_pd) - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) - >>> agnostic_schema(df_pl) - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) - >>> agnostic_schema(df_pa) - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) """ return super().schema @@ -1353,38 +768,6 @@ def collect_schema(self: Self) -> Schema: Returns: A Narwhals Schema object that displays the mapping of column names. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.schema import Schema - >>> from narwhals.typing import IntoFrame - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6.0, 7.0, 8.0], - ... "ham": ["a", "b", "c"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_collect_schema(df_native: IntoFrame) -> Schema: - ... df = nw.from_native(df_native) - ... return df.collect_schema() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_collect_schema`: - - >>> agnostic_collect_schema(df_pd) - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) - >>> agnostic_collect_schema(df_pl) - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) - >>> agnostic_collect_schema(df_pa) - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) """ return super().collect_schema() @@ -1394,33 +777,6 @@ def columns(self) -> list[str]: Returns: The column names stored in a list. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... return df.columns - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_columns`: - - >>> agnostic_columns(df_pd) - ['foo', 'bar', 'ham'] - >>> agnostic_columns(df_pl) - ['foo', 'bar', 'ham'] - >>> agnostic_columns(df_pa) - ['foo', 'bar', 'ham'] """ return super().columns @@ -1445,38 +801,6 @@ def rows( Returns: The data as a list of rows. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rows(df_native: IntoDataFrame, *, named: bool): - ... return nw.from_native(df_native, eager_only=True).rows(named=named) - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_rows`: - - >>> agnostic_rows(df_pd, named=False) - [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] - >>> agnostic_rows(df_pd, named=True) - [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] - >>> agnostic_rows(df_pl, named=False) - [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] - >>> agnostic_rows(df_pl, named=True) - [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] - >>> agnostic_rows(df_pa, named=False) - [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] - >>> agnostic_rows(df_pa, named=True) - [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] """ return self._compliant_frame.rows(named=named) # type: ignore[no-any-return] @@ -1513,38 +837,6 @@ def iter_rows( Notes: cuDF doesn't support this method. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_iter_rows(df_native: IntoDataFrame, *, named: bool): - ... 
return nw.from_native(df_native, eager_only=True).iter_rows(named=named) - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_iter_rows`: - - >>> [row for row in agnostic_iter_rows(df_pd, named=False)] - [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] - >>> [row for row in agnostic_iter_rows(df_pd, named=True)] - [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] - >>> [row for row in agnostic_iter_rows(df_pl, named=False)] - [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] - >>> [row for row in agnostic_iter_rows(df_pl, named=True)] - [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] - >>> [row for row in agnostic_iter_rows(df_pa, named=False)] - [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')] - >>> [row for row in agnostic_iter_rows(df_pa, named=True)] - [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}] """ return self._compliant_frame.iter_rows(named=named, buffer_size=buffer_size) # type: ignore[no-any-return] @@ -1569,64 +861,6 @@ def with_columns( Note: Creating a new DataFrame using this method does not create a new copy of existing data. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "a": [1, 2, 3, 4], - ... "b": [0.5, 4, 10, 13], - ... "c": [True, True, False, True], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function in which we pass an expression - to add it as a new column: - - >>> def agnostic_with_columns(df_native: IntoFrameT) -> IntoFrameT: - ... return ( - ... nw.from_native(df_native) - ... .with_columns((nw.col("a") * 2).alias("a*2")) - ... .to_native() - ... 
) - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_with_columns`: - - >>> agnostic_with_columns(df_pd) - a b c a*2 - 0 1 0.5 True 2 - 1 2 4.0 True 4 - 2 3 10.0 False 6 - 3 4 13.0 True 8 - >>> agnostic_with_columns(df_pl) - shape: (4, 4) - ┌─────┬──────┬───────┬─────┐ - │ a ┆ b ┆ c ┆ a*2 │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ bool ┆ i64 │ - ╞═════╪══════╪═══════╪═════╡ - │ 1 ┆ 0.5 ┆ true ┆ 2 │ - │ 2 ┆ 4.0 ┆ true ┆ 4 │ - │ 3 ┆ 10.0 ┆ false ┆ 6 │ - │ 4 ┆ 13.0 ┆ true ┆ 8 │ - └─────┴──────┴───────┴─────┘ - >>> agnostic_with_columns(df_pa) - pyarrow.Table - a: int64 - b: double - c: bool - a*2: int64 - ---- - a: [[1,2,3,4]] - b: [[0.5,4,10,13]] - c: [[true,true,false,true]] - a*2: [[2,4,6,8]] """ return super().with_columns(*exprs, **named_exprs) @@ -1647,146 +881,6 @@ def select( Returns: The dataframe containing only the selected columns. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6, 7, 8], - ... "ham": ["a", "b", "c"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function in which we pass the name of a - column to select that column. - - >>> def agnostic_single_select(df_native: IntoFrameT) -> IntoFrameT: - ... return nw.from_native(df_native).select("foo").to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_single_select`: - - >>> agnostic_single_select(df_pd) - foo - 0 1 - 1 2 - 2 3 - >>> agnostic_single_select(df_pl) - shape: (3, 1) - ┌─────┐ - │ foo │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - │ 3 │ - └─────┘ - >>> agnostic_single_select(df_pa) - pyarrow.Table - foo: int64 - ---- - foo: [[1,2,3]] - - Multiple columns can be selected by passing a list of column names. 
- - >>> def agnostic_multi_select(df_native: IntoFrameT) -> IntoFrameT: - ... return nw.from_native(df_native).select(["foo", "bar"]).to_native() - - >>> agnostic_multi_select(df_pd) - foo bar - 0 1 6 - 1 2 7 - 2 3 8 - >>> agnostic_multi_select(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 6 │ - │ 2 ┆ 7 │ - │ 3 ┆ 8 │ - └─────┴─────┘ - >>> agnostic_multi_select(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ---- - foo: [[1,2,3]] - bar: [[6,7,8]] - - Multiple columns can also be selected using positional arguments instead of a - list. Expressions are also accepted. - - >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: - ... return ( - ... nw.from_native(df_native) - ... .select(nw.col("foo"), nw.col("bar") + 1) - ... .to_native() - ... ) - - >>> agnostic_select(df_pd) - foo bar - 0 1 7 - 1 2 8 - 2 3 9 - >>> agnostic_select(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 7 │ - │ 2 ┆ 8 │ - │ 3 ┆ 9 │ - └─────┴─────┘ - >>> agnostic_select(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ---- - foo: [[1,2,3]] - bar: [[7,8,9]] - - Use keyword arguments to easily name your expression inputs. - - >>> def agnostic_select_w_kwargs(df_native: IntoFrameT) -> IntoFrameT: - ... return ( - ... nw.from_native(df_native) - ... .select(threshold=nw.col("foo") * 2) - ... .to_native() - ... ) - - >>> agnostic_select_w_kwargs(df_pd) - threshold - 0 2 - 1 4 - 2 6 - >>> agnostic_select_w_kwargs(df_pl) - shape: (3, 1) - ┌───────────┐ - │ threshold │ - │ --- │ - │ i64 │ - ╞═══════════╡ - │ 2 │ - │ 4 │ - │ 6 │ - └───────────┘ - >>> agnostic_select_w_kwargs(df_pa) - pyarrow.Table - threshold: int64 - ---- - threshold: [[2,4,6]] """ return super().select(*exprs, **named_exprs) @@ -1798,51 +892,6 @@ def rename(self, mapping: dict[str, str]) -> Self: Returns: The dataframe with the specified columns renamed. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rename(df_native: IntoFrameT) -> IntoFrameT: - ... return nw.from_native(df_native).rename({"foo": "apple"}).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_rename`: - - >>> agnostic_rename(df_pd) - apple bar ham - 0 1 6 a - 1 2 7 b - 2 3 8 c - >>> agnostic_rename(df_pl) - shape: (3, 3) - ┌───────┬─────┬─────┐ - │ apple ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═══════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - │ 2 ┆ 7 ┆ b │ - │ 3 ┆ 8 ┆ c │ - └───────┴─────┴─────┘ - >>> agnostic_rename(df_pa) - pyarrow.Table - apple: int64 - bar: int64 - ham: string - ---- - apple: [[1,2,3]] - bar: [[6,7,8]] - ham: [["a","b","c"]] """ return super().rename(mapping) @@ -1855,55 +904,6 @@ def head(self, n: int = 5) -> Self: Returns: A subset of the dataframe of shape (n, n_columns). - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "foo": [1, 2, 3, 4, 5], - ... "bar": [6, 7, 8, 9, 10], - ... "ham": ["a", "b", "c", "d", "e"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that gets the first 3 rows. - - >>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT: - ... 
return nw.from_native(df_native).head(3).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_head`: - - >>> agnostic_head(df_pd) - foo bar ham - 0 1 6 a - 1 2 7 b - 2 3 8 c - >>> agnostic_head(df_pl) - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - │ 2 ┆ 7 ┆ b │ - │ 3 ┆ 8 ┆ c │ - └─────┴─────┴─────┘ - >>> agnostic_head(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ham: string - ---- - foo: [[1,2,3]] - bar: [[6,7,8]] - ham: [["a","b","c"]] """ return super().head(n) @@ -1916,55 +916,6 @@ def tail(self, n: int = 5) -> Self: Returns: A subset of the dataframe of shape (n, n_columns). - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "foo": [1, 2, 3, 4, 5], - ... "bar": [6, 7, 8, 9, 10], - ... "ham": ["a", "b", "c", "d", "e"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that gets the last 3 rows. - - >>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT: - ... 
return nw.from_native(df_native).tail(3).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_tail`: - - >>> agnostic_tail(df_pd) - foo bar ham - 2 3 8 c - 3 4 9 d - 4 5 10 e - >>> agnostic_tail(df_pl) - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 3 ┆ 8 ┆ c │ - │ 4 ┆ 9 ┆ d │ - │ 5 ┆ 10 ┆ e │ - └─────┴─────┴─────┘ - >>> agnostic_tail(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ham: string - ---- - foo: [[3,4,5]] - bar: [[8,9,10]] - ham: [["c","d","e"]] """ return super().tail(n) @@ -1978,77 +929,6 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: *columns: Names of the columns that should be removed from the dataframe. strict: Validate that all column names exist in the schema and throw an exception if a column name does not exist in the schema. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT: - ... return nw.from_native(df_native).drop("ham").to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_drop`: - - >>> agnostic_drop(df_pd) - foo bar - 0 1 6.0 - 1 2 7.0 - 2 3 8.0 - >>> agnostic_drop(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ i64 ┆ f64 │ - ╞═════╪═════╡ - │ 1 ┆ 6.0 │ - │ 2 ┆ 7.0 │ - │ 3 ┆ 8.0 │ - └─────┴─────┘ - >>> agnostic_drop(df_pa) - pyarrow.Table - foo: int64 - bar: double - ---- - foo: [[1,2,3]] - bar: [[6,7,8]] - - Use positional arguments to drop multiple columns. 
- - >>> def agnostic_drop_multi(df_native: IntoFrameT) -> IntoFrameT: - ... return nw.from_native(df_native).drop("foo", "ham").to_native() - - >>> agnostic_drop_multi(df_pd) - bar - 0 6.0 - 1 7.0 - 2 8.0 - >>> agnostic_drop_multi(df_pl) - shape: (3, 1) - ┌─────┐ - │ bar │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 6.0 │ - │ 7.0 │ - │ 8.0 │ - └─────┘ - >>> agnostic_drop_multi(df_pa) - pyarrow.Table - bar: double - ---- - bar: [[6,7,8]] - """ return super().drop(*flatten(columns), strict=strict) @@ -2076,51 +956,6 @@ def unique( Returns: The dataframe with the duplicate rows removed. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "foo": [1, 2, 3, 1], - ... "bar": ["a", "a", "a", "a"], - ... "ham": ["b", "b", "b", "b"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT: - ... return nw.from_native(df_native).unique(["bar", "ham"]).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_unique`: - - >>> agnostic_unique(df_pd) - foo bar ham - 0 1 a b - >>> agnostic_unique(df_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ str ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ a ┆ b │ - └─────┴─────┴─────┘ - >>> agnostic_unique(df_pa) - pyarrow.Table - foo: int64 - bar: string - ham: string - ---- - foo: [[1]] - bar: [["a"]] - ham: [["b"]] """ if keep not in {"any", "none", "first", "last"}: msg = f"Expected {'any', 'none', 'first', 'last'}, got: {keep}" @@ -2149,170 +984,6 @@ def filter( Returns: The filtered dataframe. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6, 7, 8], - ... "ham": ["a", "b", "c"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function in which we filter on - one condition. - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.filter(nw.col("foo") > 1).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_filter`: - - >>> agnostic_filter(df_pd) - foo bar ham - 1 2 7 b - 2 3 8 c - >>> agnostic_filter(df_pl) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 2 ┆ 7 ┆ b │ - │ 3 ┆ 8 ┆ c │ - └─────┴─────┴─────┘ - >>> agnostic_filter(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ham: string - ---- - foo: [[2,3]] - bar: [[7,8]] - ham: [["b","c"]] - - Filter on multiple conditions, combined with and/or operators: - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")).to_native() - >>> agnostic_filter(df_pd) - foo bar ham - 0 1 6 a - >>> agnostic_filter(df_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - └─────┴─────┴─────┘ - >>> agnostic_filter(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ham: string - ---- - foo: [[1]] - bar: [[6]] - ham: [["a"]] - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... dframe = df.filter( - ... (nw.col("foo") == 1) | (nw.col("ham") == "c") - ... ).to_native() - ... 
return dframe - >>> agnostic_filter(df_pd) - foo bar ham - 0 1 6 a - 2 3 8 c - >>> agnostic_filter(df_pl) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - │ 3 ┆ 8 ┆ c │ - └─────┴─────┴─────┘ - >>> agnostic_filter(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ham: string - ---- - foo: [[1,3]] - bar: [[6,8]] - ham: [["a","c"]] - - Provide multiple filters using `*args` syntax: - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... dframe = df.filter( - ... nw.col("foo") <= 2, - ... ~nw.col("ham").is_in(["b", "c"]), - ... ).to_native() - ... return dframe - >>> agnostic_filter(df_pd) - foo bar ham - 0 1 6 a - >>> agnostic_filter(df_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - └─────┴─────┴─────┘ - >>> agnostic_filter(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ham: string - ---- - foo: [[1]] - bar: [[6]] - ham: [["a"]] - - Provide multiple filters using `**kwargs` syntax: - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.filter(foo=2, ham="b").to_native() - >>> agnostic_filter(df_pd) - foo bar ham - 1 2 7 b - >>> agnostic_filter(df_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 2 ┆ 7 ┆ b │ - └─────┴─────┴─────┘ - >>> agnostic_filter(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ham: string - ---- - foo: [[2]] - bar: [[7]] - ham: [["b"]] """ return super().filter(*predicates, **constraints) @@ -2328,89 +999,6 @@ def group_by( Returns: GroupBy: Object which can be used to perform aggregations. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrameT - >>> data = { - ... "a": ["a", "b", "a", "b", "c"], - ... "b": [1, 2, 1, 3, 3], - ... "c": [5, 4, 3, 2, 1], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function in which we group by one column - and call `agg` to compute the grouped sum of another column. - - >>> def agnostic_group_by_agg(df_native: IntoDataFrameT) -> IntoDataFrameT: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.group_by("a").agg(nw.col("b").sum()).sort("a").to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_group_by_agg`: - - >>> agnostic_group_by_agg(df_pd) - a b - 0 a 2 - 1 b 5 - 2 c 3 - >>> agnostic_group_by_agg(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ str ┆ i64 │ - ╞═════╪═════╡ - │ a ┆ 2 │ - │ b ┆ 5 │ - │ c ┆ 3 │ - └─────┴─────┘ - >>> agnostic_group_by_agg(df_pa) - pyarrow.Table - a: string - b: int64 - ---- - a: [["a","b","c"]] - b: [[2,5,3]] - - Group by multiple columns by passing a list of column names. - - >>> def agnostic_group_by_agg(df_native: IntoDataFrameT) -> IntoDataFrameT: - ... df = nw.from_native(df_native, eager_only=True) - ... 
return df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b").to_native() - - >>> agnostic_group_by_agg(df_pd) - a b c - 0 a 1 5 - 1 b 2 4 - 2 b 3 2 - 3 c 3 1 - >>> agnostic_group_by_agg(df_pl) - shape: (4, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ a ┆ 1 ┆ 5 │ - │ b ┆ 2 ┆ 4 │ - │ b ┆ 3 ┆ 2 │ - │ c ┆ 3 ┆ 1 │ - └─────┴─────┴─────┘ - >>> agnostic_group_by_agg(df_pa) - pyarrow.Table - a: string - b: int64 - c: int64 - ---- - a: [["a","b","b","c"]] - b: [[1,2,3,3]] - c: [[5,4,2,1]] """ from narwhals.expr import Expr from narwhals.group_by import GroupBy @@ -2448,57 +1036,6 @@ def sort( Unlike Polars, it is not possible to specify a sequence of booleans for `nulls_last` in order to control per-column behaviour. Instead a single boolean is applied for all `by` columns. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "a": [1, 2, None], - ... "b": [6.0, 5.0, 4.0], - ... "c": ["a", "c", "b"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function in which we sort by multiple - columns in different orders - - >>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.sort("c", "a", descending=[False, True]).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_sort`: - - >>> agnostic_sort(df_pd) - a b c - 0 1.0 6.0 a - 2 NaN 4.0 b - 1 2.0 5.0 c - >>> agnostic_sort(df_pl) - shape: (3, 3) - ┌──────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str │ - ╞══════╪═════╪═════╡ - │ 1 ┆ 6.0 ┆ a │ - │ null ┆ 4.0 ┆ b │ - │ 2 ┆ 5.0 ┆ c │ - └──────┴─────┴─────┘ - >>> agnostic_sort(df_pa) - pyarrow.Table - a: int64 - b: double - c: string - ---- - a: [[1,null,2]] - b: [[6,4,5]] - c: [["a","b","c"]] """ return super().sort(by, *more_by, descending=descending, nulls_last=nulls_last) @@ -2531,70 +1068,6 @@ def join( Returns: A new joined DataFrame - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6.0, 7.0, 8.0], - ... "ham": ["a", "b", "c"], - ... } - >>> data_other = { - ... "apple": ["x", "y", "z"], - ... "ham": ["a", "b", "d"], - ... } - - >>> df_pd = pd.DataFrame(data) - >>> other_pd = pd.DataFrame(data_other) - - >>> df_pl = pl.DataFrame(data) - >>> other_pl = pl.DataFrame(data_other) - - >>> df_pa = pa.table(data) - >>> other_pa = pa.table(data_other) - - Let's define a dataframe-agnostic function in which we join over "ham" column: - - >>> def agnostic_join_on_ham( - ... df_native: IntoFrameT, other_native: IntoFrameT - ... ) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... other = nw.from_native(other_native) - ... 
return df.join(other, left_on="ham", right_on="ham").to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_join_on_ham`: - - >>> agnostic_join_on_ham(df_pd, other_pd) - foo bar ham apple - 0 1 6.0 a x - 1 2 7.0 b y - - >>> agnostic_join_on_ham(df_pl, other_pl) - shape: (2, 4) - ┌─────┬─────┬─────┬───────┐ - │ foo ┆ bar ┆ ham ┆ apple │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str ┆ str │ - ╞═════╪═════╪═════╪═══════╡ - │ 1 ┆ 6.0 ┆ a ┆ x │ - │ 2 ┆ 7.0 ┆ b ┆ y │ - └─────┴─────┴─────┴───────┘ - >>> agnostic_join_on_ham(df_pa, other_pa) - pyarrow.Table - foo: int64 - bar: double - ham: string - apple: string - ---- - foo: [[1,2]] - bar: [[6,7]] - ham: [["a","b"]] - apple: [["x","y"]] """ return super().join( other, how=how, left_on=left_on, right_on=right_on, on=on, suffix=suffix @@ -2634,149 +1107,6 @@ def join_asof( Returns: A new joined DataFrame - - Examples: - >>> from datetime import datetime - >>> from typing import Literal - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data_gdp = { - ... "datetime": [ - ... datetime(2016, 1, 1), - ... datetime(2017, 1, 1), - ... datetime(2018, 1, 1), - ... datetime(2019, 1, 1), - ... datetime(2020, 1, 1), - ... ], - ... "gdp": [4164, 4411, 4566, 4696, 4827], - ... } - >>> data_population = { - ... "datetime": [ - ... datetime(2016, 3, 1), - ... datetime(2018, 8, 1), - ... datetime(2019, 1, 1), - ... ], - ... "population": [82.19, 82.66, 83.12], - ... } - >>> gdp_pd = pd.DataFrame(data_gdp) - >>> population_pd = pd.DataFrame(data_population) - - >>> gdp_pl = pl.DataFrame(data_gdp).sort("datetime") - >>> population_pl = pl.DataFrame(data_population).sort("datetime") - - Let's define a dataframe-agnostic function in which we join over "datetime" column: - - >>> def agnostic_join_asof_datetime( - ... df_native: IntoFrameT, - ... other_native: IntoFrameT, - ... 
strategy: Literal["backward", "forward", "nearest"], - ... ) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... other = nw.from_native(other_native) - ... return df.join_asof(other, on="datetime", strategy=strategy).to_native() - - We can then pass any supported library such as Pandas or Polars - to `agnostic_join_asof_datetime`: - - >>> agnostic_join_asof_datetime(population_pd, gdp_pd, strategy="backward") - datetime population gdp - 0 2016-03-01 82.19 4164 - 1 2018-08-01 82.66 4566 - 2 2019-01-01 83.12 4696 - - >>> agnostic_join_asof_datetime(population_pl, gdp_pl, strategy="backward") - shape: (3, 3) - ┌─────────────────────┬────────────┬──────┐ - │ datetime ┆ population ┆ gdp │ - │ --- ┆ --- ┆ --- │ - │ datetime[μs] ┆ f64 ┆ i64 │ - ╞═════════════════════╪════════════╪══════╡ - │ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │ - │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │ - │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ - └─────────────────────┴────────────┴──────┘ - - Here is a real-world times-series example that uses `by` argument. - - >>> from datetime import datetime - >>> import narwhals as nw - >>> import pandas as pd - >>> import polars as pl - >>> data_quotes = { - ... "datetime": [ - ... datetime(2016, 5, 25, 13, 30, 0, 23), - ... datetime(2016, 5, 25, 13, 30, 0, 23), - ... datetime(2016, 5, 25, 13, 30, 0, 30), - ... datetime(2016, 5, 25, 13, 30, 0, 41), - ... datetime(2016, 5, 25, 13, 30, 0, 48), - ... datetime(2016, 5, 25, 13, 30, 0, 49), - ... datetime(2016, 5, 25, 13, 30, 0, 72), - ... datetime(2016, 5, 25, 13, 30, 0, 75), - ... ], - ... "ticker": [ - ... "GOOG", - ... "MSFT", - ... "MSFT", - ... "MSFT", - ... "GOOG", - ... "AAPL", - ... "GOOG", - ... "MSFT", - ... ], - ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], - ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], - ... } - >>> data_trades = { - ... "datetime": [ - ... datetime(2016, 5, 25, 13, 30, 0, 23), - ... datetime(2016, 5, 25, 13, 30, 0, 38), - ... 
datetime(2016, 5, 25, 13, 30, 0, 48), - ... datetime(2016, 5, 25, 13, 30, 0, 48), - ... datetime(2016, 5, 25, 13, 30, 0, 48), - ... ], - ... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], - ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], - ... "quantity": [75, 155, 100, 100, 100], - ... } - >>> quotes_pd = pd.DataFrame(data_quotes) - >>> trades_pd = pd.DataFrame(data_trades) - >>> quotes_pl = pl.DataFrame(data_quotes).sort("datetime") - >>> trades_pl = pl.DataFrame(data_trades).sort("datetime") - - Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns: - - >>> def agnostic_join_asof_datetime_by_ticker( - ... df_native: IntoFrameT, other_native: IntoFrameT - ... ) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... other = nw.from_native(other_native) - ... return df.join_asof(other, on="datetime", by="ticker").to_native() - - We can now pass either pandas or Polars to the function: - - >>> agnostic_join_asof_datetime_by_ticker(trades_pd, quotes_pd) - datetime ticker price quantity bid ask - 0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96 - 1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98 - 2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93 - 3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93 - 4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN - - >>> agnostic_join_asof_datetime_by_ticker(trades_pl, quotes_pl) - shape: (5, 6) - ┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐ - │ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │ - ╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡ - │ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │ - │ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │ - │ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │ - │ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.92 
┆ 100 ┆ 720.5 ┆ 720.93 │ - │ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │ - └────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘ """ return super().join_asof( other, @@ -2795,57 +1125,6 @@ def is_duplicated(self: Self) -> Series[Any]: Returns: A new Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - >>> data = { - ... "a": [1, 2, 3, 1], - ... "b": ["x", "y", "z", "x"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_duplicated(df_native: IntoDataFrame) -> IntoSeries: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.is_duplicated().to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_is_duplicated`: - - >>> agnostic_is_duplicated(df_pd) - 0 True - 1 False - 2 False - 3 True - dtype: bool - - >>> agnostic_is_duplicated(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [bool] - [ - true - false - false - true - ] - >>> agnostic_is_duplicated(df_pa) # doctest: +ELLIPSIS - - [ - [ - true, - false, - false, - true - ] - ] """ return self._series( self._compliant_frame.is_duplicated(), @@ -2857,37 +1136,6 @@ def is_empty(self: Self) -> bool: Returns: A boolean indicating whether the dataframe is empty (True) or not (False). - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - - Let's define a dataframe-agnostic function that filters rows in which "foo" - values are greater than 10, and then checks if the result is empty or not: - - >>> def agnostic_is_empty(df_native: IntoDataFrame) -> bool: - ... 
df = nw.from_native(df_native, eager_only=True) - ... return df.filter(nw.col("foo") > 10).is_empty() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_is_empty`: - - >>> data = {"foo": [1, 2, 3], "bar": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - >>> agnostic_is_empty(df_pd), agnostic_is_empty(df_pl), agnostic_is_empty(df_pa) - (True, True, True) - - >>> data = {"foo": [100, 2, 3], "bar": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - >>> agnostic_is_empty(df_pd), agnostic_is_empty(df_pl), agnostic_is_empty(df_pa) - (False, False, False) """ return self._compliant_frame.is_empty() # type: ignore[no-any-return] @@ -2896,57 +1144,6 @@ def is_unique(self: Self) -> Series[Any]: Returns: A new Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - >>> data = { - ... "a": [1, 2, 3, 1], - ... "b": ["x", "y", "z", "x"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_unique(df_native: IntoDataFrame) -> IntoSeries: - ... df = nw.from_native(df_native, eager_only=True) - ... 
return df.is_unique().to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_is_unique`: - - >>> agnostic_is_unique(df_pd) - 0 False - 1 True - 2 True - 3 False - dtype: bool - - >>> agnostic_is_unique(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [bool] - [ - false - true - true - false - ] - >>> agnostic_is_unique(df_pa) # doctest: +ELLIPSIS - - [ - [ - false, - true, - true, - false - ] - ] """ return self._series( self._compliant_frame.is_unique(), @@ -2963,55 +1160,6 @@ def null_count(self: Self) -> Self: pandas handles null values differently from Polars and PyArrow. See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "foo": [1, None, 3], - ... "bar": [6, 7, None], - ... "ham": ["a", "b", "c"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that returns the null count of - each columns: - - >>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.null_count().to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_null_count`: - - >>> agnostic_null_count(df_pd) - foo bar ham - 0 1 1 0 - - >>> agnostic_null_count(df_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ u32 ┆ u32 │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 1 ┆ 0 │ - └─────┴─────┴─────┘ - - >>> agnostic_null_count(df_pa) - pyarrow.Table - foo: int64 - bar: int64 - ham: int64 - ---- - foo: [[1]] - bar: [[1]] - ham: [[0]] """ return self._from_compliant_dataframe(self._compliant_frame.null_count()) @@ -3028,35 +1176,6 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) -> Notes: If row/col not provided, this is equivalent to df[0,0], with a check that the shape is (1,1). With row/col, this is equivalent to df[row,col]. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that returns item at given row/column - - >>> def agnostic_item( - ... df_native: IntoDataFrame, row: int | None, column: int | str | None - ... ): - ... df = nw.from_native(df_native, eager_only=True) - ... return df.item(row, column) - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_item`: - - >>> agnostic_item(df_pd, 1, 1), agnostic_item(df_pd, 2, "b") - (np.int64(5), np.int64(6)) - >>> agnostic_item(df_pl, 1, 1), agnostic_item(df_pl, 2, "b") - (5, 6) - >>> agnostic_item(df_pa, 1, 1), agnostic_item(df_pa, 2, "b") - (5, 6) """ return self._compliant_frame.item(row=row, column=column) @@ -3065,41 +1184,6 @@ def clone(self) -> Self: Returns: An identical copy of the original dataframe. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - - Let's define a dataframe-agnostic function in which we clone the DataFrame: - - >>> def agnostic_clone(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.clone().to_native() - - We can then pass any supported library such as Pandas or Polars - to `agnostic_clone`: - - >>> agnostic_clone(df_pd) - a b - 0 1 3 - 1 2 4 - - >>> agnostic_clone(df_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 3 │ - │ 2 ┆ 4 │ - └─────┴─────┘ """ return super().clone() @@ -3112,50 +1196,6 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: Returns: The dataframe containing only the selected rows. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function in which gather every 2 rows, - starting from a offset of 1: - - >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.gather_every(n=2, offset=1).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_gather_every`: - - >>> agnostic_gather_every(df_pd) - a b - 1 2 6 - 3 4 8 - - >>> agnostic_gather_every(df_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 2 ┆ 6 │ - │ 4 ┆ 8 │ - └─────┴─────┘ - >>> agnostic_gather_every(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[2,4]] - b: [[6,8]] """ return super().gather_every(n=n, offset=offset) @@ -3198,46 +1238,6 @@ def pivot( Returns: A new dataframe. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrameT - >>> data = { - ... "ix": [1, 1, 2, 2, 1, 2], - ... "col": ["a", "a", "a", "a", "b", "b"], - ... "foo": [0, 1, 2, 2, 7, 1], - ... "bar": [0, 2, 0, 0, 9, 4], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_pivot(df_native: IntoDataFrameT) -> IntoDataFrameT: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.pivot("col", index="ix", aggregate_function="sum").to_native() - - We can then pass any supported library such as Pandas or Polars - to `agnostic_pivot`: - - >>> agnostic_pivot(df_pd) - ix foo_a foo_b bar_a bar_b - 0 1 1 7 2 9 - 1 2 4 1 0 4 - >>> agnostic_pivot(df_pl) - shape: (2, 5) - ┌─────┬───────┬───────┬───────┬───────┐ - │ ix ┆ foo_a ┆ foo_b ┆ bar_a ┆ bar_b │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ - ╞═════╪═══════╪═══════╪═══════╪═══════╡ - │ 1 ┆ 1 ┆ 7 ┆ 2 ┆ 9 │ - │ 2 ┆ 4 ┆ 1 ┆ 0 ┆ 4 │ - └─────┴───────┴───────┴───────┴───────┘ """ if values is None and index is None: msg = "At least one of `values` and `index` must be passed" @@ -3265,50 +1265,6 @@ def to_arrow(self: Self) -> pa.Table: Returns: A new PyArrow table. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = {"foo": [1, 2, 3], "bar": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that converts to arrow table: - - >>> def agnostic_to_arrow(df_native: IntoDataFrame) -> pa.Table: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.to_arrow() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_to_arrow`: - - >>> agnostic_to_arrow(df_pd) - pyarrow.Table - foo: int64 - bar: string - ---- - foo: [[1,2,3]] - bar: [["a","b","c"]] - - >>> agnostic_to_arrow(df_pl) - pyarrow.Table - foo: int64 - bar: large_string - ---- - foo: [[1,2,3]] - bar: [["a","b","c"]] - - >>> agnostic_to_arrow(df_pa) - pyarrow.Table - foo: int64 - bar: string - ---- - foo: [[1,2,3]] - bar: [["a","b","c"]] """ return self._compliant_frame.to_arrow() @@ -3334,51 +1290,6 @@ def sample( Notes: The results may not be consistent across libraries. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrameT - >>> data = {"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_sample(df_native: IntoDataFrameT) -> IntoDataFrameT: - ... df = nw.from_native(df_native, eager_only=True) - ... 
return df.sample(n=2, seed=123).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_sample`: - - >>> agnostic_sample(df_pd) - a b - 3 4 y - 0 1 x - >>> agnostic_sample(df_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ str │ - ╞═════╪═════╡ - │ 2 ┆ y │ - │ 3 ┆ x │ - └─────┴─────┘ - >>> agnostic_sample(df_pa) - pyarrow.Table - a: int64 - b: string - ---- - a: [[1,3]] - b: [["x","x"]] - - As you can see, by using the same seed, the result will be consistent within - the same backend, but not necessarely across different backends. """ return self._from_compliant_dataframe( self._compliant_frame.sample( @@ -3417,61 +1328,6 @@ def unpivot( If you're coming from pandas, this is similar to `pandas.DataFrame.melt`, but with `index` replacing `id_vars` and `on` replacing `value_vars`. In other frameworks, you might know this operation as `pivot_longer`. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "a": ["x", "y", "z"], - ... "b": [1, 3, 5], - ... "c": [2, 4, 6], - ... } - - We define a library agnostic function: - - >>> def agnostic_unpivot(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.unpivot(on=["b", "c"], index="a").to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_unpivot`: - - >>> agnostic_unpivot(pl.DataFrame(data)) - shape: (6, 3) - ┌─────┬──────────┬───────┐ - │ a ┆ variable ┆ value │ - │ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ i64 │ - ╞═════╪══════════╪═══════╡ - │ x ┆ b ┆ 1 │ - │ y ┆ b ┆ 3 │ - │ z ┆ b ┆ 5 │ - │ x ┆ c ┆ 2 │ - │ y ┆ c ┆ 4 │ - │ z ┆ c ┆ 6 │ - └─────┴──────────┴───────┘ - - >>> agnostic_unpivot(pd.DataFrame(data)) - a variable value - 0 x b 1 - 1 y b 3 - 2 z b 5 - 3 x c 2 - 4 y c 4 - 5 z c 6 - - >>> agnostic_unpivot(pa.table(data)) - pyarrow.Table - a: string - variable: string - value: int64 - ---- - a: [["x","y","z"],["x","y","z"]] - variable: [["b","b","b"],["c","c","c"]] - value: [[1,3,5],[2,4,6]] """ return super().unpivot( on=on, index=index, variable_name=variable_name, value_name=value_name @@ -3490,52 +1346,6 @@ def explode(self: Self, columns: str | Sequence[str], *more_columns: str) -> Sel Returns: New DataFrame - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = { - ... "a": ["x", "y", "z", "w"], - ... "lst1": [[1, 2], None, [None], []], - ... "lst2": [[3, None], None, [42], []], - ... } - - We define a library agnostic function: - - >>> def agnostic_explode(df_native: IntoFrameT) -> IntoFrameT: - ... return ( - ... nw.from_native(df_native) - ... .with_columns(nw.col("lst1", "lst2").cast(nw.List(nw.Int32()))) - ... .explode("lst1", "lst2") - ... .to_native() - ... 
) - - We can then pass any supported library such as pandas, Polars (eager), - or PyArrow to `agnostic_explode`: - - >>> agnostic_explode(pd.DataFrame(data)) - a lst1 lst2 - 0 x 1 3 - 0 x 2 - 1 y - 2 z 42 - 3 w - >>> agnostic_explode(pl.DataFrame(data)) - shape: (5, 3) - ┌─────┬──────┬──────┐ - │ a ┆ lst1 ┆ lst2 │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i32 ┆ i32 │ - ╞═════╪══════╪══════╡ - │ x ┆ 1 ┆ 3 │ - │ x ┆ 2 ┆ null │ - │ y ┆ null ┆ null │ - │ z ┆ null ┆ 42 │ - │ w ┆ null ┆ null │ - └─────┴──────┴──────┘ """ return super().explode(columns, *more_columns) @@ -3582,27 +1392,6 @@ def implementation(self) -> Implementation: Returns: Implementation. - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> lf_pl = pl.LazyFrame({"a": [1, 2, 3]}) - >>> lf_dask = dd.from_dict({"a": [1, 2, 3]}, npartitions=2) - - >>> lf = nw.from_native(lf_pl) - >>> lf.implementation - - >>> lf.implementation.is_pandas() - False - >>> lf.implementation.is_polars() - True - - >>> lf = nw.from_native(lf_dask) - >>> lf.implementation - - >>> lf.implementation.is_dask() - True """ return self._compliant_frame._implementation # type: ignore[no-any-return] @@ -3615,59 +1404,6 @@ def collect(self) -> DataFrame[Any]: Returns: DataFrame - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> data = { - ... "a": ["a", "b", "a", "b", "b", "c"], - ... "b": [1, 2, 3, 4, 5, 6], - ... "c": [6, 5, 4, 3, 2, 1], - ... 
} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - >>> lf = nw.from_native(lf_pl) - >>> lf # doctest:+ELLIPSIS - ┌─────────────────────────────┐ - | Narwhals LazyFrame | - |-----------------------------| - |>> df = lf.group_by("a").agg(nw.all().sum()).collect() - >>> df.to_native().sort("a") - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ a ┆ 4 ┆ 10 │ - │ b ┆ 11 ┆ 10 │ - │ c ┆ 6 ┆ 1 │ - └─────┴─────┴─────┘ - - >>> lf = nw.from_native(lf_dask) - >>> lf - ┌───────────────────────────────────┐ - | Narwhals LazyFrame | - |-----------------------------------| - |Dask DataFrame Structure: | - | a b c| - |npartitions=2 | - |0 string int64 int64| - |3 ... ... ...| - |5 ... ... ...| - |Dask Name: frompandas, 1 expression| - |Expr=df | - └───────────────────────────────────┘ - >>> df = lf.group_by("a").agg(nw.col("b", "c").sum()).collect() - >>> df.to_native() - a b c - 0 a 4 10 - 1 b 11 10 - 2 c 6 1 """ return self._dataframe( self._compliant_frame.collect(), @@ -3679,34 +1415,6 @@ def to_native(self) -> FrameT: Returns: Object of class that user started with. 
- - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Calling `to_native` on a Narwhals LazyFrame returns the native object: - - >>> nw.from_native(lf_pl).to_native().collect() - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6.0 ┆ a │ - │ 2 ┆ 7.0 ┆ b │ - │ 3 ┆ 8.0 ┆ c │ - └─────┴─────┴─────┘ - >>> nw.from_native(lf_dask).to_native().compute() - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c """ return to_native(narwhals_object=self, pass_through=False) @@ -3721,41 +1429,6 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se Returns: The original object with the function applied. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.pipe(lambda _df: _df.select("a")).collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_pipe`: - - >>> agnostic_pipe(lf_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - │ 3 │ - └─────┘ - >>> agnostic_pipe(lf_dask) - a - 0 1 - 1 2 - 2 3 """ return super().pipe(function, *args, **kwargs) @@ -3773,37 +1446,6 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self: pandas handles null values differently from Polars and PyArrow. See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. 
- - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.drop_nulls().collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_drop_nulls`: - - >>> agnostic_drop_nulls(lf_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ ba │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════╡ - │ 1.0 ┆ 1.0 │ - └─────┴─────┘ - >>> agnostic_drop_nulls(lf_dask) - a ba - 0 1.0 1.0 """ return super().drop_nulls(subset=subset) @@ -3815,41 +1457,6 @@ def with_row_index(self, name: str = "index") -> Self: Returns: The original object with the column added. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_with_row_index(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.with_row_index().collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_with_row_index`: - - >>> agnostic_with_row_index(lf_pl) - shape: (3, 3) - ┌───────┬─────┬─────┐ - │ index ┆ a ┆ b │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ i64 ┆ i64 │ - ╞═══════╪═════╪═════╡ - │ 0 ┆ 1 ┆ 4 │ - │ 1 ┆ 2 ┆ 5 │ - │ 2 ┆ 3 ┆ 6 │ - └───────┴─────┴─────┘ - >>> agnostic_with_row_index(lf_dask) - index a b - 0 0 1 4 - 1 1 2 5 - 2 2 3 6 """ return super().with_row_index(name) @@ -3859,26 +1466,6 @@ def schema(self) -> Schema: Returns: A Narwhals Schema object that displays the mapping of column names. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6.0, 7.0, 8.0], - ... "ham": ["a", "b", "c"], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - >>> lf = nw.from_native(lf_pl) - >>> lf.schema # doctest: +SKIP - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) - - >>> lf = nw.from_native(lf_dask) - >>> lf.schema # doctest: +SKIP - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) """ return super().schema @@ -3887,26 +1474,6 @@ def collect_schema(self: Self) -> Schema: Returns: A Narwhals Schema object that displays the mapping of column names. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6.0, 7.0, 8.0], - ... "ham": ["a", "b", "c"], - ... 
} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - >>> lf = nw.from_native(lf_pl) - >>> lf.collect_schema() - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) - - >>> lf = nw.from_native(lf_dask) - >>> lf.collect_schema() - Schema({'foo': Int64, 'bar': Float64, 'ham': String}) """ return super().collect_schema() @@ -3916,29 +1483,6 @@ def columns(self) -> list[str]: Returns: The column names stored in a list. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - We define a library agnostic function: - - >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... return df.columns - - We can then pass any supported library such as Polars or Dask to `agnostic_columns`: - - >>> agnostic_columns(lf_pl) # doctest: +SKIP - ['foo', 'bar', 'ham'] - >>> agnostic_columns(lf_dask) - ['foo', 'bar', 'ham'] """ return super().columns @@ -3963,50 +1507,6 @@ def with_columns( Note: Creating a new LazyFrame using this method does not create a new copy of existing data. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 2, 3, 4], - ... "b": [0.5, 4, 10, 13], - ... "c": [True, True, False, True], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function in which we pass an expression - to add it as a new column: - - >>> def agnostic_with_columns(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return ( - ... df.with_columns((nw.col("a") * 2).alias("2a")).collect().to_native() - ... 
) - - We can then pass any supported library such as Polars or Dask to `agnostic_with_columns`: - - >>> agnostic_with_columns(lf_pl) - shape: (4, 4) - ┌─────┬──────┬───────┬─────┐ - │ a ┆ b ┆ c ┆ 2a │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ bool ┆ i64 │ - ╞═════╪══════╪═══════╪═════╡ - │ 1 ┆ 0.5 ┆ true ┆ 2 │ - │ 2 ┆ 4.0 ┆ true ┆ 4 │ - │ 3 ┆ 10.0 ┆ false ┆ 6 │ - │ 4 ┆ 13.0 ┆ true ┆ 8 │ - └─────┴──────┴───────┴─────┘ - >>> agnostic_with_columns(lf_dask) - a b c 2a - 0 1 0.5 True 2 - 1 2 4.0 True 4 - 2 3 10.0 False 6 - 3 4 13.0 True 8 """ return super().with_columns(*exprs, **named_exprs) @@ -4031,116 +1531,6 @@ def select( if you're working with pandas) then you should explicitly use `nw.col` instead of just passing the column name. For example, to select a column named `0` use `df.select(nw.col(0))`, not `df.select(0)`. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6, 7, 8], - ... "ham": ["a", "b", "c"], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function in which we pass the name of a - column to select that column. - - >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select("foo").collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_select`: - - >>> agnostic_select(lf_pl) - shape: (3, 1) - ┌─────┐ - │ foo │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - │ 3 │ - └─────┘ - >>> agnostic_select(lf_dask) - foo - 0 1 - 1 2 - 2 3 - - Multiple columns can be selected by passing a list of column names. - - >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(["foo", "bar"]).collect().to_native() - - >>> agnostic_select(lf_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 6 │ - │ 2 ┆ 7 │ - │ 3 ┆ 8 │ - └─────┴─────┘ - >>> agnostic_select(lf_dask) - foo bar - 0 1 6 - 1 2 7 - 2 3 8 - - Multiple columns can also be selected using positional arguments instead of a - list. Expressions are also accepted. - - >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("foo"), nw.col("bar") + 1).collect().to_native() - - >>> agnostic_select(lf_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 7 │ - │ 2 ┆ 8 │ - │ 3 ┆ 9 │ - └─────┴─────┘ - >>> agnostic_select(lf_dask) - foo bar - 0 1 7 - 1 2 8 - 2 3 9 - - Use keyword arguments to easily name your expression inputs. - - >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(threshold=nw.col("foo") * 2).collect().to_native() - - >>> agnostic_select(lf_pl) - shape: (3, 1) - ┌───────────┐ - │ threshold │ - │ --- │ - │ i64 │ - ╞═══════════╡ - │ 2 │ - │ 4 │ - │ 6 │ - └───────────┘ - >>> agnostic_select(lf_dask) - threshold - 0 2 - 1 4 - 2 6 """ return super().select(*exprs, **named_exprs) @@ -4154,41 +1544,6 @@ def rename(self, mapping: dict[str, str]) -> Self: Returns: The LazyFrame with the specified columns renamed. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - We define a library agnostic function: - - >>> def agnostic_rename(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.rename({"foo": "apple"}).collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_rename`: - - >>> agnostic_rename(lf_pl) - shape: (3, 3) - ┌───────┬─────┬─────┐ - │ apple ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═══════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - │ 2 ┆ 7 ┆ b │ - │ 3 ┆ 8 ┆ c │ - └───────┴─────┴─────┘ - >>> agnostic_rename(lf_dask) - apple bar ham - 0 1 6 a - 1 2 7 b - 2 3 8 c """ return super().rename(mapping) @@ -4200,44 +1555,6 @@ def head(self, n: int = 5) -> Self: Returns: A subset of the LazyFrame of shape (n, n_columns). - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 2, 3, 4, 5, 6], - ... "b": [7, 8, 9, 10, 11, 12], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function that gets the first 3 rows. - - >>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.head(3).collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_head`: - - >>> agnostic_head(lf_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 7 │ - │ 2 ┆ 8 │ - │ 3 ┆ 9 │ - └─────┴─────┘ - >>> agnostic_head(lf_dask) - a b - 0 1 7 - 1 2 8 - 2 3 9 """ return super().head(n) @@ -4253,44 +1570,6 @@ def tail(self, n: int = 5) -> Self: Notes: `LazyFrame.tail` is not supported for the Dask backend with multiple partitions. - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 2, 3, 4, 5, 6], - ... "b": [7, 8, 9, 10, 11, 12], - ... 
} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=1) - - Let's define a dataframe-agnostic function that gets the last 3 rows. - - >>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.tail(3).collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_tail`: - - >>> agnostic_tail(lf_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 4 ┆ 10 │ - │ 5 ┆ 11 │ - │ 6 ┆ 12 │ - └─────┴─────┘ - >>> agnostic_tail(lf_dask) - a b - 3 4 10 - 4 5 11 - 5 6 12 """ return super().tail(n) @@ -4309,64 +1588,6 @@ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self: `strict` argument is ignored for `polars<1.0.0`. Please consider upgrading to a newer version or pass to eager mode. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - We define a library agnostic function: - - >>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.drop("ham").collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_drop`: - - >>> agnostic_drop(lf_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ i64 ┆ f64 │ - ╞═════╪═════╡ - │ 1 ┆ 6.0 │ - │ 2 ┆ 7.0 │ - │ 3 ┆ 8.0 │ - └─────┴─────┘ - >>> agnostic_drop(lf_dask) - foo bar - 0 1 6.0 - 1 2 7.0 - 2 3 8.0 - - Use positional arguments to drop multiple columns. - - >>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.drop("foo", "ham").collect().to_native() - - >>> agnostic_drop(lf_pl) - shape: (3, 1) - ┌─────┐ - │ bar │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 6.0 │ - │ 7.0 │ - │ 8.0 │ - └─────┘ - >>> agnostic_drop(lf_dask) - bar - 0 6.0 - 1 7.0 - 2 8.0 """ return super().drop(*flatten(columns), strict=strict) @@ -4392,41 +1613,6 @@ def unique( Returns: The LazyFrame with unique rows. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "foo": [1, 2, 3, 1], - ... "bar": ["a", "a", "a", "a"], - ... "ham": ["b", "b", "b", "b"], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - We define a library agnostic function: - - >>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.unique(["bar", "ham"]).collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_unique`: - - >>> agnostic_unique(lf_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ str ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ a ┆ b │ - └─────┴─────┴─────┘ - >>> agnostic_unique(lf_dask) - foo bar ham - 0 1 a b """ if keep not in {"any", "none"}: msg = ( @@ -4465,137 +1651,6 @@ def filter( Returns: The filtered LazyFrame. - - Examples: - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6, 7, 8], - ... "ham": ["a", "b", "c"], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function in which we filter on - one condition. - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.filter(nw.col("foo") > 1).collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_filter`: - - >>> agnostic_filter(lf_pl) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 2 ┆ 7 ┆ b │ - │ 3 ┆ 8 ┆ c │ - └─────┴─────┴─────┘ - >>> agnostic_filter(lf_dask) - foo bar ham - 1 2 7 b - 2 3 8 c - - Filter on multiple conditions: - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return ( - ... df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")) - ... .collect() - ... .to_native() - ... ) - - >>> agnostic_filter(lf_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - └─────┴─────┴─────┘ - >>> agnostic_filter(lf_dask) - foo bar ham - 0 1 6 a - - Provide multiple filters using `*args` syntax: - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return ( - ... df.filter( - ... nw.col("foo") == 1, - ... nw.col("ham") == "a", - ... ) - ... .collect() - ... .to_native() - ... ) - - >>> agnostic_filter(lf_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - └─────┴─────┴─────┘ - >>> agnostic_filter(lf_dask) - foo bar ham - 0 1 6 a - - Filter on an OR condition: - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return ( - ... df.filter((nw.col("foo") == 1) | (nw.col("ham") == "c")) - ... .collect() - ... .to_native() - ... 
) - - >>> agnostic_filter(lf_pl) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 6 ┆ a │ - │ 3 ┆ 8 ┆ c │ - └─────┴─────┴─────┘ - >>> agnostic_filter(lf_dask) - foo bar ham - 0 1 6 a - 2 3 8 c - - Provide multiple filters using `**kwargs` syntax: - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.filter(foo=2, ham="b").collect().to_native() - - >>> agnostic_filter(lf_pl) - shape: (1, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 2 ┆ 7 ┆ b │ - └─────┴─────┴─────┘ - >>> agnostic_filter(lf_dask) - foo bar ham - 1 2 7 b """ if ( len(predicates) == 1 @@ -4622,86 +1677,6 @@ def group_by( Returns: Object which can be used to perform aggregations. - - Examples: - Group by one column and call `agg` to compute the grouped sum of - another column. - - >>> import polars as pl - >>> import dask.dataframe as dd - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": ["a", "b", "a", "b", "c"], - ... "b": [1, 2, 1, 3, 3], - ... "c": [5, 4, 3, 2, 1], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function in which we group by one column - and call `agg` to compute the grouped sum of another column. - - >>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return ( - ... df.group_by("a") - ... .agg(nw.col("b").sum()) - ... .sort("a") - ... .collect() - ... .to_native() - ... 
) - - We can then pass any supported library such as Polars or Dask to `agnostic_group_by_agg`: - - >>> agnostic_group_by_agg(lf_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ str ┆ i64 │ - ╞═════╪═════╡ - │ a ┆ 2 │ - │ b ┆ 5 │ - │ c ┆ 3 │ - └─────┴─────┘ - >>> agnostic_group_by_agg(lf_dask) - a b - 0 a 2 - 1 b 5 - 2 c 3 - - Group by multiple columns by passing a list of column names. - - >>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return ( - ... df.group_by(["a", "b"]) - ... .agg(nw.max("c")) - ... .sort(["a", "b"]) - ... .collect() - ... .to_native() - ... ) - - >>> agnostic_group_by_agg(lf_pl) - shape: (4, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ a ┆ 1 ┆ 5 │ - │ b ┆ 2 ┆ 4 │ - │ b ┆ 3 ┆ 2 │ - │ c ┆ 3 ┆ 1 │ - └─────┴─────┴─────┘ - >>> agnostic_group_by_agg(lf_dask) - a b c - 0 a 1 5 - 1 b 2 4 - 2 b 3 2 - 3 c 3 1 """ from narwhals.expr import Expr from narwhals.group_by import LazyGroupBy @@ -4740,46 +1715,6 @@ def sort( Unlike Polars, it is not possible to specify a sequence of booleans for `nulls_last` in order to control per-column behaviour. Instead a single boolean is applied for all `by` columns. - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 2, None], - ... "b": [6.0, 5.0, 4.0], - ... "c": ["a", "c", "b"], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function in which we sort by multiple - columns in different orders - - >>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.sort("c", "a", descending=[False, True]).collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_sort`: - - >>> agnostic_sort(lf_pl) - shape: (3, 3) - ┌──────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str │ - ╞══════╪═════╪═════╡ - │ 1 ┆ 6.0 ┆ a │ - │ null ┆ 4.0 ┆ b │ - │ 2 ┆ 5.0 ┆ c │ - └──────┴─────┴─────┘ - >>> agnostic_sort(lf_dask) - a b c - 0 1.0 6.0 a - 2 NaN 4.0 b - 1 2.0 5.0 c """ return super().sort(by, *more_by, descending=descending, nulls_last=nulls_last) @@ -4812,59 +1747,6 @@ def join( Returns: A new joined LazyFrame. - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "foo": [1, 2, 3], - ... "bar": [6.0, 7.0, 8.0], - ... "ham": ["a", "b", "c"], - ... } - >>> data_other = { - ... "apple": ["x", "y", "z"], - ... "ham": ["a", "b", "d"], - ... } - - >>> lf_pl = pl.LazyFrame(data) - >>> other_pl = pl.LazyFrame(data_other) - >>> lf_dask = dd.from_dict(data, npartitions=2) - >>> other_dask = dd.from_dict(data_other, npartitions=2) - - Let's define a dataframe-agnostic function in which we join over "ham" column: - - >>> def agnostic_join_on_ham( - ... df_native: IntoFrameT, - ... other_native: IntoFrameT, - ... ) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... other = nw.from_native(other_native) - ... return ( - ... df.join(other, left_on="ham", right_on="ham") - ... .sort("ham") - ... .collect() - ... .to_native() - ... 
) - - We can then pass any supported library such as Polars or Dask to `agnostic_join_on_ham`: - - >>> agnostic_join_on_ham(lf_pl, other_pl) - shape: (2, 4) - ┌─────┬─────┬─────┬───────┐ - │ foo ┆ bar ┆ ham ┆ apple │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str ┆ str │ - ╞═════╪═════╪═════╪═══════╡ - │ 1 ┆ 6.0 ┆ a ┆ x │ - │ 2 ┆ 7.0 ┆ b ┆ y │ - └─────┴─────┴─────┴───────┘ - >>> agnostic_join_on_ham(lf_dask, other_dask) - foo bar ham apple - 0 1 6.0 a x - 0 2 7.0 b y """ return super().join( other, how=how, left_on=left_on, right_on=right_on, on=on, suffix=suffix @@ -4911,160 +1793,6 @@ def join_asof( Returns: A new joined LazyFrame. - - Examples: - >>> from datetime import datetime - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> from typing import Literal - >>> from narwhals.typing import IntoFrameT - >>> - >>> data_gdp = { - ... "datetime": [ - ... datetime(2016, 1, 1), - ... datetime(2017, 1, 1), - ... datetime(2018, 1, 1), - ... datetime(2019, 1, 1), - ... datetime(2020, 1, 1), - ... ], - ... "gdp": [4164, 4411, 4566, 4696, 4827], - ... } - >>> data_population = { - ... "datetime": [ - ... datetime(2016, 3, 1), - ... datetime(2018, 8, 1), - ... datetime(2019, 1, 1), - ... ], - ... "population": [82.19, 82.66, 83.12], - ... } - >>> gdp_pl = pl.LazyFrame(data_gdp) - >>> population_pl = pl.LazyFrame(data_population) - >>> gdp_dask = dd.from_dict(data_gdp, npartitions=2) - >>> population_dask = dd.from_dict(data_population, npartitions=2) - - Let's define a dataframe-agnostic function in which we join over "datetime" column: - - >>> def agnostic_join_asof_datetime( - ... df_native: IntoFrameT, - ... other_native: IntoFrameT, - ... strategy: Literal["backward", "forward", "nearest"], - ... ) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... other = nw.from_native(other_native) - ... return ( - ... df.sort("datetime") - ... .join_asof(other, on="datetime", strategy=strategy) - ... .collect() - ... 
.to_native() - ... ) - - We can then pass any supported library such as Polars or Dask to `agnostic_join_asof_datetime`: - - >>> agnostic_join_asof_datetime(population_pl, gdp_pl, strategy="backward") - shape: (3, 3) - ┌─────────────────────┬────────────┬──────┐ - │ datetime ┆ population ┆ gdp │ - │ --- ┆ --- ┆ --- │ - │ datetime[μs] ┆ f64 ┆ i64 │ - ╞═════════════════════╪════════════╪══════╡ - │ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │ - │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │ - │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ - └─────────────────────┴────────────┴──────┘ - >>> agnostic_join_asof_datetime(population_dask, gdp_dask, strategy="backward") - datetime population gdp - 0 2016-03-01 82.19 4164 - 1 2018-08-01 82.66 4566 - 0 2019-01-01 83.12 4696 - - Here is a real-world times-series example that uses `by` argument. - - >>> from datetime import datetime - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> from narwhals.typing import IntoFrameT - >>> - >>> data_quotes = { - ... "datetime": [ - ... datetime(2016, 5, 25, 13, 30, 0, 23), - ... datetime(2016, 5, 25, 13, 30, 0, 23), - ... datetime(2016, 5, 25, 13, 30, 0, 30), - ... datetime(2016, 5, 25, 13, 30, 0, 41), - ... datetime(2016, 5, 25, 13, 30, 0, 48), - ... datetime(2016, 5, 25, 13, 30, 0, 49), - ... datetime(2016, 5, 25, 13, 30, 0, 72), - ... datetime(2016, 5, 25, 13, 30, 0, 75), - ... ], - ... "ticker": [ - ... "GOOG", - ... "MSFT", - ... "MSFT", - ... "MSFT", - ... "GOOG", - ... "AAPL", - ... "GOOG", - ... "MSFT", - ... ], - ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], - ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], - ... } - >>> data_trades = { - ... "datetime": [ - ... datetime(2016, 5, 25, 13, 30, 0, 23), - ... datetime(2016, 5, 25, 13, 30, 0, 38), - ... datetime(2016, 5, 25, 13, 30, 0, 48), - ... datetime(2016, 5, 25, 13, 30, 0, 49), - ... datetime(2016, 5, 25, 13, 30, 0, 48), - ... ], - ... 
"ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], - ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], - ... "quantity": [75, 155, 100, 100, 100], - ... } - >>> quotes_pl = pl.LazyFrame(data_quotes) - >>> trades_pl = pl.LazyFrame(data_trades) - >>> quotes_dask = dd.from_dict(data_quotes, npartitions=2) - >>> trades_dask = dd.from_dict(data_trades, npartitions=2) - - Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns: - - >>> def agnostic_join_asof_datetime_by_ticker( - ... df_native: IntoFrameT, - ... other_native: IntoFrameT, - ... ) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... other = nw.from_native(other_native) - ... return ( - ... df.sort("datetime", "ticker") - ... .join_asof(other, on="datetime", by="ticker") - ... .sort("datetime", "ticker") - ... .collect() - ... .to_native() - ... ) - - We can then pass any supported library such as Polars or Dask to `agnostic_join_asof_datetime_by_ticker`: - - >>> agnostic_join_asof_datetime_by_ticker(trades_pl, quotes_pl) - shape: (5, 6) - ┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐ - │ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │ - ╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡ - │ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │ - │ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │ - │ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │ - │ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │ - │ 2016-05-25 13:30:00.000049 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │ - └────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘ - >>> agnostic_join_asof_datetime_by_ticker(trades_dask, quotes_dask) - datetime ticker price quantity bid ask - 0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96 - 0 2016-05-25 13:30:00.000038 MSFT 
51.95 155 51.97 51.98 - 1 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN - 2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93 - 3 2016-05-25 13:30:00.000049 GOOG 720.92 100 720.50 720.93 """ return super().join_asof( other, @@ -5082,33 +1810,6 @@ def clone(self) -> Self: Returns: An identical copy of the original LazyFrame. - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> lf_pl = pl.LazyFrame(data) - - Let's define a dataframe-agnostic function in which we copy the DataFrame: - - >>> def agnostic_clone(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.clone().collect().to_native() - - We can then pass any supported library such as Polars to `agnostic_clone`: - - >>> agnostic_clone(lf_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 3 │ - │ 2 ┆ 4 │ - └─────┴─────┘ """ return super().clone() @@ -5119,34 +1820,6 @@ def lazy(self) -> Self: Returns: A LazyFrame. - - Examples: - Construct pandas and Polars objects: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(df) - >>> lf_pl = pl.LazyFrame(df) - - We define a library agnostic function: - - >>> def agnostic_lazy(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.lazy().to_native() - - Note that then, pandas dataframe stay eager, and the Polars LazyFrame stays lazy: - - >>> agnostic_lazy(df_pd) - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c - >>> agnostic_lazy(lf_pl) - """ return self @@ -5159,40 +1832,6 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: Returns: The LazyFrame containing only the selected rows. 
- - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - Let's define a dataframe-agnostic function in which we gather every 2 rows, - starting from a offset of 1: - - >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.gather_every(n=2, offset=1).collect().to_native() - - We can then pass any supported library such as Polars or Dask to `agnostic_gather_every`: - - >>> agnostic_gather_every(lf_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 2 ┆ 6 │ - │ 4 ┆ 8 │ - └─────┴─────┘ - >>> agnostic_gather_every(lf_dask) - a b - 1 2 6 - 3 4 8 """ return super().gather_every(n=n, offset=offset) @@ -5227,55 +1866,6 @@ def unpivot( If you're coming from pandas, this is similar to `pandas.DataFrame.melt`, but with `index` replacing `id_vars` and `on` replacing `value_vars`. In other frameworks, you might know this operation as `pivot_longer`. - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": ["x", "y", "z"], - ... "b": [1, 3, 5], - ... "c": [2, 4, 6], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - We define a library agnostic function: - - >>> def agnostic_unpivot(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return ( - ... (df.unpivot(on=["b", "c"], index="a").sort(["variable", "a"])) - ... .collect() - ... .to_native() - ... 
) - - We can then pass any supported library such as Polars or Dask to `agnostic_unpivot`: - - >>> agnostic_unpivot(lf_pl) - shape: (6, 3) - ┌─────┬──────────┬───────┐ - │ a ┆ variable ┆ value │ - │ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ i64 │ - ╞═════╪══════════╪═══════╡ - │ x ┆ b ┆ 1 │ - │ y ┆ b ┆ 3 │ - │ z ┆ b ┆ 5 │ - │ x ┆ c ┆ 2 │ - │ y ┆ c ┆ 4 │ - │ z ┆ c ┆ 6 │ - └─────┴──────────┴───────┘ - >>> agnostic_unpivot(lf_dask) - a variable value - 0 x b 1 - 1 y b 3 - 0 z b 5 - 2 x c 2 - 3 y c 4 - 1 z c 6 """ return super().unpivot( on=on, index=index, variable_name=variable_name, value_name=value_name @@ -5294,42 +1884,5 @@ def explode(self: Self, columns: str | Sequence[str], *more_columns: str) -> Sel Returns: New LazyFrame - - Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> import polars as pl - >>> data = { - ... "a": ["x", "y", "z", "w"], - ... "lst1": [[1, 2], None, [None], []], - ... "lst2": [[3, None], None, [42], []], - ... } - - We define a library agnostic function: - - >>> def agnostic_explode(df_native: IntoFrameT) -> IntoFrameT: - ... return ( - ... nw.from_native(df_native) - ... .with_columns(nw.col("lst1", "lst2").cast(nw.List(nw.Int32()))) - ... .explode("lst1", "lst2") - ... .collect() - ... .to_native() - ... 
) - - We can then pass any supported library such as Polars to `agnostic_explode`: - - >>> agnostic_explode(pl.LazyFrame(data)) - shape: (5, 3) - ┌─────┬──────┬──────┐ - │ a ┆ lst1 ┆ lst2 │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i32 ┆ i32 │ - ╞═════╪══════╪══════╡ - │ x ┆ 1 ┆ 3 │ - │ x ┆ 2 ┆ null │ - │ y ┆ null ┆ null │ - │ z ┆ null ┆ 42 │ - │ w ┆ null ┆ null │ - └─────┴──────┴──────┘ """ return super().explode(columns, *more_columns) diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py index 43904a0ba..7fa2727a8 100644 --- a/narwhals/dependencies.py +++ b/narwhals/dependencies.py @@ -262,23 +262,6 @@ def is_into_series(native_series: IntoSeries) -> bool: Returns: `True` if `native_series` can be converted to a Narwhals Series, `False` otherwise. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import numpy as np - >>> import narwhals as nw - - >>> s_pd = pd.Series([1, 2, 3]) - >>> s_pl = pl.Series([1, 2, 3]) - >>> np_arr = np.array([1, 2, 3]) - - >>> nw.dependencies.is_into_series(s_pd) - True - >>> nw.dependencies.is_into_series(s_pl) - True - >>> nw.dependencies.is_into_series(np_arr) - False """ from narwhals.series import Series @@ -299,23 +282,6 @@ def is_into_dataframe(native_dataframe: Any) -> bool: Returns: `True` if `native_dataframe` can be converted to a Narwhals DataFrame, `False` otherwise. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import numpy as np - >>> from narwhals.dependencies import is_into_dataframe - - >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - >>> np_arr = np.array([[1, 4], [2, 5], [3, 6]]) - - >>> is_into_dataframe(df_pd) - True - >>> is_into_dataframe(df_pl) - True - >>> is_into_dataframe(np_arr) - False """ from narwhals.dataframe import DataFrame diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 57ee762eb..e73f78573 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -48,15 +48,7 @@ class TemporalType(DType): ... class Decimal(NumericType): - """Decimal type. - - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> s = pl.Series(["1.5"], dtype=pl.Decimal) - >>> nw.from_native(s, series_only=True).dtype - Decimal - """ + """Decimal type.""" class Int128(NumericType): @@ -64,103 +56,19 @@ class Int128(NumericType): class Int64(NumericType): - """64-bit signed integer type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [2, 1, 3, 7] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Int64 - >>> nw.from_native(ser_pl, series_only=True).dtype - Int64 - >>> nw.from_native(ser_pa, series_only=True).dtype - Int64 - """ + """64-bit signed integer type.""" class Int32(NumericType): - """32-bit signed integer type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [2, 1, 3, 7] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> def func(ser): - ... ser_nw = nw.from_native(ser, series_only=True) - ... 
return ser_nw.cast(nw.Int32).dtype - - >>> func(ser_pd) - Int32 - >>> func(ser_pl) - Int32 - >>> func(ser_pa) - Int32 - """ + """32-bit signed integer type.""" class Int16(NumericType): - """16-bit signed integer type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [2, 1, 3, 7] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> def func(ser): - ... ser_nw = nw.from_native(ser, series_only=True) - ... return ser_nw.cast(nw.Int16).dtype - - >>> func(ser_pd) - Int16 - >>> func(ser_pl) - Int16 - >>> func(ser_pa) - Int16 - """ + """16-bit signed integer type.""" class Int8(NumericType): - """8-bit signed integer type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [2, 1, 3, 7] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> def func(ser): - ... ser_nw = nw.from_native(ser, series_only=True) - ... return ser_nw.cast(nw.Int8).dtype - - >>> func(ser_pd) - Int8 - >>> func(ser_pl) - Int8 - >>> func(ser_pa) - Int8 - """ + """8-bit signed integer type.""" class UInt128(NumericType): @@ -168,232 +76,43 @@ class UInt128(NumericType): class UInt64(NumericType): - """64-bit unsigned integer type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [2, 1, 3, 7] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> def func(ser): - ... ser_nw = nw.from_native(ser, series_only=True) - ... return ser_nw.cast(nw.UInt64).dtype - - >>> func(ser_pd) - UInt64 - >>> func(ser_pl) - UInt64 - >>> func(ser_pa) - UInt64 - """ + """64-bit unsigned integer type.""" class UInt32(NumericType): - """32-bit unsigned integer type. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [2, 1, 3, 7] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> def func(ser): - ... ser_nw = nw.from_native(ser, series_only=True) - ... return ser_nw.cast(nw.UInt32).dtype - - >>> func(ser_pd) - UInt32 - >>> func(ser_pl) - UInt32 - >>> func(ser_pa) - UInt32 - """ + """32-bit unsigned integer type.""" class UInt16(NumericType): - """16-bit unsigned integer type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [2, 1, 3, 7] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> def func(ser): - ... ser_nw = nw.from_native(ser, series_only=True) - ... return ser_nw.cast(nw.UInt16).dtype - - >>> func(ser_pd) - UInt16 - >>> func(ser_pl) - UInt16 - >>> func(ser_pa) - UInt16 - """ + """16-bit unsigned integer type.""" class UInt8(NumericType): - """8-bit unsigned integer type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [2, 1, 3, 7] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> def func(ser): - ... ser_nw = nw.from_native(ser, series_only=True) - ... return ser_nw.cast(nw.UInt8).dtype - - >>> func(ser_pd) - UInt8 - >>> func(ser_pl) - UInt8 - >>> func(ser_pa) - UInt8 - """ + """8-bit unsigned integer type.""" class Float64(NumericType): - """64-bit floating point type. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [0.001, 0.1, 0.01, 0.1] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Float64 - >>> nw.from_native(ser_pl, series_only=True).dtype - Float64 - >>> nw.from_native(ser_pa, series_only=True).dtype - Float64 - """ + """64-bit floating point type.""" class Float32(NumericType): - """32-bit floating point type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [0.001, 0.1, 0.01, 0.1] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> def func(ser): - ... ser_nw = nw.from_native(ser, series_only=True) - ... return ser_nw.cast(nw.Float32).dtype - - >>> func(ser_pd) - Float32 - >>> func(ser_pl) - Float32 - >>> func(ser_pa) - Float32 - """ + """32-bit floating point type.""" class String(DType): - """UTF-8 encoded string type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = ["beluga", "narwhal", "orca", "vaquita"] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> nw.from_native(ser_pd, series_only=True).dtype - String - >>> nw.from_native(ser_pl, series_only=True).dtype - String - >>> nw.from_native(ser_pa, series_only=True).dtype - String - """ + """UTF-8 encoded string type.""" class Boolean(DType): - """Boolean type. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [True, False, False, True] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Boolean - >>> nw.from_native(ser_pl, series_only=True).dtype - Boolean - >>> nw.from_native(ser_pa, series_only=True).dtype - Boolean - """ + """Boolean type.""" class Object(DType): - """Data type for wrapping arbitrary Python objects. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> class Foo: ... - >>> ser_pd = pd.Series([Foo(), Foo()]) - >>> ser_pl = pl.Series([Foo(), Foo()]) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Object - >>> nw.from_native(ser_pl, series_only=True).dtype - Object - """ + """Data type for wrapping arbitrary Python objects.""" class Unknown(DType): - """Type representing DataType values that could not be determined statically. - - Examples: - >>> import pandas as pd - >>> import narwhals as nw - >>> data = pd.period_range("2000-01", periods=4, freq="M") - >>> ser_pd = pd.Series(data) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Unknown - """ + """Type representing DataType values that could not be determined statically.""" class Datetime(TemporalType): @@ -406,33 +125,6 @@ class Datetime(TemporalType): Notes: Adapted from [Polars implementation](https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L398-L457) - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> import narwhals as nw - >>> from datetime import datetime, timedelta - >>> data = [datetime(2024, 12, 9) + timedelta(days=n) for n in range(5)] - >>> ser_pd = ( - ... pd.Series(data) - ... .dt.tz_localize("Africa/Accra") - ... 
.astype("datetime64[ms, Africa/Accra]") - ... ) - >>> ser_pl = ( - ... pl.Series(data).cast(pl.Datetime("ms")).dt.replace_time_zone("Africa/Accra") - ... ) - >>> ser_pa = pc.assume_timezone( - ... pa.chunked_array([data], type=pa.timestamp("ms")), "Africa/Accra" - ... ) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Datetime(time_unit='ms', time_zone='Africa/Accra') - >>> nw.from_native(ser_pl, series_only=True).dtype - Datetime(time_unit='ms', time_zone='Africa/Accra') - >>> nw.from_native(ser_pa, series_only=True).dtype - Datetime(time_unit='ms', time_zone='Africa/Accra') """ def __init__( @@ -478,24 +170,6 @@ class Duration(TemporalType): Notes: Adapted from [Polars implementation](https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L460-L502) - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from datetime import timedelta - >>> data = [timedelta(seconds=d) for d in range(1, 4)] - >>> ser_pd = pd.Series(data).astype("timedelta64[ms]") - >>> ser_pl = pl.Series(data).cast(pl.Duration("ms")) - >>> ser_pa = pa.chunked_array([data], type=pa.duration("ms")) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Duration(time_unit='ms') - >>> nw.from_native(ser_pl, series_only=True).dtype - Duration(time_unit='ms') - >>> nw.from_native(ser_pa, series_only=True).dtype - Duration(time_unit='ms') """ def __init__( @@ -529,40 +203,13 @@ def __repr__(self: Self) -> str: # pragma: no cover class Categorical(DType): - """A categorical encoding of a set of strings. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = ["beluga", "narwhal", "orca", "vaquita"] - >>> ser_pd = pd.Series(data) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> nw.from_native(ser_pd, series_only=True).cast(nw.Categorical).dtype - Categorical - >>> nw.from_native(ser_pl, series_only=True).cast(nw.Categorical).dtype - Categorical - >>> nw.from_native(ser_pa, series_only=True).cast(nw.Categorical).dtype - Categorical - """ + """A categorical encoding of a set of strings.""" class Enum(DType): """A fixed categorical encoding of a unique set of strings. Polars has an Enum data type, while pandas and PyArrow do not. - - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> data = ["beluga", "narwhal", "orca", "vaquita"] - >>> ser_pl = pl.Series(data, dtype=pl.Enum(data)) - - >>> nw.from_native(ser_pl, series_only=True).dtype - Enum """ @@ -598,19 +245,6 @@ class Struct(DType): Arguments: fields: The fields that make up the struct. Can be either a sequence of Field objects or a mapping of column names to data types. - - Examples: - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [{"a": 1, "b": ["narwhal", "beluga"]}, {"a": 2, "b": ["orca"]}] - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> nw.from_native(ser_pl, series_only=True).dtype - Struct({'a': Int64, 'b': List(String)}) - >>> nw.from_native(ser_pa, series_only=True).dtype - Struct({'a': Int64, 'b': List(String)}) """ fields: list[Field] @@ -660,25 +294,7 @@ def to_schema(self) -> OrderedDict[str, DType | type[DType]]: class List(DType): - """Variable length list type. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [["narwhal", "orca"], ["beluga", "vaquita"]] - >>> ser_pd = pd.Series(data, dtype=pd.ArrowDtype(pa.large_list(pa.large_string()))) - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> nw.from_native(ser_pd, series_only=True).dtype - List(String) - >>> nw.from_native(ser_pl, series_only=True).dtype - List(String) - >>> nw.from_native(ser_pa, series_only=True).dtype - List(String) - """ + """Variable length list type.""" def __init__(self, inner: DType | type[DType]) -> None: self.inner = inner @@ -712,23 +328,6 @@ class Array(DType): Arguments: inner: The datatype of the values within each array. width: the length of each array. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> data = [[1, 2], [3, 4], [5, 6]] - >>> ser_pd = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int32(), 2))) - >>> ser_pl = pl.Series(data, dtype=pl.Array(pl.Int32, 2)) - >>> ser_pa = pa.chunked_array([data], type=pa.list_(pa.int32(), 2)) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Array(Int32, 2) - >>> nw.from_native(ser_pl, series_only=True).dtype - Array(Int32, 2) - >>> nw.from_native(ser_pa, series_only=True).dtype - Array(Int32, 2) """ def __init__(self, inner: DType | type[DType], width: int | None = None) -> None: @@ -762,23 +361,4 @@ def __repr__(self) -> str: class Date(TemporalType): - """Data type representing a calendar date. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from datetime import date, timedelta - >>> data = [date(2024, 12, 1) + timedelta(days=d) for d in range(4)] - >>> ser_pd = pd.Series(data, dtype="date32[pyarrow]") - >>> ser_pl = pl.Series(data) - >>> ser_pa = pa.chunked_array([data]) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Date - >>> nw.from_native(ser_pl, series_only=True).dtype - Date - >>> nw.from_native(ser_pa, series_only=True).dtype - Date - """ + """Data type representing a calendar date.""" diff --git a/narwhals/expr.py b/narwhals/expr.py index 84646cf8b..732402432 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -46,50 +46,6 @@ def alias(self, name: str) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_alias(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select((nw.col("b") + 10).alias("c")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_alias`: - - >>> agnostic_alias(df_pd) - c - 0 14 - 1 15 - - >>> agnostic_alias(df_pl) - shape: (2, 1) - ┌─────┐ - │ c │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 14 │ - │ 15 │ - └─────┘ - - >>> agnostic_alias(df_pa) - pyarrow.Table - c: int64 - ---- - c: [[14,15]] - """ return self.__class__(lambda plx: self._to_compliant_expr(plx).alias(name)) @@ -103,53 +59,6 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Lets define a library-agnostic function: - - >>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").pipe(lambda x: x + 1)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_pipe`: - - >>> agnostic_pipe(df_pd) - a - 0 2 - 1 3 - 2 4 - 3 5 - - >>> agnostic_pipe(df_pl) - shape: (4, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 3 │ - │ 4 │ - │ 5 │ - └─────┘ - - >>> agnostic_pipe(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,3,4,5]] """ return function(self, *args, **kwargs) @@ -161,53 +70,6 @@ def cast(self: Self, dtype: DType | type[DType]) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_cast(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cast`: - - >>> agnostic_cast(df_pd) - foo bar - 0 1.0 6 - 1 2.0 7 - 2 3.0 8 - >>> agnostic_cast(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ f32 ┆ u8 │ - ╞═════╪═════╡ - │ 1.0 ┆ 6 │ - │ 2.0 ┆ 7 │ - │ 3.0 ┆ 8 │ - └─────┴─────┘ - >>> agnostic_cast(df_pa) - pyarrow.Table - foo: float - bar: uint8 - ---- - foo: [[1,2,3]] - bar: [[6,7,8]] """ _validate_dtype(dtype) return self.__class__( @@ -387,49 +249,6 @@ def any(self) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [True, False], "b": [True, True]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_any(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a", "b").any()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_any`: - - >>> agnostic_any(df_pd) - a b - 0 True True - - >>> agnostic_any(df_pl) - shape: (1, 2) - ┌──────┬──────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞══════╪══════╡ - │ true ┆ true │ - └──────┴──────┘ - - >>> agnostic_any(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[true]] - b: [[true]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).any()) @@ -438,49 +257,6 @@ def all(self) -> Self: Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [True, False], "b": [True, True]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a", "b").all()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_all`: - - >>> agnostic_all(df_pd) - a b - 0 False True - - >>> agnostic_all(df_pl) - shape: (1, 2) - ┌───────┬──────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪══════╡ - │ false ┆ true │ - └───────┴──────┘ - - >>> agnostic_all(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[false]] - b: [[true]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).all()) @@ -535,44 +311,6 @@ def ewm_mean( Returns: Expr - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - - We define a library agnostic function: - - >>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").ewm_mean(com=1, ignore_nulls=False) - ... 
).to_native() - - We can then pass either pandas or Polars to `agnostic_ewm_mean`: - - >>> agnostic_ewm_mean(df_pd) - a - 0 1.000000 - 1 1.666667 - 2 2.428571 - - >>> agnostic_ewm_mean(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3, 1) - ┌──────────┐ - │ a │ - │ --- │ - │ f64 │ - ╞══════════╡ - │ 1.0 │ - │ 1.666667 │ - │ 2.428571 │ - └──────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).ewm_mean( @@ -591,49 +329,6 @@ def mean(self) -> Self: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [-1, 0, 1], "b": [2, 4, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a", "b").mean()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_mean`: - - >>> agnostic_mean(df_pd) - a b - 0 0.0 4.0 - - >>> agnostic_mean(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════╡ - │ 0.0 ┆ 4.0 │ - └─────┴─────┘ - - >>> agnostic_mean(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[0]] - b: [[4]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).mean()) @@ -645,49 +340,6 @@ def median(self) -> Self: Notes: Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 8, 3], "b": [4, 5, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a", "b").median()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_median`: - - >>> agnostic_median(df_pd) - a b - 0 3.0 4.0 - - >>> agnostic_median(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════╡ - │ 3.0 ┆ 4.0 │ - └─────┴─────┘ - - >>> agnostic_median(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[3]] - b: [[4]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).median()) @@ -700,48 +352,6 @@ def std(self, *, ddof: int = 1) -> Self: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_std(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").std(ddof=0)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_std`: - - >>> agnostic_std(df_pd) - a b - 0 17.79513 1.265789 - >>> agnostic_std(df_pl) - shape: (1, 2) - ┌──────────┬──────────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════════╪══════════╡ - │ 17.79513 ┆ 1.265789 │ - └──────────┴──────────┘ - >>> agnostic_std(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[17.795130420052185]] - b: [[1.2657891697365016]] - """ return self.__class__(lambda plx: self._to_compliant_expr(plx).std(ddof=ddof)) @@ -754,49 +364,6 @@ def var(self, *, ddof: int = 1) -> Self: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_var(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a", "b").var(ddof=0)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_var`: - - >>> agnostic_var(df_pd) - a b - 0 316.666667 1.602222 - - >>> agnostic_var(df_pl) - shape: (1, 2) - ┌────────────┬──────────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞════════════╪══════════╡ - │ 316.666667 ┆ 1.602222 │ - └────────────┴──────────┘ - - >>> agnostic_var(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[316.6666666666667]] - b: [[1.6022222222222222]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).var(ddof=ddof)) @@ -819,55 +386,6 @@ def map_batches( Returns: A new expression. 
- - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_map_batches(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a", "b").map_batches( - ... lambda s: s.to_numpy() + 1, return_dtype=nw.Float64 - ... ) - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_map_batches`: - - >>> agnostic_map_batches(df_pd) - a b - 0 2.0 5.0 - 1 3.0 6.0 - 2 4.0 7.0 - >>> agnostic_map_batches(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════╡ - │ 2.0 ┆ 5.0 │ - │ 3.0 ┆ 6.0 │ - │ 4.0 ┆ 7.0 │ - └─────┴─────┘ - >>> agnostic_map_batches(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[2,3,4]] - b: [[5,6,7]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).map_batches( @@ -880,49 +398,6 @@ def skew(self: Self) -> Self: Returns: An expression representing the sample skewness of the column. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_skew(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").skew()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_skew`: - - >>> agnostic_skew(df_pd) - a b - 0 0.0 1.472427 - - >>> agnostic_skew(df_pl) - shape: (1, 2) - ┌─────┬──────────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪══════════╡ - │ 0.0 ┆ 1.472427 │ - └─────┴──────────┘ - - >>> agnostic_skew(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[0]] - b: [[1.4724267269058975]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).skew()) @@ -931,47 +406,6 @@ def sum(self) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [5, 10], "b": [50, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a", "b").sum()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sum`: - - >>> agnostic_sum(df_pd) - a b - 0 15 150 - >>> agnostic_sum(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 15 ┆ 150 │ - └─────┴─────┘ - >>> agnostic_sum(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[15]] - b: [[150]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).sum()) @@ -980,49 +414,6 @@ def min(self) -> Self: Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [4, 3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.min("a", "b")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_min`: - - >>> agnostic_min(df_pd) - a b - 0 1 3 - - >>> agnostic_min(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 3 │ - └─────┴─────┘ - - >>> agnostic_min(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1]] - b: [[3]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).min()) @@ -1031,49 +422,6 @@ def max(self) -> Self: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [10, 20], "b": [50, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.max("a", "b")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_max`: - - >>> agnostic_max(df_pd) - a b - 0 20 100 - - >>> agnostic_max(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 20 ┆ 100 │ - └─────┴─────┘ - - >>> agnostic_max(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[20]] - b: [[100]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).max()) @@ -1082,51 +430,6 @@ def arg_min(self) -> Self: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [10, 20], "b": [150, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_arg_min(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a", "b").arg_min().name.suffix("_arg_min") - ... ).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_arg_min`: - - >>> agnostic_arg_min(df_pd) - a_arg_min b_arg_min - 0 0 1 - - >>> agnostic_arg_min(df_pl) - shape: (1, 2) - ┌───────────┬───────────┐ - │ a_arg_min ┆ b_arg_min │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═══════════╪═══════════╡ - │ 0 ┆ 1 │ - └───────────┴───────────┘ - - >>> agnostic_arg_min(df_pa) - pyarrow.Table - a_arg_min: int64 - b_arg_min: int64 - ---- - a_arg_min: [[0]] - b_arg_min: [[1]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).arg_min()) @@ -1135,51 +438,6 @@ def arg_max(self) -> Self: Returns: A new expression. 
- - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [10, 20], "b": [150, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_arg_max(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a", "b").arg_max().name.suffix("_arg_max") - ... ).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_arg_max`: - - >>> agnostic_arg_max(df_pd) - a_arg_max b_arg_max - 0 1 0 - - >>> agnostic_arg_max(df_pl) - shape: (1, 2) - ┌───────────┬───────────┐ - │ a_arg_max ┆ b_arg_max │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═══════════╪═══════════╡ - │ 1 ┆ 0 │ - └───────────┴───────────┘ - - >>> agnostic_arg_max(df_pa) - pyarrow.Table - a_arg_max: int64 - b_arg_max: int64 - ---- - a_arg_max: [[1]] - b_arg_max: [[0]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).arg_max()) @@ -1188,49 +446,6 @@ def count(self) -> Self: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [None, 4, 4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_count(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all().count()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_count`: - - >>> agnostic_count(df_pd) - a b - 0 3 2 - - >>> agnostic_count(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═════╪═════╡ - │ 3 ┆ 2 │ - └─────┴─────┘ - - >>> agnostic_count(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[3]] - b: [[2]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).count()) @@ -1239,47 +454,6 @@ def n_unique(self) -> Self: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_n_unique(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a", "b").n_unique()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_n_unique`: - - >>> agnostic_n_unique(df_pd) - a b - 0 5 3 - >>> agnostic_n_unique(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═════╪═════╡ - │ 5 ┆ 3 │ - └─────┴─────┘ - >>> agnostic_n_unique(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[5]] - b: [[3]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).n_unique()) @@ -1293,53 +467,6 @@ def unique(self, *, maintain_order: bool = False) -> Self: Returns: A new expression. 
- - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a", "b").unique(maintain_order=True)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_unique`: - - >>> agnostic_unique(df_pd) - a b - 0 1 2 - 1 3 4 - 2 5 6 - - >>> agnostic_unique(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 2 │ - │ 3 ┆ 4 │ - │ 5 ┆ 6 │ - └─────┴─────┘ - - >>> agnostic_unique(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,3,5]] - b: [[2,4,6]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).unique(maintain_order=maintain_order) @@ -1350,51 +477,6 @@ def abs(self) -> Self: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, -2], "b": [-3, 4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_abs(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").abs()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_abs`: - - >>> agnostic_abs(df_pd) - a b - 0 1 3 - 1 2 4 - - >>> agnostic_abs(df_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 3 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - - >>> agnostic_abs(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2]] - b: [[3,4]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).abs()) @@ -1406,55 +488,6 @@ def cum_sum(self: Self, *, reverse: bool = False) -> Self: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").cum_sum()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_sum`: - - >>> agnostic_cum_sum(df_pd) - a b - 0 1 2 - 1 2 6 - 2 5 10 - 3 10 16 - 4 15 22 - >>> agnostic_cum_sum(df_pl) - shape: (5, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 2 │ - │ 2 ┆ 6 │ - │ 5 ┆ 10 │ - │ 10 ┆ 16 │ - │ 15 ┆ 22 │ - └─────┴─────┘ - >>> agnostic_cum_sum(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,5,10,15]] - b: [[2,6,10,16,22]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_sum(reverse=reverse) @@ -1474,55 +507,6 @@ def diff(self) -> Self: do: nw.col("a").diff().fill_null(0).cast(nw.Int64) - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 1, 3, 5, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_diff(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(a_diff=nw.col("a").diff()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_diff`: - - >>> agnostic_diff(df_pd) - a_diff - 0 NaN - 1 0.0 - 2 2.0 - 3 2.0 - 4 0.0 - - >>> agnostic_diff(df_pl) - shape: (5, 1) - ┌────────┐ - │ a_diff │ - │ --- │ - │ i64 │ - ╞════════╡ - │ null │ - │ 0 │ - │ 2 │ - │ 2 │ - │ 0 │ - └────────┘ - - >>> agnostic_diff(df_pa) - pyarrow.Table - a_diff: int64 - ---- - a_diff: [[null,0,2,2,0]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).diff()) @@ -1543,55 +527,6 @@ def shift(self, n: int) -> Self: do: nw.col("a").shift(1).fill_null(0).cast(nw.Int64) - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 1, 3, 5, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_shift(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(a_shift=nw.col("a").shift(n=1)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_shift`: - - >>> agnostic_shift(df_pd) - a_shift - 0 NaN - 1 1.0 - 2 1.0 - 3 3.0 - 4 5.0 - - >>> agnostic_shift(df_pl) - shape: (5, 1) - ┌─────────┐ - │ a_shift │ - │ --- │ - │ i64 │ - ╞═════════╡ - │ null │ - │ 1 │ - │ 1 │ - │ 3 │ - │ 5 │ - └─────────┘ - - >>> agnostic_shift(df_pa) - pyarrow.Table - a_shift: int64 - ---- - a_shift: [[null,1,1,3,5]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).shift(n)) @@ -1617,61 +552,6 @@ def replace_strict( Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [3, 0, 1, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define dataframe-agnostic functions: - - >>> def agnostic_replace_strict(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").replace_strict( - ... [0, 1, 2, 3], - ... ["zero", "one", "two", "three"], - ... return_dtype=nw.String, - ... ) - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_replace_strict`: - - >>> agnostic_replace_strict(df_pd) - a b - 0 3 three - 1 0 zero - 2 1 one - 3 2 two - - >>> agnostic_replace_strict(df_pl) - shape: (4, 2) - ┌─────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ str │ - ╞═════╪═══════╡ - │ 3 ┆ three │ - │ 0 ┆ zero │ - │ 1 ┆ one │ - │ 2 ┆ two │ - └─────┴───────┘ - - >>> agnostic_replace_strict(df_pa) - pyarrow.Table - a: int64 - b: string - ---- - a: [[3,0,1,2]] - b: [["three","zero","one","two"]] """ if new is None: if not isinstance(old, Mapping): @@ -1696,83 +576,6 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [5, None, 1, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define dataframe-agnostic functions: - - >>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").sort()).to_native() - - >>> def agnostic_sort_descending(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").sort(descending=True)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sort` and `agnostic_sort_descending`: - - >>> agnostic_sort(df_pd) - a - 1 NaN - 2 1.0 - 3 2.0 - 0 5.0 - - >>> agnostic_sort(df_pl) - shape: (4, 1) - ┌──────┐ - │ a │ - │ --- │ - │ i64 │ - ╞══════╡ - │ null │ - │ 1 │ - │ 2 │ - │ 5 │ - └──────┘ - - >>> agnostic_sort(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[null,1,2,5]] - - >>> agnostic_sort_descending(df_pd) - a - 1 NaN - 0 5.0 - 3 2.0 - 2 1.0 - - >>> agnostic_sort_descending(df_pl) - shape: (4, 1) - ┌──────┐ - │ a │ - │ --- │ - │ i64 │ - ╞══════╡ - │ null │ - │ 5 │ - │ 2 │ - │ 1 │ - └──────┘ - - >>> agnostic_sort_descending(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[null,5,2,1]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).sort( @@ -1796,55 +599,6 @@ def is_between( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_between(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").is_between(2, 4, "right")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_between`: - - >>> agnostic_is_between(df_pd) - a - 0 False - 1 False - 2 True - 3 True - 4 False - - >>> agnostic_is_between(df_pl) - shape: (5, 1) - ┌───────┐ - │ a │ - │ --- │ - │ bool │ - ╞═══════╡ - │ false │ - │ false │ - │ true │ - │ true │ - │ false │ - └───────┘ - - >>> agnostic_is_between(df_pa) - pyarrow.Table - a: bool - ---- - a: [[false,false,true,true,false]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_between( @@ -1862,55 +616,6 @@ def is_in(self, other: Any) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 9, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_in(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(b=nw.col("a").is_in([1, 2])).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_in`: - - >>> agnostic_is_in(df_pd) - a b - 0 1 True - 1 2 True - 2 9 False - 3 10 False - - >>> agnostic_is_in(df_pl) - shape: (4, 2) - ┌─────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ bool │ - ╞═════╪═══════╡ - │ 1 ┆ true │ - │ 2 ┆ true │ - │ 9 ┆ false │ - │ 10 ┆ false │ - └─────┴───────┘ - - >>> agnostic_is_in(df_pa) - pyarrow.Table - a: int64 - b: bool - ---- - a: [[1,2,9,10]] - b: [[true,true,false,false]] """ if isinstance(other, Iterable) and not isinstance(other, (str, bytes)): return self.__class__( @@ -1930,56 +635,6 @@ def filter(self, *predicates: Any) -> Self: Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").filter(nw.col("a") > 4), - ... nw.col("b").filter(nw.col("b") < 13), - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_filter`: - - >>> agnostic_filter(df_pd) - a b - 3 5 10 - 4 6 11 - 5 7 12 - - >>> agnostic_filter(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 5 ┆ 10 │ - │ 6 ┆ 11 │ - │ 7 ┆ 12 │ - └─────┴─────┘ - - >>> agnostic_filter(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[5,6,7]] - b: [[10,11,12]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).filter( @@ -1997,71 +652,6 @@ def is_null(self) -> Self: pandas handles null values differently from Polars and PyArrow. See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> df_pd = pd.DataFrame( - ... { - ... "a": [2, 4, None, 3, 5], - ... "b": [2.0, 4.0, float("nan"), 3.0, 5.0], - ... } - ... ) - >>> data = { - ... "a": [2, 4, None, 3, 5], - ... "b": [2.0, 4.0, None, 3.0, 5.0], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_null(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... 
a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null() - ... ).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_is_null`: - - >>> agnostic_is_null(df_pd) - a b a_is_null b_is_null - 0 2.0 2.0 False False - 1 4.0 4.0 False False - 2 NaN NaN True True - 3 3.0 3.0 False False - 4 5.0 5.0 False False - - >>> agnostic_is_null(df_pl) - shape: (5, 4) - ┌──────┬──────┬───────────┬───────────┐ - │ a ┆ b ┆ a_is_null ┆ b_is_null │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ bool ┆ bool │ - ╞══════╪══════╪═══════════╪═══════════╡ - │ 2 ┆ 2.0 ┆ false ┆ false │ - │ 4 ┆ 4.0 ┆ false ┆ false │ - │ null ┆ null ┆ true ┆ true │ - │ 3 ┆ 3.0 ┆ false ┆ false │ - │ 5 ┆ 5.0 ┆ false ┆ false │ - └──────┴──────┴───────────┴───────────┘ - - >>> agnostic_is_null(df_pa) - pyarrow.Table - a: int64 - b: double - a_is_null: bool - b_is_null: bool - ---- - a: [[2,4,null,3,5]] - b: [[2,4,null,3,5]] - a_is_null: [[false,false,true,false,false]] - b_is_null: [[false,false,true,false,false]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).is_null()) @@ -2075,58 +665,6 @@ def is_nan(self) -> Self: pandas handles null values differently from Polars and PyArrow. See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"orig": [0.0, None, 2.0]} - >>> df_pd = pd.DataFrame(data).astype({"orig": "Float64"}) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_self_div_is_nan(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... divided=nw.col("orig") / nw.col("orig"), - ... divided_is_nan=(nw.col("orig") / nw.col("orig")).is_nan(), - ... 
).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_self_div_is_nan`: - - >>> print(agnostic_self_div_is_nan(df_pd)) - orig divided divided_is_nan - 0 0.0 NaN True - 1 - 2 2.0 1.0 False - - >>> print(agnostic_self_div_is_nan(df_pl)) - shape: (3, 3) - ┌──────┬─────────┬────────────────┐ - │ orig ┆ divided ┆ divided_is_nan │ - │ --- ┆ --- ┆ --- │ - │ f64 ┆ f64 ┆ bool │ - ╞══════╪═════════╪════════════════╡ - │ 0.0 ┆ NaN ┆ true │ - │ null ┆ null ┆ null │ - │ 2.0 ┆ 1.0 ┆ false │ - └──────┴─────────┴────────────────┘ - - >>> print(agnostic_self_div_is_nan(df_pa)) - pyarrow.Table - orig: double - divided: double - divided_is_nan: bool - ---- - orig: [[0,null,2]] - divided: [[nan,null,1]] - divided_is_nan: [[true,null,false]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).is_nan()) @@ -2135,49 +673,6 @@ def arg_true(self) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, None, None, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_arg_true(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").is_null().arg_true()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_arg_true`: - - >>> agnostic_arg_true(df_pd) - a - 1 1 - 2 2 - - >>> agnostic_arg_true(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ u32 │ - ╞═════╡ - │ 1 │ - │ 2 │ - └─────┘ - - >>> agnostic_arg_true(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[1,2]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).arg_true()) @@ -2201,113 +696,6 @@ def fill_null( pandas handles null values differently from Polars and PyArrow. 
See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> df_pd = pd.DataFrame( - ... { - ... "a": [2, 4, None, None, 3, 5], - ... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0], - ... } - ... ) - >>> data = { - ... "a": [2, 4, None, None, 3, 5], - ... "b": [2.0, 4.0, None, None, 3.0, 5.0], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_fill_null(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(nw.col("a", "b").fill_null(0)).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_fill_null`: - - >>> agnostic_fill_null(df_pd) - a b - 0 2.0 2.0 - 1 4.0 4.0 - 2 0.0 0.0 - 3 0.0 0.0 - 4 3.0 3.0 - 5 5.0 5.0 - - >>> agnostic_fill_null(df_pl) - shape: (6, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ f64 │ - ╞═════╪═════╡ - │ 2 ┆ 2.0 │ - │ 4 ┆ 4.0 │ - │ 0 ┆ 0.0 │ - │ 0 ┆ 0.0 │ - │ 3 ┆ 3.0 │ - │ 5 ┆ 5.0 │ - └─────┴─────┘ - - >>> agnostic_fill_null(df_pa) - pyarrow.Table - a: int64 - b: double - ---- - a: [[2,4,0,0,3,5]] - b: [[2,4,0,0,3,5]] - - Using a strategy: - - >>> def agnostic_fill_null_with_strategy(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a", "b") - ... .fill_null(strategy="forward", limit=1) - ... .name.suffix("_filled") - ... 
).to_native() - - >>> agnostic_fill_null_with_strategy(df_pd) - a b a_filled b_filled - 0 2.0 2.0 2.0 2.0 - 1 4.0 4.0 4.0 4.0 - 2 NaN NaN 4.0 4.0 - 3 NaN NaN NaN NaN - 4 3.0 3.0 3.0 3.0 - 5 5.0 5.0 5.0 5.0 - - >>> agnostic_fill_null_with_strategy(df_pl) - shape: (6, 4) - ┌──────┬──────┬──────────┬──────────┐ - │ a ┆ b ┆ a_filled ┆ b_filled │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ i64 ┆ f64 │ - ╞══════╪══════╪══════════╪══════════╡ - │ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │ - │ 4 ┆ 4.0 ┆ 4 ┆ 4.0 │ - │ null ┆ null ┆ 4 ┆ 4.0 │ - │ null ┆ null ┆ null ┆ null │ - │ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │ - │ 5 ┆ 5.0 ┆ 5 ┆ 5.0 │ - └──────┴──────┴──────────┴──────────┘ - - >>> agnostic_fill_null_with_strategy(df_pa) - pyarrow.Table - a: int64 - b: double - a_filled: int64 - b_filled: double - ---- - a: [[2,4,null,null,3,5]] - b: [[2,4,null,null,3,5]] - a_filled: [[2,4,4,null,3,5]] - b_filled: [[2,4,4,null,3,5]] """ if value is not None and strategy is not None: msg = "cannot specify both `value` and `strategy`" @@ -2335,52 +723,6 @@ def drop_nulls(self) -> Self: pandas handles null values differently from Polars and PyArrow. See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]}) - >>> df_pl = pl.DataFrame({"a": [2.0, 4.0, None, 3.0, None, 5.0]}) - >>> df_pa = pa.table({"a": [2.0, 4.0, None, 3.0, None, 5.0]}) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").drop_nulls()).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_drop_nulls`: - - >>> agnostic_drop_nulls(df_pd) - a - 0 2.0 - 1 4.0 - 3 3.0 - 5 5.0 - - >>> agnostic_drop_nulls(df_pl) - shape: (4, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2.0 │ - │ 4.0 │ - │ 3.0 │ - │ 5.0 │ - └─────┘ - - >>> agnostic_drop_nulls(df_pa) - pyarrow.Table - a: double - ---- - a: [[2,4,3,5]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).drop_nulls()) @@ -2403,53 +745,6 @@ def sample( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_sample(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").sample(fraction=1.0, with_replacement=True) - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sample`: - - >>> agnostic_sample(df_pd) # doctest: +SKIP - a - 2 3 - 0 1 - 2 3 - - >>> agnostic_sample(df_pl) # doctest: +SKIP - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2 │ - │ 3 │ - │ 3 │ - └─────┘ - - >>> agnostic_sample(df_pa) # doctest: +SKIP - pyarrow.Table - a: int64 - ---- - a: [[1,3,3]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).sample( @@ -2467,82 +762,6 @@ def over(self, *keys: str | Iterable[str]) -> Self: Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [1, 1, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_min_over_b(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... a_min_per_group=nw.col("a").min().over("b") - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_min_over_b`: - - >>> agnostic_min_over_b(df_pd) - a b a_min_per_group - 0 1 1 1 - 1 2 1 1 - 2 3 2 3 - - >>> agnostic_min_over_b(df_pl) - shape: (3, 3) - ┌─────┬─────┬─────────────────┐ - │ a ┆ b ┆ a_min_per_group │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════════════════╡ - │ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1 ┆ 1 │ - │ 3 ┆ 2 ┆ 3 │ - └─────┴─────┴─────────────────┘ - - >>> agnostic_min_over_b(df_pa) - pyarrow.Table - a: int64 - b: int64 - a_min_per_group: int64 - ---- - a: [[1,2,3]] - b: [[1,1,2]] - a_min_per_group: [[1,1,3]] - - Cumulative operations are also supported, but (currently) only for - pandas and Polars: - - >>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(c=nw.col("a").cum_sum().over("b")).to_native() - - >>> agnostic_cum_sum(df_pd) - a b c - 0 1 1 1 - 1 2 1 3 - 2 3 2 3 - - >>> agnostic_cum_sum(df_pl) - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1 ┆ 3 │ - │ 3 ┆ 2 ┆ 3 │ - └─────┴─────┴─────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).over(flatten(keys)) @@ -2553,55 +772,6 @@ def is_duplicated(self) -> Self: Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_duplicated(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.all().is_duplicated()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_duplicated`: - - >>> agnostic_is_duplicated(df_pd) - a b - 0 True True - 1 False True - 2 False False - 3 True False - - >>> agnostic_is_duplicated(df_pl) - shape: (4, 2) - ┌───────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪═══════╡ - │ true ┆ true │ - │ false ┆ true │ - │ false ┆ false │ - │ true ┆ false │ - └───────┴───────┘ - - >>> agnostic_is_duplicated(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[true,false,false,true]] - b: [[true,true,false,false]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).is_duplicated()) @@ -2610,55 +780,6 @@ def is_unique(self) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_unique(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all().is_unique()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_unique`: - - >>> agnostic_is_unique(df_pd) - a b - 0 False False - 1 True False - 2 True True - 3 False True - - >>> agnostic_is_unique(df_pl) - shape: (4, 2) - ┌───────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪═══════╡ - │ false ┆ false │ - │ true ┆ false │ - │ true ┆ true │ - │ false ┆ true │ - └───────┴───────┘ - - >>> agnostic_is_unique(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[false,true,true,false]] - b: [[false,false,true,true]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).is_unique()) @@ -2672,49 +793,6 @@ def null_count(self) -> Self: pandas handles null values differently from Polars and PyArrow. See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all().null_count()).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_null_count`: - - >>> agnostic_null_count(df_pd) - a b - 0 1 2 - - >>> agnostic_null_count(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═════╪═════╡ - │ 1 ┆ 2 │ - └─────┴─────┘ - - >>> agnostic_null_count(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1]] - b: [[2]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).null_count()) @@ -2723,55 +801,6 @@ def is_first_distinct(self) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_first_distinct(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.all().is_first_distinct()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_first_distinct`: - - >>> agnostic_is_first_distinct(df_pd) - a b - 0 True True - 1 True False - 2 True True - 3 False True - - >>> agnostic_is_first_distinct(df_pl) - shape: (4, 2) - ┌───────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪═══════╡ - │ true ┆ true │ - │ true ┆ false │ - │ true ┆ true │ - │ false ┆ true │ - └───────┴───────┘ - - >>> agnostic_is_first_distinct(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[true,true,true,false]] - b: [[true,false,true,true]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_first_distinct() @@ -2782,55 +811,6 @@ def is_last_distinct(self) -> Self: Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_last_distinct(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.all().is_last_distinct()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_last_distinct`: - - >>> agnostic_is_last_distinct(df_pd) - a b - 0 False False - 1 True True - 2 True True - 3 True True - - >>> agnostic_is_last_distinct(df_pl) - shape: (4, 2) - ┌───────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪═══════╡ - │ false ┆ false │ - │ true ┆ true │ - │ true ┆ true │ - │ true ┆ true │ - └───────┴───────┘ - - >>> agnostic_is_last_distinct(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[false,true,true,true]] - b: [[false,true,true,true]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).is_last_distinct()) @@ -2854,51 +834,6 @@ def quantile( its own method to approximate quantile and it doesn't implement 'nearest', 'higher', 'lower', 'midpoint' as interpolation method - use 'linear' which is closest to the native 'dask' - method. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": list(range(50)), "b": list(range(50, 100))} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_quantile(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... 
nw.col("a", "b").quantile(0.5, interpolation="linear") - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_quantile`: - - >>> agnostic_quantile(df_pd) - a b - 0 24.5 74.5 - - >>> agnostic_quantile(df_pl) - shape: (1, 2) - ┌──────┬──────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪══════╡ - │ 24.5 ┆ 74.5 │ - └──────┴──────┘ - - >>> agnostic_quantile(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[24.5]] - b: [[74.5]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).quantile(quantile, interpolation) @@ -2912,51 +847,6 @@ def head(self, n: int = 10) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": list(range(10))} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that returns the first 3 rows: - - >>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").head(3)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_head`: - - >>> agnostic_head(df_pd) - a - 0 0 - 1 1 - 2 2 - - >>> agnostic_head(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 0 │ - │ 1 │ - │ 2 │ - └─────┘ - - >>> agnostic_head(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[0,1,2]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n)) @@ -2968,51 +858,6 @@ def tail(self, n: int = 10) -> Self: Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": list(range(10))} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that returns the last 3 rows: - - >>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").tail(3)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_tail`: - - >>> agnostic_tail(df_pd) - a - 7 7 - 8 8 - 9 9 - - >>> agnostic_tail(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 7 │ - │ 8 │ - │ 9 │ - └─────┘ - - >>> agnostic_tail(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[7,8,9]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n)) @@ -3033,51 +878,6 @@ def round(self, decimals: int = 0) -> Self: 4.5 to 4.0, etc..). Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc..). - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.12345, 2.56789, 3.901234]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that rounds to the first decimal: - - >>> def agnostic_round(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").round(1)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_round`: - - >>> agnostic_round(df_pd) - a - 0 1.1 - 1 2.6 - 2 3.9 - - >>> agnostic_round(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 1.1 │ - │ 2.6 │ - │ 3.9 │ - └─────┘ - - >>> agnostic_round(df_pa) - pyarrow.Table - a: double - ---- - a: [[1.1,2.6,3.9]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).round(decimals)) @@ -3088,53 +888,6 @@ def len(self) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that computes the len over - different values of "b" column: - - >>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"), - ... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"), - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_len`: - - >>> agnostic_len(df_pd) - a1 a2 - 0 2 1 - - >>> agnostic_len(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a1 ┆ a2 │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═════╪═════╡ - │ 2 ┆ 1 │ - └─────┴─────┘ - - >>> agnostic_len(df_pa) - pyarrow.Table - a1: int64 - a2: int64 - ---- - a1: [[2]] - a2: [[1]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).len()) @@ -3147,50 +900,6 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function in which gather every 2 rows, - starting from a offset of 1: - - >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").gather_every(n=2, offset=1)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_gather_every`: - - >>> agnostic_gather_every(df_pd) - a - 1 2 - 3 4 - - >>> agnostic_gather_every(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 4 │ - └─────┘ - - >>> agnostic_gather_every(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,4]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset) @@ -3211,130 +920,6 @@ def clip( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_clip_lower(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").clip(2)).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_clip_lower`: - - >>> agnostic_clip_lower(df_pd) - a - 0 2 - 1 2 - 2 3 - - >>> agnostic_clip_lower(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 2 │ - │ 3 │ - └─────┘ - - >>> agnostic_clip_lower(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,2,3]] - - We define another library agnostic function: - - >>> def agnostic_clip_upper(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").clip(upper_bound=2)).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_clip_upper`: - - >>> agnostic_clip_upper(df_pd) - a - 0 1 - 1 2 - 2 2 - - >>> agnostic_clip_upper(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - │ 2 │ - └─────┘ - - >>> agnostic_clip_upper(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[1,2,2]] - - We can have both at the same time - - >>> data = {"a": [-1, 1, -3, 3, -5, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_clip(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").clip(-1, 3)).to_native() - - We can pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_clip`: - - >>> agnostic_clip(df_pd) - a - 0 -1 - 1 1 - 2 -1 - 3 3 - 4 -1 - 5 3 - - >>> agnostic_clip(df_pl) - shape: (6, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ -1 │ - │ 1 │ - │ -1 │ - │ 3 │ - │ -1 │ - │ 3 │ - └─────┘ - - >>> agnostic_clip(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[-1,1,-1,3,-1,3]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).clip( @@ -3350,50 +935,6 @@ def mode(self: Self) -> Self: Returns: A new expression. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 1, 2, 3], - ... "b": [1, 1, 2, 2], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_mode(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").mode()).sort("a").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_mode`: - - >>> agnostic_mode(df_pd) - a - 0 1 - - >>> agnostic_mode(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - └─────┘ - - >>> agnostic_mode(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[1]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).mode()) @@ -3407,53 +948,6 @@ def is_finite(self: Self) -> Self: Returns: Expression of `Boolean` data type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [float("nan"), float("inf"), 2.0, None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_is_finite(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").is_finite()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_finite`: - - >>> agnostic_is_finite(df_pd) - a - 0 False - 1 False - 2 True - 3 False - - >>> agnostic_is_finite(df_pl) - shape: (4, 1) - ┌───────┐ - │ a │ - │ --- │ - │ bool │ - ╞═══════╡ - │ false │ - │ false │ - │ true │ - │ null │ - └───────┘ - - >>> agnostic_is_finite(df_pa) - pyarrow.Table - a: bool - ---- - a: [[false,false,true,null]] """ return self.__class__(lambda plx: self._to_compliant_expr(plx).is_finite()) @@ -3465,60 +959,6 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": ["x", "k", None, "d"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_cum_count(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a").cum_count().alias("cum_count"), - ... nw.col("a").cum_count(reverse=True).alias("cum_count_reverse"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_count`: - - >>> agnostic_cum_count(df_pd) - a cum_count cum_count_reverse - 0 x 1 3 - 1 k 2 2 - 2 None 2 1 - 3 d 3 1 - - >>> agnostic_cum_count(df_pl) - shape: (4, 3) - ┌──────┬───────────┬───────────────────┐ - │ a ┆ cum_count ┆ cum_count_reverse │ - │ --- ┆ --- ┆ --- │ - │ str ┆ u32 ┆ u32 │ - ╞══════╪═══════════╪═══════════════════╡ - │ x ┆ 1 ┆ 3 │ - │ k ┆ 2 ┆ 2 │ - │ null ┆ 2 ┆ 1 │ - │ d ┆ 3 ┆ 1 │ - └──────┴───────────┴───────────────────┘ - - >>> agnostic_cum_count(df_pa) - pyarrow.Table - a: string - cum_count: uint32 - cum_count_reverse: uint32 - ---- - a: [["x","k",null,"d"]] - cum_count: [[1,2,2,3]] - cum_count_reverse: [[3,2,1,1]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_count(reverse=reverse) @@ -3532,60 +972,6 @@ def cum_min(self: Self, *, reverse: bool = False) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [3, 1, None, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_cum_min(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a").cum_min().alias("cum_min"), - ... nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_min`: - - >>> agnostic_cum_min(df_pd) - a cum_min cum_min_reverse - 0 3.0 3.0 1.0 - 1 1.0 1.0 1.0 - 2 NaN NaN NaN - 3 2.0 1.0 2.0 - - >>> agnostic_cum_min(df_pl) - shape: (4, 3) - ┌──────┬─────────┬─────────────────┐ - │ a ┆ cum_min ┆ cum_min_reverse │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞══════╪═════════╪═════════════════╡ - │ 3 ┆ 3 ┆ 1 │ - │ 1 ┆ 1 ┆ 1 │ - │ null ┆ null ┆ null │ - │ 2 ┆ 1 ┆ 2 │ - └──────┴─────────┴─────────────────┘ - - >>> agnostic_cum_min(df_pa) - pyarrow.Table - a: int64 - cum_min: int64 - cum_min_reverse: int64 - ---- - a: [[3,1,null,2]] - cum_min: [[3,1,null,1]] - cum_min_reverse: [[1,1,null,2]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_min(reverse=reverse) @@ -3599,60 +985,6 @@ def cum_max(self: Self, *, reverse: bool = False) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 3, None, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_cum_max(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a").cum_max().alias("cum_max"), - ... nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_`: - - >>> agnostic_cum_max(df_pd) - a cum_max cum_max_reverse - 0 1.0 1.0 3.0 - 1 3.0 3.0 3.0 - 2 NaN NaN NaN - 3 2.0 3.0 2.0 - - >>> agnostic_cum_max(df_pl) - shape: (4, 3) - ┌──────┬─────────┬─────────────────┐ - │ a ┆ cum_max ┆ cum_max_reverse │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞══════╪═════════╪═════════════════╡ - │ 1 ┆ 1 ┆ 3 │ - │ 3 ┆ 3 ┆ 3 │ - │ null ┆ null ┆ null │ - │ 2 ┆ 3 ┆ 2 │ - └──────┴─────────┴─────────────────┘ - - >>> agnostic_cum_max(df_pa) - pyarrow.Table - a: int64 - cum_max: int64 - cum_max_reverse: int64 - ---- - a: [[1,3,null,2]] - cum_max: [[1,3,null,3]] - cum_max_reverse: [[3,3,null,2]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_max(reverse=reverse) @@ -3666,60 +998,6 @@ def cum_prod(self: Self, *, reverse: bool = False) -> Self: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 3, None, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_cum_prod(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a").cum_prod().alias("cum_prod"), - ... nw.col("a").cum_prod(reverse=True).alias("cum_prod_reverse"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_prod`: - - >>> agnostic_cum_prod(df_pd) - a cum_prod cum_prod_reverse - 0 1.0 1.0 6.0 - 1 3.0 3.0 6.0 - 2 NaN NaN NaN - 3 2.0 6.0 2.0 - - >>> agnostic_cum_prod(df_pl) - shape: (4, 3) - ┌──────┬──────────┬──────────────────┐ - │ a ┆ cum_prod ┆ cum_prod_reverse │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞══════╪══════════╪══════════════════╡ - │ 1 ┆ 1 ┆ 6 │ - │ 3 ┆ 3 ┆ 6 │ - │ null ┆ null ┆ null │ - │ 2 ┆ 6 ┆ 2 │ - └──────┴──────────┴──────────────────┘ - - >>> agnostic_cum_prod(df_pa) - pyarrow.Table - a: int64 - cum_prod: int64 - cum_prod_reverse: int64 - ---- - a: [[1,3,null,2]] - cum_prod: [[1,3,null,6]] - cum_prod_reverse: [[6,6,null,2]] """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_prod(reverse=reverse) @@ -3755,57 +1033,6 @@ def rolling_sum( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_sum(window_size=3, min_periods=1) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_sum`: - - >>> agnostic_rolling_sum(df_pd) - a b - 0 1.0 1.0 - 1 2.0 3.0 - 2 NaN 3.0 - 3 4.0 6.0 - - >>> agnostic_rolling_sum(df_pl) - shape: (4, 2) - ┌──────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪═════╡ - │ 1.0 ┆ 1.0 │ - │ 2.0 ┆ 3.0 │ - │ null ┆ 3.0 │ - │ 4.0 ┆ 6.0 │ - └──────┴─────┘ - - >>> agnostic_rolling_sum(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[1,3,3,6]] """ window_size, min_periods = _validate_rolling_arguments( window_size=window_size, min_periods=min_periods @@ -3849,57 +1076,6 @@ def rolling_mean( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_mean(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_mean(window_size=3, min_periods=1) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_mean`: - - >>> agnostic_rolling_mean(df_pd) - a b - 0 1.0 1.0 - 1 2.0 1.5 - 2 NaN 1.5 - 3 4.0 3.0 - - >>> agnostic_rolling_mean(df_pl) - shape: (4, 2) - ┌──────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪═════╡ - │ 1.0 ┆ 1.0 │ - │ 2.0 ┆ 1.5 │ - │ null ┆ 1.5 │ - │ 4.0 ┆ 3.0 │ - └──────┴─────┘ - - >>> agnostic_rolling_mean(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[1,1.5,1.5,3]] """ window_size, min_periods = _validate_rolling_arguments( window_size=window_size, min_periods=min_periods @@ -3945,57 +1121,6 @@ def rolling_var( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_var(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_var(window_size=3, min_periods=1) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_var`: - - >>> agnostic_rolling_var(df_pd) - a b - 0 1.0 NaN - 1 2.0 0.5 - 2 NaN 0.5 - 3 4.0 2.0 - - >>> agnostic_rolling_var(df_pl) # doctest:+SKIP - shape: (4, 2) - ┌──────┬──────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪══════╡ - │ 1.0 ┆ null │ - │ 2.0 ┆ 0.5 │ - │ null ┆ 0.5 │ - │ 4.0 ┆ 2.0 │ - └──────┴──────┘ - - >>> agnostic_rolling_var(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[nan,0.5,0.5,2]] """ window_size, min_periods = _validate_rolling_arguments( window_size=window_size, min_periods=min_periods @@ -4039,57 +1164,6 @@ def rolling_std( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_std(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_std(window_size=3, min_periods=1) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_std`: - - >>> agnostic_rolling_std(df_pd) - a b - 0 1.0 NaN - 1 2.0 0.707107 - 2 NaN 0.707107 - 3 4.0 1.414214 - - >>> agnostic_rolling_std(df_pl) # doctest:+SKIP - shape: (4, 2) - ┌──────┬──────────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪══════════╡ - │ 1.0 ┆ null │ - │ 2.0 ┆ 0.707107 │ - │ null ┆ 0.707107 │ - │ 4.0 ┆ 1.414214 │ - └──────┴──────────┘ - - >>> agnostic_rolling_std(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[nan,0.7071067811865476,0.7071067811865476,1.4142135623730951]] """ window_size, min_periods = _validate_rolling_arguments( window_size=window_size, min_periods=min_periods @@ -4136,56 +1210,6 @@ def rank( Returns: A new expression with rank data. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [3, 6, 1, 1, 6]} - - We define a dataframe-agnostic function that computes the dense rank for - the data: - - >>> def agnostic_dense_rank(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... result = df.with_columns(rnk=nw.col("a").rank(method="dense")) - ... 
return result.to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dense_rank`: - - >>> agnostic_dense_rank(pd.DataFrame(data)) - a rnk - 0 3 2.0 - 1 6 3.0 - 2 1 1.0 - 3 1 1.0 - 4 6 3.0 - - >>> agnostic_dense_rank(pl.DataFrame(data)) - shape: (5, 2) - ┌─────┬─────┐ - │ a ┆ rnk │ - │ --- ┆ --- │ - │ i64 ┆ u32 │ - ╞═════╪═════╡ - │ 3 ┆ 2 │ - │ 6 ┆ 3 │ - │ 1 ┆ 1 │ - │ 1 ┆ 1 │ - │ 6 ┆ 3 │ - └─────┴─────┘ - - >>> agnostic_dense_rank(pa.table(data)) - pyarrow.Table - a: int64 - rnk: uint64 - ---- - a: [[3,6,1,1,6]] - rnk: [[2,3,1,1,3]] """ supported_rank_methods = {"average", "min", "max", "dense", "ordinal"} if method not in supported_rank_methods: diff --git a/narwhals/expr_cat.py b/narwhals/expr_cat.py index ada8e3a45..092a694d4 100644 --- a/narwhals/expr_cat.py +++ b/narwhals/expr_cat.py @@ -21,45 +21,6 @@ def get_categories(self: Self) -> ExprT: Returns: A new expression. - - Examples: - Let's create some dataframes: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"fruits": ["apple", "mango", "mango"]} - >>> df_pd = pd.DataFrame(data, dtype="category") - >>> df_pl = pl.DataFrame(data, schema={"fruits": pl.Categorical}) - - We define a dataframe-agnostic function to get unique categories - from column 'fruits': - - >>> def agnostic_cat_get_categories(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("fruits").cat.get_categories()).to_native() - - We can then pass any supported library such as pandas or Polars to - `agnostic_cat_get_categories`: - - >>> agnostic_cat_get_categories(df_pd) - fruits - 0 apple - 1 mango - - >>> agnostic_cat_get_categories(df_pl) - shape: (2, 1) - ┌────────┐ - │ fruits │ - │ --- │ - │ str │ - ╞════════╡ - │ apple │ - │ mango │ - └────────┘ """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).cat.get_categories() diff --git a/narwhals/expr_dt.py b/narwhals/expr_dt.py index d0676dd9b..87b321dac 100644 --- a/narwhals/expr_dt.py +++ b/narwhals/expr_dt.py @@ -25,50 +25,6 @@ def date(self: Self) -> ExprT: Raises: NotImplementedError: If pandas default backend is being used. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]} - >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_dt_date(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").dt.date()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_date`: - - >>> agnostic_dt_date(df_pd) - a - 0 2012-01-07 - 1 2023-03-10 - - >>> agnostic_dt_date(df_pl) - shape: (2, 1) - ┌────────────┐ - │ a │ - │ --- │ - │ date │ - ╞════════════╡ - │ 2012-01-07 │ - │ 2023-03-10 │ - └────────────┘ - - >>> agnostic_dt_date(df_pa) - pyarrow.Table - a: date32[day] - ---- - a: [[2012-01-07,2023-03-10]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.date() @@ -81,62 +37,6 @@ def year(self: Self) -> ExprT: Returns: A new expression. 
- - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "datetime": [ - ... datetime(1978, 6, 1), - ... datetime(2024, 12, 13), - ... datetime(2065, 1, 1), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_year(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("datetime").dt.year().alias("year") - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_year`: - - >>> agnostic_dt_year(df_pd) - datetime year - 0 1978-06-01 1978 - 1 2024-12-13 2024 - 2 2065-01-01 2065 - - >>> agnostic_dt_year(df_pl) - shape: (3, 2) - ┌─────────────────────┬──────┐ - │ datetime ┆ year │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i32 │ - ╞═════════════════════╪══════╡ - │ 1978-06-01 00:00:00 ┆ 1978 │ - │ 2024-12-13 00:00:00 ┆ 2024 │ - │ 2065-01-01 00:00:00 ┆ 2065 │ - └─────────────────────┴──────┘ - - >>> agnostic_dt_year(df_pa) - pyarrow.Table - datetime: timestamp[us] - year: int64 - ---- - datetime: [[1978-06-01 00:00:00.000000,2024-12-13 00:00:00.000000,2065-01-01 00:00:00.000000]] - year: [[1978,2024,2065]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.year() @@ -149,62 +49,6 @@ def month(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "datetime": [ - ... datetime(1978, 6, 1), - ... datetime(2024, 12, 13), - ... datetime(2065, 1, 1), - ... ] - ... 
} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_month(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("datetime").dt.month().alias("month"), - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_month`: - - >>> agnostic_dt_month(df_pd) - datetime month - 0 1978-06-01 6 - 1 2024-12-13 12 - 2 2065-01-01 1 - - >>> agnostic_dt_month(df_pl) - shape: (3, 2) - ┌─────────────────────┬───────┐ - │ datetime ┆ month │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i8 │ - ╞═════════════════════╪═══════╡ - │ 1978-06-01 00:00:00 ┆ 6 │ - │ 2024-12-13 00:00:00 ┆ 12 │ - │ 2065-01-01 00:00:00 ┆ 1 │ - └─────────────────────┴───────┘ - - >>> agnostic_dt_month(df_pa) - pyarrow.Table - datetime: timestamp[us] - month: int64 - ---- - datetime: [[1978-06-01 00:00:00.000000,2024-12-13 00:00:00.000000,2065-01-01 00:00:00.000000]] - month: [[6,12,1]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.month() @@ -217,62 +61,6 @@ def day(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "datetime": [ - ... datetime(1978, 6, 1), - ... datetime(2024, 12, 13), - ... datetime(2065, 1, 1), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_day(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("datetime").dt.day().alias("day"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_day`: - - >>> agnostic_dt_day(df_pd) - datetime day - 0 1978-06-01 1 - 1 2024-12-13 13 - 2 2065-01-01 1 - - >>> agnostic_dt_day(df_pl) - shape: (3, 2) - ┌─────────────────────┬─────┐ - │ datetime ┆ day │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i8 │ - ╞═════════════════════╪═════╡ - │ 1978-06-01 00:00:00 ┆ 1 │ - │ 2024-12-13 00:00:00 ┆ 13 │ - │ 2065-01-01 00:00:00 ┆ 1 │ - └─────────────────────┴─────┘ - - >>> agnostic_dt_day(df_pa) - pyarrow.Table - datetime: timestamp[us] - day: int64 - ---- - datetime: [[1978-06-01 00:00:00.000000,2024-12-13 00:00:00.000000,2065-01-01 00:00:00.000000]] - day: [[1,13,1]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.day() @@ -285,62 +73,6 @@ def hour(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "datetime": [ - ... datetime(1978, 1, 1, 1), - ... datetime(2024, 10, 13, 5), - ... datetime(2065, 1, 1, 10), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_hour(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("datetime").dt.hour().alias("hour") - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_hour`: - - >>> agnostic_dt_hour(df_pd) - datetime hour - 0 1978-01-01 01:00:00 1 - 1 2024-10-13 05:00:00 5 - 2 2065-01-01 10:00:00 10 - - >>> agnostic_dt_hour(df_pl) - shape: (3, 2) - ┌─────────────────────┬──────┐ - │ datetime ┆ hour │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i8 │ - ╞═════════════════════╪══════╡ - │ 1978-01-01 01:00:00 ┆ 1 │ - │ 2024-10-13 05:00:00 ┆ 5 │ - │ 2065-01-01 10:00:00 ┆ 10 │ - └─────────────────────┴──────┘ - - >>> agnostic_dt_hour(df_pa) - pyarrow.Table - datetime: timestamp[us] - hour: int64 - ---- - datetime: [[1978-01-01 01:00:00.000000,2024-10-13 05:00:00.000000,2065-01-01 10:00:00.000000]] - hour: [[1,5,10]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.hour() @@ -353,62 +85,6 @@ def minute(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "datetime": [ - ... datetime(1978, 1, 1, 1, 1), - ... datetime(2024, 10, 13, 5, 30), - ... datetime(2065, 1, 1, 10, 20), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_minute(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("datetime").dt.minute().alias("minute"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_minute`: - - >>> agnostic_dt_minute(df_pd) - datetime minute - 0 1978-01-01 01:01:00 1 - 1 2024-10-13 05:30:00 30 - 2 2065-01-01 10:20:00 20 - - >>> agnostic_dt_minute(df_pl) - shape: (3, 2) - ┌─────────────────────┬────────┐ - │ datetime ┆ minute │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i8 │ - ╞═════════════════════╪════════╡ - │ 1978-01-01 01:01:00 ┆ 1 │ - │ 2024-10-13 05:30:00 ┆ 30 │ - │ 2065-01-01 10:20:00 ┆ 20 │ - └─────────────────────┴────────┘ - - >>> agnostic_dt_minute(df_pa) - pyarrow.Table - datetime: timestamp[us] - minute: int64 - ---- - datetime: [[1978-01-01 01:01:00.000000,2024-10-13 05:30:00.000000,2065-01-01 10:20:00.000000]] - minute: [[1,30,20]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.minute() @@ -419,62 +95,6 @@ def second(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "datetime": [ - ... datetime(1978, 1, 1, 1, 1, 1), - ... datetime(2024, 10, 13, 5, 30, 14), - ... datetime(2065, 1, 1, 10, 20, 30), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_second(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("datetime").dt.second().alias("second"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_second`: - - >>> agnostic_dt_second(df_pd) - datetime second - 0 1978-01-01 01:01:01 1 - 1 2024-10-13 05:30:14 14 - 2 2065-01-01 10:20:30 30 - - >>> agnostic_dt_second(df_pl) - shape: (3, 2) - ┌─────────────────────┬────────┐ - │ datetime ┆ second │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i8 │ - ╞═════════════════════╪════════╡ - │ 1978-01-01 01:01:01 ┆ 1 │ - │ 2024-10-13 05:30:14 ┆ 14 │ - │ 2065-01-01 10:20:30 ┆ 30 │ - └─────────────────────┴────────┘ - - >>> agnostic_dt_second(df_pa) - pyarrow.Table - datetime: timestamp[us] - second: int64 - ---- - datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.000000,2065-01-01 10:20:30.000000]] - second: [[1,14,30]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.second() @@ -485,62 +105,6 @@ def millisecond(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "datetime": [ - ... datetime(1978, 1, 1, 1, 1, 1, 0), - ... datetime(2024, 10, 13, 5, 30, 14, 505000), - ... datetime(2065, 1, 1, 10, 20, 30, 67000), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_millisecond(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("datetime").dt.millisecond().alias("millisecond"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_millisecond`: - - >>> agnostic_dt_millisecond(df_pd) - datetime millisecond - 0 1978-01-01 01:01:01.000 0 - 1 2024-10-13 05:30:14.505 505 - 2 2065-01-01 10:20:30.067 67 - - >>> agnostic_dt_millisecond(df_pl) - shape: (3, 2) - ┌─────────────────────────┬─────────────┐ - │ datetime ┆ millisecond │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i32 │ - ╞═════════════════════════╪═════════════╡ - │ 1978-01-01 01:01:01 ┆ 0 │ - │ 2024-10-13 05:30:14.505 ┆ 505 │ - │ 2065-01-01 10:20:30.067 ┆ 67 │ - └─────────────────────────┴─────────────┘ - - >>> agnostic_dt_millisecond(df_pa) - pyarrow.Table - datetime: timestamp[us] - millisecond: int64 - ---- - datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.505000,2065-01-01 10:20:30.067000]] - millisecond: [[0,505,67]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.millisecond() @@ -551,62 +115,6 @@ def microsecond(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "datetime": [ - ... datetime(1978, 1, 1, 1, 1, 1, 0), - ... datetime(2024, 10, 13, 5, 30, 14, 505000), - ... datetime(2065, 1, 1, 10, 20, 30, 67000), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_microsecond(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("datetime").dt.microsecond().alias("microsecond"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_microsecond`: - - >>> agnostic_dt_microsecond(df_pd) - datetime microsecond - 0 1978-01-01 01:01:01.000 0 - 1 2024-10-13 05:30:14.505 505000 - 2 2065-01-01 10:20:30.067 67000 - - >>> agnostic_dt_microsecond(df_pl) - shape: (3, 2) - ┌─────────────────────────┬─────────────┐ - │ datetime ┆ microsecond │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i32 │ - ╞═════════════════════════╪═════════════╡ - │ 1978-01-01 01:01:01 ┆ 0 │ - │ 2024-10-13 05:30:14.505 ┆ 505000 │ - │ 2065-01-01 10:20:30.067 ┆ 67000 │ - └─────────────────────────┴─────────────┘ - - >>> agnostic_dt_microsecond(df_pa) - pyarrow.Table - datetime: timestamp[us] - microsecond: int64 - ---- - datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.505000,2065-01-01 10:20:30.067000]] - microsecond: [[0,505000,67000]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.microsecond() @@ -617,62 +125,6 @@ def nanosecond(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "datetime": [ - ... datetime(1978, 1, 1, 1, 1, 1, 0), - ... datetime(2024, 10, 13, 5, 30, 14, 500000), - ... datetime(2065, 1, 1, 10, 20, 30, 60000), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_nanosecond(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("datetime").dt.nanosecond().alias("nanosecond"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_nanosecond`: - - >>> agnostic_dt_nanosecond(df_pd) - datetime nanosecond - 0 1978-01-01 01:01:01.000 0 - 1 2024-10-13 05:30:14.500 500000000 - 2 2065-01-01 10:20:30.060 60000000 - - >>> agnostic_dt_nanosecond(df_pl) - shape: (3, 2) - ┌─────────────────────────┬────────────┐ - │ datetime ┆ nanosecond │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i32 │ - ╞═════════════════════════╪════════════╡ - │ 1978-01-01 01:01:01 ┆ 0 │ - │ 2024-10-13 05:30:14.500 ┆ 500000000 │ - │ 2065-01-01 10:20:30.060 ┆ 60000000 │ - └─────────────────────────┴────────────┘ - - >>> agnostic_dt_nanosecond(df_pa) - pyarrow.Table - datetime: timestamp[us] - nanosecond: int64 - ---- - datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.500000,2065-01-01 10:20:30.060000]] - nanosecond: [[0,500000000,60000000]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.nanosecond() @@ -683,54 +135,6 @@ def ordinal_day(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_ordinal_day(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... a_ordinal_day=nw.col("a").dt.ordinal_day() - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_ordinal_day`: - - >>> agnostic_dt_ordinal_day(df_pd) - a a_ordinal_day - 0 2020-01-01 1 - 1 2020-08-03 216 - - >>> agnostic_dt_ordinal_day(df_pl) - shape: (2, 2) - ┌─────────────────────┬───────────────┐ - │ a ┆ a_ordinal_day │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i16 │ - ╞═════════════════════╪═══════════════╡ - │ 2020-01-01 00:00:00 ┆ 1 │ - │ 2020-08-03 00:00:00 ┆ 216 │ - └─────────────────────┴───────────────┘ - - >>> agnostic_dt_ordinal_day(df_pa) - pyarrow.Table - a: timestamp[us] - a_ordinal_day: int64 - ---- - a: [[2020-01-01 00:00:00.000000,2020-08-03 00:00:00.000000]] - a_ordinal_day: [[1,216]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.ordinal_day() @@ -741,52 +145,6 @@ def weekday(self: Self) -> ExprT: Returns: Returns the ISO weekday number where monday = 1 and sunday = 7 - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_weekday(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.with_columns(a_weekday=nw.col("a").dt.weekday()).to_native() - - We can then pass either pandas, Polars, PyArrow, and other supported libraries to - `agnostic_dt_weekday`: - - >>> agnostic_dt_weekday(df_pd) - a a_weekday - 0 2020-01-01 3 - 1 2020-08-03 1 - - >>> agnostic_dt_weekday(df_pl) - shape: (2, 2) - ┌─────────────────────┬───────────┐ - │ a ┆ a_weekday │ - │ --- ┆ --- │ - │ datetime[μs] ┆ i8 │ - ╞═════════════════════╪═══════════╡ - │ 2020-01-01 00:00:00 ┆ 3 │ - │ 2020-08-03 00:00:00 ┆ 1 │ - └─────────────────────┴───────────┘ - - >>> agnostic_dt_weekday(df_pa) - pyarrow.Table - a: timestamp[us] - a_weekday: int64 - ---- - a: [[2020-01-01 00:00:00.000000,2020-08-03 00:00:00.000000]] - a_weekday: [[3,1]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.weekday() @@ -802,54 +160,6 @@ def total_minutes(self: Self) -> ExprT: The function outputs the total minutes in the int dtype by default, however, pandas may change the dtype to float when there are missing values, consider using `fill_null()` and `cast` in this case. - - Examples: - >>> from datetime import timedelta - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_total_minutes(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... a_total_minutes=nw.col("a").dt.total_minutes() - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_total_minutes`: - - >>> agnostic_dt_total_minutes(df_pd) - a a_total_minutes - 0 0 days 00:10:00 10 - 1 0 days 00:20:40 20 - - >>> agnostic_dt_total_minutes(df_pl) - shape: (2, 2) - ┌──────────────┬─────────────────┐ - │ a ┆ a_total_minutes │ - │ --- ┆ --- │ - │ duration[μs] ┆ i64 │ - ╞══════════════╪═════════════════╡ - │ 10m ┆ 10 │ - │ 20m 40s ┆ 20 │ - └──────────────┴─────────────────┘ - - >>> agnostic_dt_total_minutes(df_pa) - pyarrow.Table - a: duration[us] - a_total_minutes: int64 - ---- - a: [[600000000,1240000000]] - a_total_minutes: [[10,20]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.total_minutes() @@ -865,54 +175,6 @@ def total_seconds(self: Self) -> ExprT: The function outputs the total seconds in the int dtype by default, however, pandas may change the dtype to float when there are missing values, consider using `fill_null()` and `cast` in this case. - - Examples: - >>> from datetime import timedelta - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_total_seconds(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... a_total_seconds=nw.col("a").dt.total_seconds() - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_total_seconds`: - - >>> agnostic_dt_total_seconds(df_pd) - a a_total_seconds - 0 0 days 00:00:10 10 - 1 0 days 00:00:20.040000 20 - - >>> agnostic_dt_total_seconds(df_pl) - shape: (2, 2) - ┌──────────────┬─────────────────┐ - │ a ┆ a_total_seconds │ - │ --- ┆ --- │ - │ duration[μs] ┆ i64 │ - ╞══════════════╪═════════════════╡ - │ 10s ┆ 10 │ - │ 20s 40ms ┆ 20 │ - └──────────────┴─────────────────┘ - - >>> agnostic_dt_total_seconds(df_pa) - pyarrow.Table - a: duration[us] - a_total_seconds: int64 - ---- - a: [[10000000,20040000]] - a_total_seconds: [[10,20]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.total_seconds() @@ -928,59 +190,6 @@ def total_milliseconds(self: Self) -> ExprT: The function outputs the total milliseconds in the int dtype by default, however, pandas may change the dtype to float when there are missing values, consider using `fill_null()` and `cast` in this case. - - Examples: - >>> from datetime import timedelta - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [ - ... timedelta(milliseconds=10), - ... timedelta(milliseconds=20, microseconds=40), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_total_milliseconds(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... a_total_milliseconds=nw.col("a").dt.total_milliseconds() - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_total_milliseconds`: - - >>> agnostic_dt_total_milliseconds(df_pd) - a a_total_milliseconds - 0 0 days 00:00:00.010000 10 - 1 0 days 00:00:00.020040 20 - - >>> agnostic_dt_total_milliseconds(df_pl) - shape: (2, 2) - ┌──────────────┬──────────────────────┐ - │ a ┆ a_total_milliseconds │ - │ --- ┆ --- │ - │ duration[μs] ┆ i64 │ - ╞══════════════╪══════════════════════╡ - │ 10ms ┆ 10 │ - │ 20040µs ┆ 20 │ - └──────────────┴──────────────────────┘ - - >>> agnostic_dt_total_milliseconds(df_pa) - pyarrow.Table - a: duration[us] - a_total_milliseconds: int64 - ---- - a: [[10000,20040]] - a_total_milliseconds: [[10,20]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.total_milliseconds() @@ -996,59 +205,6 @@ def total_microseconds(self: Self) -> ExprT: The function outputs the total microseconds in the int dtype by default, however, pandas may change the dtype to float when there are missing values, consider using `fill_null()` and `cast` in this case. - - Examples: - >>> from datetime import timedelta - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [ - ... timedelta(microseconds=10), - ... timedelta(milliseconds=1, microseconds=200), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_total_microseconds(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... a_total_microseconds=nw.col("a").dt.total_microseconds() - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_total_microseconds`: - - >>> agnostic_dt_total_microseconds(df_pd) - a a_total_microseconds - 0 0 days 00:00:00.000010 10 - 1 0 days 00:00:00.001200 1200 - - >>> agnostic_dt_total_microseconds(df_pl) - shape: (2, 2) - ┌──────────────┬──────────────────────┐ - │ a ┆ a_total_microseconds │ - │ --- ┆ --- │ - │ duration[μs] ┆ i64 │ - ╞══════════════╪══════════════════════╡ - │ 10µs ┆ 10 │ - │ 1200µs ┆ 1200 │ - └──────────────┴──────────────────────┘ - - >>> agnostic_dt_total_microseconds(df_pa) - pyarrow.Table - a: duration[us] - a_total_microseconds: int64 - ---- - a: [[10,1200]] - a_total_microseconds: [[10,1200]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.total_microseconds() @@ -1064,46 +220,6 @@ def total_nanoseconds(self: Self) -> ExprT: The function outputs the total nanoseconds in the int dtype by default, however, pandas may change the dtype to float when there are missing values, consider using `fill_null()` and `cast` in this case. - - Examples: - >>> from datetime import timedelta - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"] - >>> df_pd = pd.DataFrame({"a": pd.to_datetime(data)}) - >>> df_pl = pl.DataFrame({"a": data}).with_columns( - ... pl.col("a").str.to_datetime(time_unit="ns") - ... ) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_total_nanoseconds(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... a_diff_total_nanoseconds=nw.col("a").diff().dt.total_nanoseconds() - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_total_nanoseconds`: - - >>> agnostic_dt_total_nanoseconds(df_pd) - a a_diff_total_nanoseconds - 0 2024-01-01 00:00:00.000000001 NaN - 1 2024-01-01 00:00:00.000000002 1.0 - - >>> agnostic_dt_total_nanoseconds(df_pl) - shape: (2, 2) - ┌───────────────────────────────┬──────────────────────────┐ - │ a ┆ a_diff_total_nanoseconds │ - │ --- ┆ --- │ - │ datetime[ns] ┆ i64 │ - ╞═══════════════════════════════╪══════════════════════════╡ - │ 2024-01-01 00:00:00.000000001 ┆ null │ - │ 2024-01-01 00:00:00.000000002 ┆ 1 │ - └───────────────────────────────┴──────────────────────────┘ """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.total_nanoseconds() @@ -1148,60 +264,6 @@ def to_string(self: Self, format: str) -> ExprT: # noqa: A002 If you have an application where this is not enough, please open an issue and let us know. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [ - ... datetime(2020, 3, 1), - ... datetime(2020, 4, 1), - ... datetime(2020, 5, 1), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_dt_to_string(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").dt.to_string("%Y/%m/%d %H:%M:%S") - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_to_string`: - - >>> agnostic_dt_to_string(df_pd) - a - 0 2020/03/01 00:00:00 - 1 2020/04/01 00:00:00 - 2 2020/05/01 00:00:00 - - >>> agnostic_dt_to_string(df_pl) - shape: (3, 1) - ┌─────────────────────┐ - │ a │ - │ --- │ - │ str │ - ╞═════════════════════╡ - │ 2020/03/01 00:00:00 │ - │ 2020/04/01 00:00:00 │ - │ 2020/05/01 00:00:00 │ - └─────────────────────┘ - - >>> agnostic_dt_to_string(df_pa) - pyarrow.Table - a: string - ---- - a: [["2020/03/01 00:00:00.000000","2020/04/01 00:00:00.000000","2020/05/01 00:00:00.000000"]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.to_string(format) @@ -1215,57 +277,6 @@ def replace_time_zone(self: Self, time_zone: str | None) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime, timezone - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [ - ... datetime(2024, 1, 1, tzinfo=timezone.utc), - ... datetime(2024, 1, 2, tzinfo=timezone.utc), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_dt_replace_time_zone(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").dt.replace_time_zone("Asia/Kathmandu") - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_replace_time_zone`: - - >>> agnostic_dt_replace_time_zone(df_pd) - a - 0 2024-01-01 00:00:00+05:45 - 1 2024-01-02 00:00:00+05:45 - - >>> agnostic_dt_replace_time_zone(df_pl) - shape: (2, 1) - ┌──────────────────────────────┐ - │ a │ - │ --- │ - │ datetime[μs, Asia/Kathmandu] │ - ╞══════════════════════════════╡ - │ 2024-01-01 00:00:00 +0545 │ - │ 2024-01-02 00:00:00 +0545 │ - └──────────────────────────────┘ - - >>> agnostic_dt_replace_time_zone(df_pa) - pyarrow.Table - a: timestamp[us, tz=Asia/Kathmandu] - ---- - a: [[2023-12-31 18:15:00.000000Z,2024-01-01 18:15:00.000000Z]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).dt.replace_time_zone(time_zone) @@ -1282,57 +293,6 @@ def convert_time_zone(self: Self, time_zone: str) -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import datetime, timezone - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [ - ... datetime(2024, 1, 1, tzinfo=timezone.utc), - ... datetime(2024, 1, 2, tzinfo=timezone.utc), - ... ] - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_dt_convert_time_zone(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").dt.convert_time_zone("Asia/Kathmandu") - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_convert_time_zone`: - - >>> agnostic_dt_convert_time_zone(df_pd) - a - 0 2024-01-01 05:45:00+05:45 - 1 2024-01-02 05:45:00+05:45 - - >>> agnostic_dt_convert_time_zone(df_pl) - shape: (2, 1) - ┌──────────────────────────────┐ - │ a │ - │ --- │ - │ datetime[μs, Asia/Kathmandu] │ - ╞══════════════════════════════╡ - │ 2024-01-01 05:45:00 +0545 │ - │ 2024-01-02 05:45:00 +0545 │ - └──────────────────────────────┘ - - >>> agnostic_dt_convert_time_zone(df_pa) - pyarrow.Table - a: timestamp[us, tz=Asia/Kathmandu] - ---- - a: [[2024-01-01 00:00:00.000000Z,2024-01-02 00:00:00.000000Z]] """ if time_zone is None: msg = "Target `time_zone` cannot be `None` in `convert_time_zone`. Please use `replace_time_zone(None)` if you want to remove the time zone." @@ -1350,59 +310,6 @@ def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ExprT: Returns: A new expression. - - Examples: - >>> from datetime import date - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"date": [date(2001, 1, 1), None, date(2001, 1, 3)]} - >>> df_pd = pd.DataFrame(data, dtype="datetime64[ns]") - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_dt_timestamp(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("date").dt.timestamp().alias("timestamp_us"), - ... nw.col("date").dt.timestamp("ms").alias("timestamp_ms"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dt_timestamp`: - - >>> agnostic_dt_timestamp(df_pd) - date timestamp_us timestamp_ms - 0 2001-01-01 9.783072e+14 9.783072e+11 - 1 NaT NaN NaN - 2 2001-01-03 9.784800e+14 9.784800e+11 - - >>> agnostic_dt_timestamp(df_pl) - shape: (3, 3) - ┌────────────┬─────────────────┬──────────────┐ - │ date ┆ timestamp_us ┆ timestamp_ms │ - │ --- ┆ --- ┆ --- │ - │ date ┆ i64 ┆ i64 │ - ╞════════════╪═════════════════╪══════════════╡ - │ 2001-01-01 ┆ 978307200000000 ┆ 978307200000 │ - │ null ┆ null ┆ null │ - │ 2001-01-03 ┆ 978480000000000 ┆ 978480000000 │ - └────────────┴─────────────────┴──────────────┘ - - >>> agnostic_dt_timestamp(df_pa) - pyarrow.Table - date: date32[day] - timestamp_us: int64 - timestamp_ms: int64 - ---- - date: [[2001-01-01,null,2001-01-03]] - timestamp_us: [[978307200000000,null,978480000000000]] - timestamp_ms: [[978307200000,null,978480000000]] """ if time_unit not in {"ns", "us", "ms"}: msg = ( diff --git a/narwhals/expr_list.py b/narwhals/expr_list.py index 17efeaf29..19aa3fb0f 100644 --- a/narwhals/expr_list.py +++ b/narwhals/expr_list.py @@ -23,54 +23,6 @@ def len(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [[1, 2], [3, 4, None], None, []]} - - Let's define a dataframe-agnostic function: - - >>> def agnostic_list_len(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(a_len=nw.col("a").list.len()).to_native() - - We can then pass pandas / PyArrow / Polars / any other supported library: - - >>> agnostic_list_len( - ... pd.DataFrame(data).astype({"a": pd.ArrowDtype(pa.list_(pa.int64()))}) - ... ) # doctest: +SKIP - a a_len - 0 [1. 2.] 2 - 1 [ 3. 4. 
nan] 3 - 2 - 3 [] 0 - - >>> agnostic_list_len(pl.DataFrame(data)) - shape: (4, 2) - ┌──────────────┬───────┐ - │ a ┆ a_len │ - │ --- ┆ --- │ - │ list[i64] ┆ u32 │ - ╞══════════════╪═══════╡ - │ [1, 2] ┆ 2 │ - │ [3, 4, null] ┆ 3 │ - │ null ┆ null │ - │ [] ┆ 0 │ - └──────────────┴───────┘ - - >>> agnostic_list_len(pa.table(data)) - pyarrow.Table - a: list - child 0, item: int64 - a_len: uint32 - ---- - a: [[[1,2],[3,4,null],null,[]]] - a_len: [[2,3,null,0]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).list.len() diff --git a/narwhals/expr_name.py b/narwhals/expr_name.py index 312a2bc9c..a05f7cd6c 100644 --- a/narwhals/expr_name.py +++ b/narwhals/expr_name.py @@ -27,36 +27,6 @@ def keep(self: Self) -> ExprT: This will undo any previous renaming operations on the expression. Due to implementation constraints, this method can only be called as the last expression in a chain. Only one name operation per expression will work. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> - >>> data = {"foo": [1, 2], "BAR": [4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_name_keep(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("foo").alias("alias_for_foo").name.keep()).columns - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_name_keep`: - - >>> agnostic_name_keep(df_pd) - ['foo'] - - >>> agnostic_name_keep(df_pl) - ['foo'] - - >>> agnostic_name_keep(df_pa) - ['foo'] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).name.keep() @@ -75,37 +45,6 @@ def map(self: Self, function: Callable[[str], str]) -> ExprT: This will undo any previous renaming operations on the expression. 
Due to implementation constraints, this method can only be called as the last expression in a chain. Only one name operation per expression will work. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> - >>> data = {"foo": [1, 2], "BAR": [4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> renaming_func = lambda s: s[::-1] # reverse column name - >>> def agnostic_name_map(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("foo", "BAR").name.map(renaming_func)).columns - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_name_map`: - - >>> agnostic_name_map(df_pd) - ['oof', 'RAB'] - - >>> agnostic_name_map(df_pl) - ['oof', 'RAB'] - - >>> agnostic_name_map(df_pa) - ['oof', 'RAB'] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).name.map(function) @@ -124,36 +63,6 @@ def prefix(self: Self, prefix: str) -> ExprT: This will undo any previous renaming operations on the expression. Due to implementation constraints, this method can only be called as the last expression in a chain. Only one name operation per expression will work. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> - >>> data = {"foo": [1, 2], "BAR": [4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_name_prefix(df_native: IntoFrame, prefix: str) -> list[str]: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("foo", "BAR").name.prefix(prefix)).columns - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_name_prefix`: - - >>> agnostic_name_prefix(df_pd, "with_prefix_") - ['with_prefix_foo', 'with_prefix_BAR'] - - >>> agnostic_name_prefix(df_pl, "with_prefix_") - ['with_prefix_foo', 'with_prefix_BAR'] - - >>> agnostic_name_prefix(df_pa, "with_prefix_") - ['with_prefix_foo', 'with_prefix_BAR'] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).name.prefix(prefix) @@ -172,36 +81,6 @@ def suffix(self: Self, suffix: str) -> ExprT: This will undo any previous renaming operations on the expression. Due to implementation constraints, this method can only be called as the last expression in a chain. Only one name operation per expression will work. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> - >>> data = {"foo": [1, 2], "BAR": [4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_name_suffix(df_native: IntoFrame, suffix: str) -> list[str]: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("foo", "BAR").name.suffix(suffix)).columns - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_name_suffix`: - - >>> agnostic_name_suffix(df_pd, "_with_suffix") - ['foo_with_suffix', 'BAR_with_suffix'] - - >>> agnostic_name_suffix(df_pl, "_with_suffix") - ['foo_with_suffix', 'BAR_with_suffix'] - - >>> agnostic_name_suffix(df_pa, "_with_suffix") - ['foo_with_suffix', 'BAR_with_suffix'] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).name.suffix(suffix) @@ -217,36 +96,6 @@ def to_lowercase(self: Self) -> ExprT: This will undo any previous renaming operations on the expression. 
Due to implementation constraints, this method can only be called as the last expression in a chain. Only one name operation per expression will work. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> - >>> data = {"foo": [1, 2], "BAR": [4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_name_to_lowercase(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("foo", "BAR").name.to_lowercase()).columns - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_name_to_lowercase`: - - >>> agnostic_name_to_lowercase(df_pd) - ['foo', 'bar'] - - >>> agnostic_name_to_lowercase(df_pl) - ['foo', 'bar'] - - >>> agnostic_name_to_lowercase(df_pa) - ['foo', 'bar'] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).name.to_lowercase() @@ -262,36 +111,6 @@ def to_uppercase(self: Self) -> ExprT: This will undo any previous renaming operations on the expression. Due to implementation constraints, this method can only be called as the last expression in a chain. Only one name operation per expression will work. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> - >>> data = {"foo": [1, 2], "BAR": [4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_name_to_uppercase(df_native: IntoFrame) -> list[str]: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("foo", "BAR").name.to_uppercase()).columns - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_name_to_uppercase`: - - >>> agnostic_name_to_uppercase(df_pd) - ['FOO', 'BAR'] - - >>> agnostic_name_to_uppercase(df_pl) - ['FOO', 'BAR'] - - >>> agnostic_name_to_uppercase(df_pa) - ['FOO', 'BAR'] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).name.to_uppercase() diff --git a/narwhals/expr_str.py b/narwhals/expr_str.py index 91d355c66..946a150de 100644 --- a/narwhals/expr_str.py +++ b/narwhals/expr_str.py @@ -21,59 +21,6 @@ def len_chars(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"words": ["foo", "Café", "345", "東京", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_len_chars(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... words_len=nw.col("words").str.len_chars() - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_len_chars`: - - >>> agnostic_str_len_chars(df_pd) - words words_len - 0 foo 3.0 - 1 Café 4.0 - 2 345 3.0 - 3 東京 2.0 - 4 None NaN - - >>> agnostic_str_len_chars(df_pl) - shape: (5, 2) - ┌───────┬───────────┐ - │ words ┆ words_len │ - │ --- ┆ --- │ - │ str ┆ u32 │ - ╞═══════╪═══════════╡ - │ foo ┆ 3 │ - │ Café ┆ 4 │ - │ 345 ┆ 3 │ - │ 東京 ┆ 2 │ - │ null ┆ null │ - └───────┴───────────┘ - - >>> agnostic_str_len_chars(df_pa) - pyarrow.Table - words: string - words_len: int32 - ---- - words: [["foo","Café","345","東京",null]] - words_len: [[3,4,3,2,null]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.len_chars() @@ -92,52 +39,6 @@ def replace( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"foo": ["123abc", "abc abc123"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_replace(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... df = df.with_columns(replaced=nw.col("foo").str.replace("abc", "")) - ... 
return df.to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_replace`: - - >>> agnostic_str_replace(df_pd) - foo replaced - 0 123abc 123 - 1 abc abc123 abc123 - - >>> agnostic_str_replace(df_pl) - shape: (2, 2) - ┌────────────┬──────────┐ - │ foo ┆ replaced │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞════════════╪══════════╡ - │ 123abc ┆ 123 │ - │ abc abc123 ┆ abc123 │ - └────────────┴──────────┘ - - >>> agnostic_str_replace(df_pa) - pyarrow.Table - foo: string - replaced: string - ---- - foo: [["123abc","abc abc123"]] - replaced: [["123"," abc123"]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.replace( @@ -157,52 +58,6 @@ def replace_all( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"foo": ["123abc", "abc abc123"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_replace_all(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... df = df.with_columns(replaced=nw.col("foo").str.replace_all("abc", "")) - ... 
return df.to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_replace_all`: - - >>> agnostic_str_replace_all(df_pd) - foo replaced - 0 123abc 123 - 1 abc abc123 123 - - >>> agnostic_str_replace_all(df_pl) - shape: (2, 2) - ┌────────────┬──────────┐ - │ foo ┆ replaced │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞════════════╪══════════╡ - │ 123abc ┆ 123 │ - │ abc abc123 ┆ 123 │ - └────────────┴──────────┘ - - >>> agnostic_str_replace_all(df_pa) - pyarrow.Table - foo: string - replaced: string - ---- - foo: [["123abc","abc abc123"]] - replaced: [["123"," 123"]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.replace_all( @@ -221,38 +76,6 @@ def strip_chars(self: Self, characters: str | None = None) -> ExprT: Returns: A new expression. - - Examples: - >>> from typing import Any - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> - >>> data = {"fruits": ["apple", "\nmango"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_strip_chars(df_native: IntoFrame) -> dict[str, Any]: - ... df = nw.from_native(df_native) - ... df = df.with_columns(stripped=nw.col("fruits").str.strip_chars()) - ... 
return df.to_dict(as_series=False) - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_strip_chars`: - - >>> agnostic_str_strip_chars(df_pd) - {'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']} - - >>> agnostic_str_strip_chars(df_pl) - {'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']} - - >>> agnostic_str_strip_chars(df_pa) - {'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']} """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.strip_chars(characters) @@ -266,55 +89,6 @@ def starts_with(self: Self, prefix: str) -> ExprT: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"fruits": ["apple", "mango", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_starts_with(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... has_prefix=nw.col("fruits").str.starts_with("app") - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_starts_with`: - - >>> agnostic_str_starts_with(df_pd) - fruits has_prefix - 0 apple True - 1 mango False - 2 None None - - >>> agnostic_str_starts_with(df_pl) - shape: (3, 2) - ┌────────┬────────────┐ - │ fruits ┆ has_prefix │ - │ --- ┆ --- │ - │ str ┆ bool │ - ╞════════╪════════════╡ - │ apple ┆ true │ - │ mango ┆ false │ - │ null ┆ null │ - └────────┴────────────┘ - - >>> agnostic_str_starts_with(df_pa) - pyarrow.Table - fruits: string - has_prefix: bool - ---- - fruits: [["apple","mango",null]] - has_prefix: [[true,false,null]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.starts_with(prefix) @@ -328,55 +102,6 @@ def ends_with(self: Self, suffix: str) -> ExprT: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"fruits": ["apple", "mango", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_ends_with(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... has_suffix=nw.col("fruits").str.ends_with("ngo") - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_ends_with`: - - >>> agnostic_str_ends_with(df_pd) - fruits has_suffix - 0 apple False - 1 mango True - 2 None None - - >>> agnostic_str_ends_with(df_pl) - shape: (3, 2) - ┌────────┬────────────┐ - │ fruits ┆ has_suffix │ - │ --- ┆ --- │ - │ str ┆ bool │ - ╞════════╪════════════╡ - │ apple ┆ false │ - │ mango ┆ true │ - │ null ┆ null │ - └────────┴────────────┘ - - >>> agnostic_str_ends_with(df_pa) - pyarrow.Table - fruits: string - has_suffix: bool - ---- - fruits: [["apple","mango",null]] - has_suffix: [[false,true,null]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.ends_with(suffix) @@ -392,67 +117,6 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_contains(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... default_match=nw.col("pets").str.contains("parrot|Dove"), - ... case_insensitive_match=nw.col("pets").str.contains("(?i)parrot|Dove"), - ... literal_match=nw.col("pets").str.contains( - ... "parrot|Dove", literal=True - ... ), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_contains`: - - >>> agnostic_str_contains(df_pd) - pets default_match case_insensitive_match literal_match - 0 cat False False False - 1 dog False False False - 2 rabbit and parrot True True False - 3 dove False True False - 4 None None None None - - >>> agnostic_str_contains(df_pl) - shape: (5, 4) - ┌───────────────────┬───────────────┬────────────────────────┬───────────────┐ - │ pets ┆ default_match ┆ case_insensitive_match ┆ literal_match │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ bool ┆ bool ┆ bool │ - ╞═══════════════════╪═══════════════╪════════════════════════╪═══════════════╡ - │ cat ┆ false ┆ false ┆ false │ - │ dog ┆ false ┆ false ┆ false │ - │ rabbit and parrot ┆ true ┆ true ┆ false │ - │ dove ┆ false ┆ true ┆ false │ - │ null ┆ null ┆ null ┆ null │ - └───────────────────┴───────────────┴────────────────────────┴───────────────┘ - - >>> agnostic_str_contains(df_pa) - pyarrow.Table - pets: string - default_match: bool - case_insensitive_match: bool - literal_match: bool - ---- - pets: [["cat","dog","rabbit and parrot","dove",null]] - default_match: [[false,false,true,false,null]] - case_insensitive_match: [[false,false,true,true,null]] - literal_match: [[false,false,false,false,null]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.contains( @@ -470,91 +134,6 @@ def slice(self: Self, offset: int, length: int | None = None) -> ExprT: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"s": ["pear", None, "papaya", "dragonfruit"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_slice(df_native: IntoFrameT) -> IntoFrameT: - ... 
df = nw.from_native(df_native) - ... return df.with_columns( - ... s_sliced=nw.col("s").str.slice(4, length=3) - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_slice`: - - >>> agnostic_str_slice(df_pd) # doctest: +NORMALIZE_WHITESPACE - s s_sliced - 0 pear - 1 None None - 2 papaya ya - 3 dragonfruit onf - - >>> agnostic_str_slice(df_pl) - shape: (4, 2) - ┌─────────────┬──────────┐ - │ s ┆ s_sliced │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞═════════════╪══════════╡ - │ pear ┆ │ - │ null ┆ null │ - │ papaya ┆ ya │ - │ dragonfruit ┆ onf │ - └─────────────┴──────────┘ - - >>> agnostic_str_slice(df_pa) - pyarrow.Table - s: string - s_sliced: string - ---- - s: [["pear",null,"papaya","dragonfruit"]] - s_sliced: [["",null,"ya","onf"]] - - Using negative indexes: - - >>> def agnostic_str_slice_negative(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(s_sliced=nw.col("s").str.slice(-3)).to_native() - - >>> agnostic_str_slice_negative(df_pd) - s s_sliced - 0 pear ear - 1 None None - 2 papaya aya - 3 dragonfruit uit - - >>> agnostic_str_slice_negative(df_pl) - shape: (4, 2) - ┌─────────────┬──────────┐ - │ s ┆ s_sliced │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞═════════════╪══════════╡ - │ pear ┆ ear │ - │ null ┆ null │ - │ papaya ┆ aya │ - │ dragonfruit ┆ uit │ - └─────────────┴──────────┘ - - >>> agnostic_str_slice_negative(df_pa) - pyarrow.Table - s: string - s_sliced: string - ---- - s: [["pear",null,"papaya","dragonfruit"]] - s_sliced: [["ear",null,"aya","uit"]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.slice( @@ -573,57 +152,6 @@ def head(self: Self, n: int = 5) -> ExprT: Notes: If the length of the string has fewer than `n` characters, the full string is returned. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_head(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... lyrics_head=nw.col("lyrics").str.head() - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_head`: - - >>> agnostic_str_head(df_pd) - lyrics lyrics_head - 0 Atatata Atata - 1 taata taata - 2 taatatata taata - 3 zukkyun zukky - - >>> agnostic_str_head(df_pl) - shape: (4, 2) - ┌───────────┬─────────────┐ - │ lyrics ┆ lyrics_head │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞═══════════╪═════════════╡ - │ Atatata ┆ Atata │ - │ taata ┆ taata │ - │ taatatata ┆ taata │ - │ zukkyun ┆ zukky │ - └───────────┴─────────────┘ - - >>> agnostic_str_head(df_pa) - pyarrow.Table - lyrics: string - lyrics_head: string - ---- - lyrics: [["Atatata","taata","taatatata","zukkyun"]] - lyrics_head: [["Atata","taata","taata","zukky"]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.slice(0, n) @@ -640,57 +168,6 @@ def tail(self: Self, n: int = 5) -> ExprT: Notes: If the length of the string has fewer than `n` characters, the full string is returned. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_tail(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... lyrics_tail=nw.col("lyrics").str.tail() - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_tail`: - - >>> agnostic_str_tail(df_pd) - lyrics lyrics_tail - 0 Atatata atata - 1 taata taata - 2 taatatata atata - 3 zukkyun kkyun - - >>> agnostic_str_tail(df_pl) - shape: (4, 2) - ┌───────────┬─────────────┐ - │ lyrics ┆ lyrics_tail │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞═══════════╪═════════════╡ - │ Atatata ┆ atata │ - │ taata ┆ taata │ - │ taatatata ┆ atata │ - │ zukkyun ┆ kkyun │ - └───────────┴─────────────┘ - - >>> agnostic_str_tail(df_pa) - pyarrow.Table - lyrics: string - lyrics_tail: string - ---- - lyrics: [["Atatata","taata","taatatata","zukkyun"]] - lyrics_tail: [["atata","taata","atata","kkyun"]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.slice( @@ -717,51 +194,6 @@ def to_datetime(self: Self, format: str | None = None) -> ExprT: # noqa: A002 Prior to pandas 2.0, nanoseconds were the only time unit supported in pandas, with no ability to set any other one. The ability to set the time unit in pandas, if the version permits, will arrive. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = ["2020-01-01", "2020-01-02"] - >>> df_pd = pd.DataFrame({"a": data}) - >>> df_pl = pl.DataFrame({"a": data}) - >>> df_pa = pa.table({"a": data}) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_to_datetime(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").str.to_datetime(format="%Y-%m-%d") - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_to_datetime`: - - >>> agnostic_str_to_datetime(df_pd) - a - 0 2020-01-01 - 1 2020-01-02 - - >>> agnostic_str_to_datetime(df_pl) - shape: (2, 1) - ┌─────────────────────┐ - │ a │ - │ --- │ - │ datetime[μs] │ - ╞═════════════════════╡ - │ 2020-01-01 00:00:00 │ - │ 2020-01-02 00:00:00 │ - └─────────────────────┘ - - >>> agnostic_str_to_datetime(df_pa) - pyarrow.Table - a: timestamp[us] - ---- - a: [[2020-01-01 00:00:00.000000,2020-01-02 00:00:00.000000]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.to_datetime(format=format) @@ -777,55 +209,6 @@ def to_uppercase(self: Self) -> ExprT: The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'. For more info see [the related issue](https://github.com/apache/arrow/issues/34599). There may be other unicode-edge-case-related variations across implementations. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"fruits": ["apple", "mango", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_to_uppercase(df_native: IntoFrameT) -> IntoFrameT: - ... 
df = nw.from_native(df_native) - ... return df.with_columns( - ... upper_col=nw.col("fruits").str.to_uppercase() - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_to_uppercase`: - - >>> agnostic_str_to_uppercase(df_pd) - fruits upper_col - 0 apple APPLE - 1 mango MANGO - 2 None None - - >>> agnostic_str_to_uppercase(df_pl) - shape: (3, 2) - ┌────────┬───────────┐ - │ fruits ┆ upper_col │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞════════╪═══════════╡ - │ apple ┆ APPLE │ - │ mango ┆ MANGO │ - │ null ┆ null │ - └────────┴───────────┘ - - >>> agnostic_str_to_uppercase(df_pa) - pyarrow.Table - fruits: string - upper_col: string - ---- - fruits: [["apple","mango",null]] - upper_col: [["APPLE","MANGO",null]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.to_uppercase() @@ -836,55 +219,6 @@ def to_lowercase(self: Self) -> ExprT: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"fruits": ["APPLE", "MANGO", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_str_to_lowercase(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... lower_col=nw.col("fruits").str.to_lowercase() - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_str_to_lowercase`: - - >>> agnostic_str_to_lowercase(df_pd) - fruits lower_col - 0 APPLE apple - 1 MANGO mango - 2 None None - - >>> agnostic_str_to_lowercase(df_pl) - shape: (3, 2) - ┌────────┬───────────┐ - │ fruits ┆ lower_col │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞════════╪═══════════╡ - │ APPLE ┆ apple │ - │ MANGO ┆ mango │ - │ null ┆ null │ - └────────┴───────────┘ - - >>> agnostic_str_to_lowercase(df_pa) - pyarrow.Table - fruits: string - lower_col: string - ---- - fruits: [["APPLE","MANGO",null]] - lower_col: [["apple","mango",null]] """ return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.to_lowercase() diff --git a/narwhals/functions.py b/narwhals/functions.py index f10a397f7..b70be62ae 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -96,123 +96,6 @@ def concat( Raises: TypeError: The items to concatenate should either all be eager, or all lazy - - Examples: - Let's take an example of vertical concatenation: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> data_2 = {"a": [5, 2], "b": [1, 4]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def agnostic_vertical_concat(df1, df2): - ... 
return nw.concat([df1, df2], how="vertical") - - >>> agnostic_vertical_concat(df_pd_1, df_pd_2) - a b - 0 1 4 - 1 2 5 - 2 3 6 - 0 5 1 - 1 2 4 - >>> agnostic_vertical_concat(df_pl_1, df_pl_2) - shape: (5, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - │ 5 ┆ 1 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - - Let's look at case a for horizontal concatenation: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> data_2 = {"c": [5, 2], "d": [1, 4]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Defining a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def agnostic_horizontal_concat(df1, df2): - ... return nw.concat([df1, df2], how="horizontal") - - >>> agnostic_horizontal_concat(df_pd_1, df_pd_2) - a b c d - 0 1 4 5.0 1.0 - 1 2 5 2.0 4.0 - 2 3 6 NaN NaN - - >>> agnostic_horizontal_concat(df_pl_1, df_pl_2) - shape: (3, 4) - ┌─────┬─────┬──────┬──────┐ - │ a ┆ b ┆ c ┆ d │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪══════╪══════╡ - │ 1 ┆ 4 ┆ 5 ┆ 1 │ - │ 2 ┆ 5 ┆ 2 ┆ 4 │ - │ 3 ┆ 6 ┆ null ┆ null │ - └─────┴─────┴──────┴──────┘ - - Let's look at case a for diagonal concatenation: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> data_1 = {"a": [1, 2], "b": [3.5, 4.5]} - >>> data_2 = {"a": [3, 4], "z": ["x", "y"]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Defining a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def agnostic_diagonal_concat(df1, df2): - ... 
return nw.concat([df1, df2], how="diagonal") - - >>> agnostic_diagonal_concat(df_pd_1, df_pd_2) - a b z - 0 1 3.5 NaN - 1 2 4.5 NaN - 0 3 NaN x - 1 4 NaN y - - >>> agnostic_diagonal_concat(df_pl_1, df_pl_2) - shape: (4, 3) - ┌─────┬──────┬──────┐ - │ a ┆ b ┆ z │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str │ - ╞═════╪══════╪══════╡ - │ 1 ┆ 3.5 ┆ null │ - │ 2 ┆ 4.5 ┆ null │ - │ 3 ┆ null ┆ x │ - │ 4 ┆ null ┆ y │ - └─────┴──────┴──────┘ """ if how not in {"horizontal", "vertical", "diagonal"}: # pragma: no cover msg = "Only vertical, horizontal and diagonal concatenations are supported." @@ -247,54 +130,6 @@ def new_series( Returns: A new Series - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT, IntoSeriesT - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's define a dataframe-agnostic function: - - >>> def agnostic_new_series(df_native: IntoFrameT) -> IntoSeriesT: - ... values = [4, 1, 2, 3] - ... native_namespace = nw.get_native_namespace(df_native) - ... return nw.new_series( - ... name="a", - ... values=values, - ... dtype=nw.Int32, - ... native_namespace=native_namespace, - ... ).to_native() - - We can then pass any supported eager library, such as pandas / Polars / PyArrow: - - >>> agnostic_new_series(pd.DataFrame(data)) - 0 4 - 1 1 - 2 2 - 3 3 - Name: a, dtype: int32 - >>> agnostic_new_series(pl.DataFrame(data)) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: 'a' [i32] - [ - 4 - 1 - 2 - 3 - ] - >>> agnostic_new_series(pa.table(data)) - - [ - [ - 4, - 1, - 2, - 3 - ] - ] """ return _new_series_impl( name, @@ -388,45 +223,6 @@ def from_dict( Returns: A new DataFrame. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data: - - >>> def agnostic_from_dict(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = {"c": [5, 2], "d": [1, 4]} - ... native_namespace = nw.get_native_namespace(df_native) - ... return nw.from_dict(new_data, native_namespace=native_namespace).to_native() - - Let's see what happens when passing pandas, Polars or PyArrow input: - - >>> agnostic_from_dict(pd.DataFrame(data)) - c d - 0 5 1 - 1 2 4 - >>> agnostic_from_dict(pl.DataFrame(data)) - shape: (2, 2) - ┌─────┬─────┐ - │ c ┆ d │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 5 ┆ 1 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - >>> agnostic_from_dict(pa.table(data)) - pyarrow.Table - c: int64 - d: int64 - ---- - c: [[5,2]] - d: [[1,4]] """ return _from_dict_impl( data, @@ -557,123 +353,6 @@ def from_numpy( Returns: A new DataFrame. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> import numpy as np - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2], "b": [3, 4]} - - Let's create a new dataframe of the same class as the dataframe we started with, from a NumPy ndarray of new data: - - >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) - ... df = nw.from_native(df_native) - ... native_namespace = nw.get_native_namespace(df) - ... 
return nw.from_numpy(new_data, native_namespace=native_namespace).to_native() - - Let's see what happens when passing pandas, Polars or PyArrow input: - - >>> agnostic_from_numpy(pd.DataFrame(data)) - column_0 column_1 column_2 - 0 5 2 1 - 1 1 4 3 - >>> agnostic_from_numpy(pl.DataFrame(data)) - shape: (2, 3) - ┌──────────┬──────────┬──────────┐ - │ column_0 ┆ column_1 ┆ column_2 │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞══════════╪══════════╪══════════╡ - │ 5 ┆ 2 ┆ 1 │ - │ 1 ┆ 4 ┆ 3 │ - └──────────┴──────────┴──────────┘ - >>> agnostic_from_numpy(pa.table(data)) - pyarrow.Table - column_0: int64 - column_1: int64 - column_2: int64 - ---- - column_0: [[5,1]] - column_1: [[2,4]] - column_2: [[1,3]] - - Let's specify the column names: - - >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) - ... schema = ["c", "d", "e"] - ... df = nw.from_native(df_native) - ... native_namespace = nw.get_native_namespace(df) - ... return nw.from_numpy( - ... new_data, native_namespace=native_namespace, schema=schema - ... ).to_native() - - Let's see the modified outputs: - - >>> agnostic_from_numpy(pd.DataFrame(data)) - c d e - 0 5 2 1 - 1 1 4 3 - >>> agnostic_from_numpy(pl.DataFrame(data)) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ c ┆ d ┆ e │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ 5 ┆ 2 ┆ 1 │ - │ 1 ┆ 4 ┆ 3 │ - └─────┴─────┴─────┘ - >>> agnostic_from_numpy(pa.table(data)) - pyarrow.Table - c: int64 - d: int64 - e: int64 - ---- - c: [[5,1]] - d: [[2,4]] - e: [[1,3]] - - Let's modify the function so that it specifies the schema: - - >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) - ... schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()} - ... df = nw.from_native(df_native) - ... native_namespace = nw.get_native_namespace(df) - ... return nw.from_numpy( - ... 
new_data, native_namespace=native_namespace, schema=schema - ... ).to_native() - - Let's see the outputs: - - >>> agnostic_from_numpy(pd.DataFrame(data)) - c d e - 0 5 2.0 1 - 1 1 4.0 3 - >>> agnostic_from_numpy(pl.DataFrame(data)) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ c ┆ d ┆ e │ - │ --- ┆ --- ┆ --- │ - │ i16 ┆ f32 ┆ i8 │ - ╞═════╪═════╪═════╡ - │ 5 ┆ 2.0 ┆ 1 │ - │ 1 ┆ 4.0 ┆ 3 │ - └─────┴─────┴─────┘ - >>> agnostic_from_numpy(pa.table(data)) - pyarrow.Table - c: int16 - d: float - e: int8 - ---- - c: [[5,1]] - d: [[2,4]] - e: [[1,3]] """ return _from_numpy_impl( data, @@ -801,38 +480,6 @@ def from_arrow( Returns: A new DataFrame. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's define a dataframe-agnostic function which creates a PyArrow - Table. - - >>> def agnostic_to_arrow(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return nw.from_arrow(df, native_namespace=pa).to_native() - - Let's see what happens when passing pandas / Polars input: - - >>> agnostic_to_arrow(pd.DataFrame(data)) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] - >>> agnostic_to_arrow(pl.DataFrame(data)) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] """ if not hasattr(native_frame, "__arrow_c_stream__"): msg = f"Given object of type {type(native_frame)} does not support PyCapsule interface" @@ -948,12 +595,7 @@ def _get_deps_info() -> dict[str, str]: def show_versions() -> None: - """Print useful debugging information. - - Examples: - >>> from narwhals import show_versions - >>> show_versions() # doctest: +SKIP - """ + """Print useful debugging information.""" sys_info = _get_sys_info() deps_info = _get_deps_info() @@ -1002,45 +644,6 @@ def read_csv( Returns: DataFrame. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from types import ModuleType - - Let's create an agnostic function that reads a csv file with a specified native namespace: - - >>> def agnostic_read_csv(native_namespace: ModuleType) -> IntoDataFrame: - ... return nw.read_csv("file.csv", native_namespace=native_namespace).to_native() - - Then we can read the file by passing pandas, Polars or PyArrow namespaces: - - >>> agnostic_read_csv(native_namespace=pd) # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 - >>> agnostic_read_csv(native_namespace=pl) # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_read_csv(native_namespace=pa) # doctest:+SKIP - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] """ return _read_csv_impl(source, native_namespace=native_namespace, **kwargs) @@ -1088,38 +691,6 @@ def scan_csv( Returns: LazyFrame. - - Examples: - >>> import dask.dataframe as dd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> from types import ModuleType - - Let's create an agnostic function that lazily reads a csv file with a specified native namespace: - - >>> def agnostic_scan_csv(native_namespace: ModuleType) -> IntoFrame: - ... 
return nw.scan_csv("file.csv", native_namespace=native_namespace).to_native() - - Then we can read the file by passing, for example, Polars or Dask namespaces: - - >>> agnostic_scan_csv(native_namespace=pl).collect() # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_scan_csv(native_namespace=dd).compute() # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 """ return _scan_csv_impl(source, native_namespace=native_namespace, **kwargs) @@ -1170,47 +741,6 @@ def read_parquet( Returns: DataFrame. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from types import ModuleType - - Let's create an agnostic function that reads a parquet file with a specified native namespace: - - >>> def agnostic_read_parquet(native_namespace: ModuleType) -> IntoDataFrame: - ... return nw.read_parquet( - ... "file.parquet", native_namespace=native_namespace - ... ).to_native() - - Then we can read the file by passing pandas, Polars or PyArrow namespaces: - - >>> agnostic_read_parquet(native_namespace=pd) # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 - >>> agnostic_read_parquet(native_namespace=pl) # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_read_parquet(native_namespace=pa) # doctest:+SKIP - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] """ return _read_parquet_impl(source, native_namespace=native_namespace, **kwargs) @@ -1259,40 +789,6 @@ def scan_parquet( Returns: LazyFrame. 
- - Examples: - >>> import dask.dataframe as dd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> from types import ModuleType - - Let's create an agnostic function that lazily reads a parquet file with a specified native namespace: - - >>> def agnostic_scan_parquet(native_namespace: ModuleType) -> IntoFrame: - ... return nw.scan_parquet( - ... "file.parquet", native_namespace=native_namespace - ... ).to_native() - - Then we can read the file by passing, for example, Polars or Dask namespaces: - - >>> agnostic_scan_parquet(native_namespace=pl).collect() # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_scan_parquet(native_namespace=dd).compute() # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 """ return _scan_parquet_impl(source, native_namespace=native_namespace, **kwargs) @@ -1334,49 +830,6 @@ def col(*names: str | Iterable[str]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_col(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a") * nw.col("b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_col`: - - >>> agnostic_col(df_pd) - a - 0 3 - 1 8 - - >>> agnostic_col(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 3 │ - │ 8 │ - └─────┘ - - >>> agnostic_col(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[3,8]] """ def func(plx: Any) -> Any: @@ -1397,48 +850,6 @@ def nth(*indices: int | Sequence[int]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_nth(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.nth(0) * 2).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_nth`: - - >>> agnostic_nth(df_pd) - a - 0 2 - 1 4 - - >>> agnostic_nth(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 4 │ - └─────┘ - - >>> agnostic_nth(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,4]] """ def func(plx: Any) -> Any: @@ -1453,53 +864,6 @@ def all_() -> Expr: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all() * 2).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_all`: - - >>> agnostic_all(df_pd) - a b - 0 2 8 - 1 4 10 - 2 6 12 - - >>> agnostic_all(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 2 ┆ 8 │ - │ 4 ┆ 10 │ - │ 6 ┆ 12 │ - └─────┴─────┘ - - >>> agnostic_all(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[2,4,6]] - b: [[8,10,12]] """ return Expr(lambda plx: plx.all()) @@ -1510,45 +874,6 @@ def len_() -> Expr: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [5, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.len()).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_len`: - - >>> agnostic_len(df_pd) - len - 0 2 - >>> agnostic_len(df_pl) - shape: (1, 1) - ┌─────┐ - │ len │ - │ --- │ - │ u32 │ - ╞═════╡ - │ 2 │ - └─────┘ - >>> agnostic_len(df_pa) - pyarrow.Table - len: int64 - ---- - len: [[2]] """ def func(plx: Any) -> Any: @@ -1568,47 +893,6 @@ def sum(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.sum("a")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_sum`: - - >>> agnostic_sum(df_pd) - a - 0 3 - - >>> agnostic_sum(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 3 │ - └─────┘ - - >>> agnostic_sum(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[3]] """ return Expr(lambda plx: plx.col(*columns).sum()) @@ -1624,47 +908,6 @@ def mean(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 8, 3]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe agnostic function: - - >>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.mean("a")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_mean`: - - >>> agnostic_mean(df_pd) - a - 0 4.0 - - >>> agnostic_mean(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 4.0 │ - └─────┘ - - >>> agnostic_mean(df_pa) - pyarrow.Table - a: double - ---- - a: [[4]] """ return Expr(lambda plx: plx.col(*columns).mean()) @@ -1682,47 +925,6 @@ def median(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [4, 5, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe agnostic function: - - >>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.median("a")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_median`: - - >>> agnostic_median(df_pd) - a - 0 4.0 - - >>> agnostic_median(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 4.0 │ - └─────┘ - - >>> agnostic_median(df_pa) - pyarrow.Table - a: double - ---- - a: [[4]] """ return Expr(lambda plx: plx.col(*columns).median()) @@ -1738,47 +940,6 @@ def min(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [5, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.min("b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_min`: - - >>> agnostic_min(df_pd) - b - 0 5 - - >>> agnostic_min(df_pl) - shape: (1, 1) - ┌─────┐ - │ b │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 5 │ - └─────┘ - - >>> agnostic_min(df_pa) - pyarrow.Table - b: int64 - ---- - b: [[5]] """ return Expr(lambda plx: plx.col(*columns).min()) @@ -1794,47 +955,6 @@ def max(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [5, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.max("a")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_max`: - - >>> agnostic_max(df_pd) - a - 0 2 - - >>> agnostic_max(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - └─────┘ - - >>> agnostic_max(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2]] """ return Expr(lambda plx: plx.col(*columns).max()) @@ -1851,50 +971,6 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [5, 10, None]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_sum_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.sum_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_sum_horizontal`: - - >>> agnostic_sum_horizontal(df_pd) - a - 0 6.0 - 1 12.0 - 2 3.0 - - >>> agnostic_sum_horizontal(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 6 │ - │ 12 │ - │ 3 │ - └─────┘ - - >>> agnostic_sum_horizontal(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[6,12,3]] """ if not exprs: msg = "At least one expression must be passed to `sum_horizontal`" @@ -1918,53 +994,6 @@ def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 8, 3], - ... "b": [4, 5, None], - ... "c": ["x", "y", "z"], - ... 
} - - We define a dataframe-agnostic function that computes the horizontal min of "a" - and "b" columns: - - >>> def agnostic_min_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.min_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_min_horizontal`: - - >>> agnostic_min_horizontal(pd.DataFrame(data)) - a - 0 1.0 - 1 5.0 - 2 3.0 - - >>> agnostic_min_horizontal(pl.DataFrame(data)) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 5 │ - │ 3 │ - └─────┘ - - >>> agnostic_min_horizontal(pa.table(data)) - pyarrow.Table - a: int64 - ---- - a: [[1,5,3]] """ if not exprs: msg = "At least one expression must be passed to `min_horizontal`" @@ -1988,53 +1017,6 @@ def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 8, 3], - ... "b": [4, 5, None], - ... "c": ["x", "y", "z"], - ... } - - We define a dataframe-agnostic function that computes the horizontal max of "a" - and "b" columns: - - >>> def agnostic_max_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.max_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_max_horizontal`: - - >>> agnostic_max_horizontal(pd.DataFrame(data)) - a - 0 4.0 - 1 8.0 - 2 3.0 - - >>> agnostic_max_horizontal(pl.DataFrame(data)) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 4 │ - │ 8 │ - │ 3 │ - └─────┘ - - >>> agnostic_max_horizontal(pa.table(data)) - pyarrow.Table - a: int64 - ---- - a: [[4,8,3]] """ if not exprs: msg = "At least one expression must be passed to `max_horizontal`" @@ -2093,57 +1075,6 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: Returns: A "when" object, which `.then` can be called on. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [5, 10, 15]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_when_then_otherwise(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") - ... 
).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_when_then_otherwise`: - - >>> agnostic_when_then_otherwise(df_pd) - a b a_when - 0 1 5 5 - 1 2 10 5 - 2 3 15 6 - - >>> agnostic_when_then_otherwise(df_pl) - shape: (3, 3) - ┌─────┬─────┬────────┐ - │ a ┆ b ┆ a_when │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i32 │ - ╞═════╪═════╪════════╡ - │ 1 ┆ 5 ┆ 5 │ - │ 2 ┆ 10 ┆ 5 │ - │ 3 ┆ 15 ┆ 6 │ - └─────┴─────┴────────┘ - - >>> agnostic_when_then_otherwise(df_pa) - pyarrow.Table - a: int64 - b: int64 - a_when: int64 - ---- - a: [[1,2,3]] - b: [[5,10,15]] - a_when: [[5,5,6]] """ return When(*predicates) @@ -2157,64 +1088,6 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [False, False, True, True, False, None], - ... "b": [False, True, True, None, None, None], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_all_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select("a", "b", all=nw.all_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_all_horizontal`: - - >>> agnostic_all_horizontal(df_pd) - a b all - 0 False False False - 1 False True False - 2 True True True - 3 True - 4 False False - 5 - - >>> agnostic_all_horizontal(df_pl) - shape: (6, 3) - ┌───────┬───────┬───────┐ - │ a ┆ b ┆ all │ - │ --- ┆ --- ┆ --- │ - │ bool ┆ bool ┆ bool │ - ╞═══════╪═══════╪═══════╡ - │ false ┆ false ┆ false │ - │ false ┆ true ┆ false │ - │ true ┆ true ┆ true │ - │ true ┆ null ┆ null │ - │ false ┆ null ┆ false │ - │ null ┆ null ┆ null │ - └───────┴───────┴───────┘ - - >>> agnostic_all_horizontal(df_pa) - pyarrow.Table - a: bool - b: bool - all: bool - ---- - a: [[false,false,true,true,false,null]] - b: [[false,true,true,null,null,null]] - all: [[false,false,true,null,false,null]] """ if not exprs: msg = "At least one expression must be passed to `all_horizontal`" @@ -2236,51 +1109,6 @@ def lit(value: Any, dtype: DType | type[DType] | None = None) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_lit(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.with_columns(nw.lit(3)).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_lit`: - - >>> agnostic_lit(df_pd) - a literal - 0 1 3 - 1 2 3 - - >>> agnostic_lit(df_pl) - shape: (2, 2) - ┌─────┬─────────┐ - │ a ┆ literal │ - │ --- ┆ --- │ - │ i64 ┆ i32 │ - ╞═════╪═════════╡ - │ 1 ┆ 3 │ - │ 2 ┆ 3 │ - └─────┴─────────┘ - - >>> agnostic_lit(df_pa) - pyarrow.Table - a: int64 - literal: int64 - ---- - a: [[1,2]] - literal: [[3,3]] """ if is_numpy_array(value): msg = ( @@ -2305,64 +1133,6 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [False, False, True, True, False, None], - ... "b": [False, True, True, None, None, None], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_any_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select("a", "b", any=nw.any_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_any_horizontal`: - - >>> agnostic_any_horizontal(df_pd) - a b any - 0 False False False - 1 False True True - 2 True True True - 3 True True - 4 False - 5 - - >>> agnostic_any_horizontal(df_pl) - shape: (6, 3) - ┌───────┬───────┬───────┐ - │ a ┆ b ┆ any │ - │ --- ┆ --- ┆ --- │ - │ bool ┆ bool ┆ bool │ - ╞═══════╪═══════╪═══════╡ - │ false ┆ false ┆ false │ - │ false ┆ true ┆ true │ - │ true ┆ true ┆ true │ - │ true ┆ null ┆ true │ - │ false ┆ null ┆ null │ - │ null ┆ null ┆ null │ - └───────┴───────┴───────┘ - - >>> agnostic_any_horizontal(df_pa) - pyarrow.Table - a: bool - b: bool - any: bool - ---- - a: [[false,false,true,true,false,null]] - b: [[false,true,true,null,null,null]] - any: [[false,true,true,true,null,null]] """ if not exprs: msg = "At least one expression must be passed to `any_horizontal`" @@ -2383,56 +1153,6 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 8, 3], - ... "b": [4, 5, None], - ... "c": ["x", "y", "z"], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function that computes the horizontal mean of "a" - and "b" columns: - - >>> def agnostic_mean_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.mean_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_mean_horizontal`: - - >>> agnostic_mean_horizontal(df_pd) - a - 0 2.5 - 1 6.5 - 2 3.0 - - >>> agnostic_mean_horizontal(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2.5 │ - │ 6.5 │ - │ 3.0 │ - └─────┘ - - >>> agnostic_mean_horizontal(df_pa) - pyarrow.Table - a: double - ---- - a: [[2.5,6.5,3]] """ if not exprs: msg = "At least one expression must be passed to `mean_horizontal`" @@ -2465,62 +1185,6 @@ def concat_str( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 2, 3], - ... "b": ["dogs", "cats", None], - ... "c": ["play", "swim", "walk"], - ... } - - We define a dataframe-agnostic function that computes the horizontal string - concatenation of different columns - - >>> def agnostic_concat_str(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.concat_str( - ... [ - ... nw.col("a") * 2, - ... nw.col("b"), - ... nw.col("c"), - ... ], - ... separator=" ", - ... ).alias("full_sentence") - ... 
).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_concat_str`: - - >>> agnostic_concat_str(pd.DataFrame(data)) - full_sentence - 0 2 dogs play - 1 4 cats swim - 2 None - - >>> agnostic_concat_str(pl.DataFrame(data)) - shape: (3, 1) - ┌───────────────┐ - │ full_sentence │ - │ --- │ - │ str │ - ╞═══════════════╡ - │ 2 dogs play │ - │ 4 cats swim │ - │ null │ - └───────────────┘ - - >>> agnostic_concat_str(pa.table(data)) - pyarrow.Table - full_sentence: string - ---- - full_sentence: [["2 dogs play","4 cats swim",null]] """ return Expr( lambda plx: plx.concat_str( diff --git a/narwhals/group_by.py b/narwhals/group_by.py index 76c04fa1f..e500acab5 100644 --- a/narwhals/group_by.py +++ b/narwhals/group_by.py @@ -39,75 +39,6 @@ def agg( Returns: A new Dataframe. - - Examples: - Group by one column or by multiple columns and call `agg` to compute - the grouped sum of another column. - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> df_pd = pd.DataFrame( - ... { - ... "a": ["a", "b", "a", "b", "c"], - ... "b": [1, 2, 1, 3, 3], - ... "c": [5, 4, 3, 2, 1], - ... } - ... ) - >>> df_pl = pl.DataFrame( - ... { - ... "a": ["a", "b", "a", "b", "c"], - ... "b": [1, 2, 1, 3, 3], - ... "c": [5, 4, 3, 2, 1], - ... } - ... ) - - We define library agnostic functions: - - >>> @nw.narwhalify - ... def func(df): - ... return df.group_by("a").agg(nw.col("b").sum()).sort("a") - - >>> @nw.narwhalify - ... def func_mult_col(df): - ... 
return df.group_by("a", "b").agg(nw.sum("c")).sort("a", "b") - - We can then pass either pandas or Polars to `func` and `func_mult_col`: - - >>> func(df_pd) - a b - 0 a 2 - 1 b 5 - 2 c 3 - >>> func(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ str ┆ i64 │ - ╞═════╪═════╡ - │ a ┆ 2 │ - │ b ┆ 5 │ - │ c ┆ 3 │ - └─────┴─────┘ - >>> func_mult_col(df_pd) - a b c - 0 a 1 8 - 1 b 2 4 - 2 b 3 2 - 3 c 3 1 - >>> func_mult_col(df_pl) - shape: (4, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ a ┆ 1 ┆ 8 │ - │ b ┆ 2 ┆ 4 │ - │ b ┆ 3 ┆ 2 │ - │ c ┆ 3 ┆ 1 │ - └─────┴─────┴─────┘ """ aggs, named_aggs = self._df._flatten_and_extract(*aggs, **named_aggs) return self._df._from_compliant_dataframe( # type: ignore[return-value] @@ -143,57 +74,6 @@ def agg( Returns: A new LazyFrame. - - Examples: - Group by one column or by multiple columns and call `agg` to compute - the grouped sum of another column. - - >>> import polars as pl - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> lf_pl = pl.LazyFrame( - ... { - ... "a": ["a", "b", "a", "b", "c"], - ... "b": [1, 2, 1, 3, 3], - ... "c": [5, 4, 3, 2, 1], - ... } - ... ) - - We define library agnostic functions: - - >>> def agnostic_func_one_col(lf_native: IntoFrameT) -> IntoFrameT: - ... lf = nw.from_native(lf_native) - ... return nw.to_native(lf.group_by("a").agg(nw.col("b").sum()).sort("a")) - - >>> def agnostic_func_mult_col(lf_native: IntoFrameT) -> IntoFrameT: - ... lf = nw.from_native(lf_native) - ... 
return nw.to_native(lf.group_by("a", "b").agg(nw.sum("c")).sort("a", "b")) - - We can then pass a lazy frame and materialise it with `collect`: - - >>> agnostic_func_one_col(lf_pl).collect() - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ str ┆ i64 │ - ╞═════╪═════╡ - │ a ┆ 2 │ - │ b ┆ 5 │ - │ c ┆ 3 │ - └─────┴─────┘ - >>> agnostic_func_mult_col(lf_pl).collect() - shape: (4, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ a ┆ 1 ┆ 8 │ - │ b ┆ 2 ┆ 4 │ - │ b ┆ 3 ┆ 2 │ - │ c ┆ 3 ┆ 1 │ - └─────┴─────┴─────┘ """ aggs, named_aggs = self._df._flatten_and_extract(*aggs, **named_aggs) return self._df._from_compliant_dataframe( # type: ignore[return-value] diff --git a/narwhals/schema.py b/narwhals/schema.py index 1d357b766..b5261539e 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -29,28 +29,6 @@ class Schema(BaseSchema): schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None The schema definition given by column names and their associated. *instantiated* Narwhals data type. Accepts a mapping or an iterable of tuples. - - Examples: - Define a schema by passing *instantiated* data types. - - >>> import narwhals as nw - >>> schema = nw.Schema({"foo": nw.Int8(), "bar": nw.String()}) - >>> schema - Schema({'foo': Int8, 'bar': String}) - - Access the data type associated with a specific column name. - - >>> schema["foo"] - Int8 - - Access various schema properties using the `names`, `dtypes`, and `len` methods. - - >>> schema.names() - ['foo', 'bar'] - >>> schema.dtypes() - [Int8, String] - >>> schema.len() - 2 """ def __init__( diff --git a/narwhals/selectors.py b/narwhals/selectors.py index 31a5f80e8..d158e5f07 100644 --- a/narwhals/selectors.py +++ b/narwhals/selectors.py @@ -17,40 +17,6 @@ def by_dtype(*dtypes: Any) -> Expr: Returns: A new expression. 
- - Examples: - >>> import narwhals as nw - >>> import narwhals.selectors as ncs - >>> import pandas as pd - >>> import polars as pl - >>> - >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - - Let's define a dataframe-agnostic function to select int64 and float64 - dtypes and multiplies each value by 2: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(ncs.by_dtype(nw.Int64, nw.Float64) * 2) - - We can then pass either pandas or Polars dataframes: - - >>> func(df_pd) - a c - 0 2 8.2 - 1 4 4.6 - >>> func(df_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ c │ - │ --- ┆ --- │ - │ i64 ┆ f64 │ - ╞═════╪═════╡ - │ 2 ┆ 8.2 │ - │ 4 ┆ 4.6 │ - └─────┴─────┘ """ return Selector(lambda plx: plx.selectors.by_dtype(flatten(dtypes))) @@ -60,40 +26,6 @@ def numeric() -> Expr: Returns: A new expression. - - Examples: - >>> import narwhals as nw - >>> import narwhals.selectors as ncs - >>> import pandas as pd - >>> import polars as pl - >>> - >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - - Let's define a dataframe-agnostic function to select numeric - dtypes and multiplies each value by 2: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(ncs.numeric() * 2) - - We can then pass either pandas or Polars dataframes: - - >>> func(df_pd) - a c - 0 2 8.2 - 1 4 4.6 - >>> func(df_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ c │ - │ --- ┆ --- │ - │ i64 ┆ f64 │ - ╞═════╪═════╡ - │ 2 ┆ 8.2 │ - │ 4 ┆ 4.6 │ - └─────┴─────┘ """ return Selector(lambda plx: plx.selectors.numeric()) @@ -103,40 +35,6 @@ def boolean() -> Expr: Returns: A new expression. 
- - Examples: - >>> import narwhals as nw - >>> import narwhals.selectors as ncs - >>> import pandas as pd - >>> import polars as pl - >>> - >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - - Let's define a dataframe-agnostic function to select boolean - dtypes: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(ncs.boolean()) - - We can then pass either pandas or Polars dataframes: - - >>> func(df_pd) - c - 0 False - 1 True - >>> func(df_pl) - shape: (2, 1) - ┌───────┐ - │ c │ - │ --- │ - │ bool │ - ╞═══════╡ - │ false │ - │ true │ - └───────┘ """ return Selector(lambda plx: plx.selectors.boolean()) @@ -146,40 +44,6 @@ def string() -> Expr: Returns: A new expression. - - Examples: - >>> import narwhals as nw - >>> import narwhals.selectors as ncs - >>> import pandas as pd - >>> import polars as pl - >>> - >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - - Let's define a dataframe-agnostic function to select string - dtypes: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(ncs.string()) - - We can then pass either pandas or Polars dataframes: - - >>> func(df_pd) - b - 0 x - 1 y - >>> func(df_pl) - shape: (2, 1) - ┌─────┐ - │ b │ - │ --- │ - │ str │ - ╞═════╡ - │ x │ - │ y │ - └─────┘ """ return Selector(lambda plx: plx.selectors.string()) @@ -189,40 +53,6 @@ def categorical() -> Expr: Returns: A new expression. - - Examples: - >>> import narwhals as nw - >>> import narwhals.selectors as ncs - >>> import pandas as pd - >>> import polars as pl - >>> - >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]} - >>> df_pd = pd.DataFrame(data).astype({"b": "category"}) - >>> df_pl = pl.DataFrame(data, schema_overrides={"b": pl.Categorical}) - - Let's define a dataframe-agnostic function to select string - dtypes: - - >>> @nw.narwhalify - ... def func(df): - ... 
return df.select(ncs.categorical()) - - We can then pass either pandas or Polars dataframes: - - >>> func(df_pd) - b - 0 x - 1 y - >>> func(df_pl) - shape: (2, 1) - ┌─────┐ - │ b │ - │ --- │ - │ cat │ - ╞═════╡ - │ x │ - │ y │ - └─────┘ """ return Selector(lambda plx: plx.selectors.categorical()) @@ -232,40 +62,6 @@ def all() -> Expr: Returns: A new expression. - - Examples: - >>> import narwhals as nw - >>> import narwhals.selectors as ncs - >>> import pandas as pd - >>> import polars as pl - >>> - >>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]} - >>> df_pd = pd.DataFrame(data).astype({"b": "category"}) - >>> df_pl = pl.DataFrame(data, schema_overrides={"b": pl.Categorical}) - - Let's define a dataframe-agnostic function to select string - dtypes: - - >>> @nw.narwhalify - ... def func(df): - ... return df.select(ncs.all()) - - We can then pass either pandas or Polars dataframes: - - >>> func(df_pd) - a b c - 0 1 x False - 1 2 y True - >>> func(df_pl) - shape: (2, 3) - ┌─────┬─────┬───────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ cat ┆ bool │ - ╞═════╪═════╪═══════╡ - │ 1 ┆ x ┆ false │ - │ 2 ┆ y ┆ true │ - └─────┴─────┴───────┘ """ return Selector(lambda plx: plx.selectors.all()) diff --git a/narwhals/series.py b/narwhals/series.py index 46ed53abf..694e8d5bf 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -87,25 +87,6 @@ def implementation(self) -> Implementation: Returns: Implementation. 
- - Examples: - >>> import narwhals as nw - >>> import pandas as pd - - >>> s_native = pd.Series([1, 2, 3]) - >>> s = nw.from_native(s_native, series_only=True) - - >>> s.implementation - - - >>> s.implementation.is_pandas() - True - - >>> s.implementation.is_pandas_like() - True - - >>> s.implementation.is_polars() - False """ return self._compliant_series._implementation # type: ignore[no-any-return] @@ -130,65 +111,6 @@ def __getitem__(self: Self, idx: int | slice | Sequence[int]) -> Any | Self: Returns: A single element if `idx` is an integer, else a subset of the Series. - - Examples: - >>> from typing import Any - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_get_first_item(s_native: IntoSeriesT) -> Any: - ... s = nw.from_native(s_native, series_only=True) - ... return s[0] - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_get_first_item`: - - >>> agnostic_get_first_item(s_pd) - np.int64(1) - - >>> agnostic_get_first_item(s_pl) - 1 - - >>> agnostic_get_first_item(s_pa) - 1 - - We can also make a function to slice the Series: - - >>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s[:2].to_native() - - >>> agnostic_slice(s_pd) - 0 1 - 1 2 - dtype: int64 - - >>> agnostic_slice(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i64] - [ - 1 - 2 - ] - - >>> agnostic_slice(s_pa) # doctest:+ELLIPSIS - - [ - [ - 1, - 2 - ] - ] """ if isinstance(idx, int) or ( is_numpy_scalar(idx) and idx.dtype.kind in ("i", "u") @@ -229,52 +151,6 @@ def to_native(self) -> IntoSeriesT: Returns: Series of class that user started with. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_to_native(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_native`: - - >>> agnostic_to_native(s_pd) - 0 1 - 1 2 - 2 3 - dtype: int64 - - >>> agnostic_to_native(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 1 - 2 - 3 - ] - - >>> agnostic_to_native(s_pa) # doctest:+ELLIPSIS - - [ - [ - 1, - 2, - 3 - ] - ] """ return self._compliant_series._native_series # type: ignore[no-any-return] @@ -308,53 +184,6 @@ def scatter(self, indices: int | Sequence[int], values: Any) -> Self: values = [some_function(x) for x in positions] s = s.scatter(positions, values) ``` - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_scatter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.with_columns(df["a"].scatter([0, 1], [999, 888])).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_scatter`: - - >>> agnostic_scatter(df_pd) - a b - 0 999 4 - 1 888 5 - 2 3 6 - - >>> agnostic_scatter(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 999 ┆ 4 │ - │ 888 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - - >>> agnostic_scatter(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[999,888,3]] - b: [[4,5,6]] """ return self._from_compliant_series( self._compliant_series.scatter(indices, self._extract_native(values)) @@ -366,36 +195,6 @@ def shape(self) -> tuple[int]: Returns: A tuple containing the length of the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_shape(s_native: IntoSeries) -> tuple[int]: - ... s = nw.from_native(s_native, series_only=True) - ... return s.shape - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_shape`: - - >>> agnostic_shape(s_pd) - (3,) - - >>> agnostic_shape(s_pl) - (3,) - - >>> agnostic_shape(s_pa) - (3,) """ return self._compliant_series.shape # type: ignore[no-any-return] @@ -417,52 +216,6 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se Returns: A new Series with the results of the piped function applied. 
- - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a function to pipe into: - - >>> def agnostic_pipe(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.pipe(lambda x: x + 2).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_pipe`: - - >>> agnostic_pipe(s_pd) - 0 3 - 1 4 - 2 5 - dtype: int64 - - >>> agnostic_pipe(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 3 - 4 - 5 - ] - - >>> agnostic_pipe(s_pa) # doctest: +ELLIPSIS - - [ - [ - 3, - 4, - 5 - ] - ] """ return function(self, *args, **kwargs) @@ -479,36 +232,6 @@ def len(self) -> int: Returns: The number of elements in the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function that computes the len of the series: - - >>> def agnostic_len(s_native: IntoSeries) -> int: - ... s = nw.from_native(s_native, series_only=True) - ... return s.len() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_len`: - - >>> agnostic_len(s_pd) - 3 - - >>> agnostic_len(s_pl) - 3 - - >>> agnostic_len(s_pa) - 3 """ return len(self._compliant_series) @@ -518,36 +241,6 @@ def dtype(self: Self) -> DType: Returns: The data type of the Series. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_dtype(s_native: IntoSeriesT) -> nw.dtypes.DType: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dtype - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dtype`: - - >>> agnostic_dtype(s_pd) - Int64 - - >>> agnostic_dtype(s_pl) - Int64 - - >>> agnostic_dtype(s_pa) - Int64 """ return self._compliant_series.dtype # type: ignore[no-any-return] @@ -557,32 +250,6 @@ def name(self) -> str: Returns: The name of the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data, name="foo") - >>> s_pl = pl.Series("foo", data) - - We define a library agnostic function: - - >>> def agnostic_name(s_native: IntoSeries) -> str: - ... s = nw.from_native(s_native, series_only=True) - ... return s.name - - We can then pass any supported library such as pandas or Polars - to `agnostic_name`: - - >>> agnostic_name(s_pd) - 'foo' - - >>> agnostic_name(s_pl) - 'foo' """ return self._compliant_series.name # type: ignore[no-any-return] @@ -637,40 +304,6 @@ def ewm_mean( Returns: Series - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(name="a", data=data) - >>> s_pl = pl.Series(name="a", values=data) - - We define a library agnostic function: - - >>> def agnostic_ewm_mean(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.ewm_mean(com=1, ignore_nulls=False).to_native() - - We can then pass any supported library such as pandas or Polars - to `agnostic_ewm_mean`: - - >>> agnostic_ewm_mean(s_pd) - 0 1.000000 - 1 1.666667 - 2 2.428571 - Name: a, dtype: float64 - - >>> agnostic_ewm_mean(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: 'a' [f64] - [ - 1.0 - 1.666667 - 2.428571 - ] """ return self._from_compliant_series( self._compliant_series.ewm_mean( @@ -692,52 +325,6 @@ def cast(self: Self, dtype: DType | type[DType]) -> Self: Returns: A new Series with the specified data type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [True, False, True] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_cast(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.cast(nw.Int64).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cast`: - - >>> agnostic_cast(s_pd) - 0 1 - 1 0 - 2 1 - dtype: int64 - - >>> agnostic_cast(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 1 - 0 - 1 - ] - - >>> agnostic_cast(s_pa) # doctest: +ELLIPSIS - - [ - [ - 1, - 0, - 1 - ] - ] """ _validate_dtype(dtype) return self._from_compliant_series(self._compliant_series.cast(dtype)) @@ -747,50 +334,6 @@ def to_frame(self) -> DataFrame[Any]: Returns: A DataFrame containing this Series as a single column. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2] - >>> s_pd = pd.Series(data, name="a") - >>> s_pl = pl.Series("a", data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_to_frame(s_native: IntoSeries) -> IntoDataFrame: - ... s = nw.from_native(s_native, series_only=True) - ... return s.to_frame().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_frame`: - - >>> agnostic_to_frame(s_pd) - a - 0 1 - 1 2 - - >>> agnostic_to_frame(s_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - └─────┘ - - >>> agnostic_to_frame(s_pa) - pyarrow.Table - : int64 - ---- - : [[1,2]] """ return self._dataframe( self._compliant_series.to_frame(), @@ -808,36 +351,6 @@ def to_list(self) -> list[Any]: Returns: A list of Python objects. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_to_list(s_native: IntoSeries): - ... s = nw.from_native(s_native, series_only=True) - ... return s.to_list() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_list`: - - >>> agnostic_to_list(s_pd) - [1, 2, 3] - - >>> agnostic_to_list(s_pl) - [1, 2, 3] - - >>> agnostic_to_list(s_pa) - [1, 2, 3] """ return self._compliant_series.to_list() # type: ignore[no-any-return] @@ -846,36 +359,6 @@ def mean(self) -> Any: Returns: The average of all elements in the Series. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_mean(s_native: IntoSeries) -> float: - ... s = nw.from_native(s_native, series_only=True) - ... return s.mean() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_mean`: - - >>> agnostic_mean(s_pd) - np.float64(2.0) - - >>> agnostic_mean(s_pl) - 2.0 - - >>> agnostic_mean(s_pa) - 2.0 """ return self._compliant_series.mean() @@ -887,36 +370,6 @@ def median(self) -> Any: Returns: The median value of all elements in the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [5, 3, 8] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a library agnostic function: - - >>> def agnostic_median(s_native: IntoSeries) -> float: - ... s = nw.from_native(s_native, series_only=True) - ... return s.median() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_median`: - - >>> agnostic_median(s_pd) - np.float64(5.0) - - >>> agnostic_median(s_pl) - 5.0 - - >>> agnostic_median(s_pa) - 5.0 """ return self._compliant_series.median() @@ -925,40 +378,6 @@ def skew(self: Self) -> Any: Returns: The sample skewness of the Series. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 1, 2, 10, 100] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_skew(s_native: IntoSeries) -> float: - ... s = nw.from_native(s_native, series_only=True) - ... return s.skew() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_skew`: - - >>> agnostic_skew(s_pd) - np.float64(1.4724267269058975) - - >>> agnostic_skew(s_pl) - 1.4724267269058975 - - >>> agnostic_skew(s_pa) - 1.4724267269058975 - - Notes: - The skewness is a measure of the asymmetry of the probability distribution. - A perfectly symmetric distribution has a skewness of 0. """ return self._compliant_series.skew() @@ -967,36 +386,6 @@ def count(self) -> Any: Returns: The number of non-null elements in the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_count(s_native: IntoSeries) -> int: - ... s = nw.from_native(s_native, series_only=True) - ... return s.count() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_count`: - - >>> agnostic_count(s_pd) - np.int64(3) - - >>> agnostic_count(s_pl) - 3 - - >>> agnostic_count(s_pa) - 3 """ return self._compliant_series.count() @@ -1008,36 +397,6 @@ def any(self) -> Any: Returns: A boolean indicating if any values in the Series are True. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [False, True, False] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_any(s_native: IntoSeries) -> bool: - ... s = nw.from_native(s_native, series_only=True) - ... return s.any() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_any`: - - >>> agnostic_any(s_pd) - np.True_ - - >>> agnostic_any(s_pl) - True - - >>> agnostic_any(s_pa) - True """ return self._compliant_series.any() @@ -1046,36 +405,6 @@ def all(self) -> Any: Returns: A boolean indicating if all values in the Series are True. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [False, True, False] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_all(s_native: IntoSeries) -> bool: - ... s = nw.from_native(s_native, series_only=True) - ... return s.all() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_all`: - - >>> agnostic_all(s_pd) - np.False_ - - >>> agnostic_all(s_pl) - False - - >>> agnostic_all(s_pa) - False """ return self._compliant_series.all() @@ -1084,36 +413,6 @@ def min(self) -> Any: Returns: The minimum value in the Series. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_min(s_native: IntoSeries): - ... s = nw.from_native(s_native, series_only=True) - ... return s.min() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_min`: - - >>> agnostic_min(s_pd) - np.int64(1) - - >>> agnostic_min(s_pl) - 1 - - >>> agnostic_min(s_pa) - 1 """ return self._compliant_series.min() @@ -1122,107 +421,15 @@ def max(self) -> Any: Returns: The maximum value in the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_max(s_native: IntoSeries): - ... s = nw.from_native(s_native, series_only=True) - ... return s.max() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_max`: - - >>> agnostic_max(s_pd) - np.int64(3) - - >>> agnostic_max(s_pl) - 3 - - >>> agnostic_max(s_pa) - 3 """ return self._compliant_series.max() def arg_min(self) -> int: - """Returns the index of the minimum value. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_arg_min(s_native: IntoSeries): - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.arg_min() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_arg_min`: - - >>> agnostic_arg_min(s_pd) - np.int64(0) - - >>> agnostic_arg_min(s_pl) - 0 - - >>> agnostic_arg_min(s_pa) - 0 - """ + """Returns the index of the minimum value.""" return self._compliant_series.arg_min() # type: ignore[no-any-return] def arg_max(self) -> int: - """Returns the index of the maximum value. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_arg_max(s_native: IntoSeries): - ... s = nw.from_native(s_native, series_only=True) - ... return s.arg_max() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_arg_max`: - - >>> agnostic_arg_max(s_pd) - np.int64(2) - - >>> agnostic_arg_max(s_pl) - 2 - - >>> agnostic_arg_max(s_pa) - 2 - """ + """Returns the index of the maximum value.""" return self._compliant_series.arg_max() # type: ignore[no-any-return] def sum(self) -> Any: @@ -1230,36 +437,6 @@ def sum(self) -> Any: Returns: The sum of all elements in the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_sum(s_native: IntoSeries): - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.sum() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sum`: - - >>> agnostic_sum(s_pd) - np.int64(6) - - >>> agnostic_sum(s_pl) - 6 - - >>> agnostic_sum(s_pa) - 6 """ return self._compliant_series.sum() @@ -1272,36 +449,6 @@ def std(self, *, ddof: int = 1) -> Any: Returns: The standard deviation of all elements in the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_std(s_native: IntoSeries) -> float: - ... s = nw.from_native(s_native, series_only=True) - ... return s.std() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_std`: - - >>> agnostic_std(s_pd) - np.float64(1.0) - - >>> agnostic_std(s_pl) - 1.0 - - >>> agnostic_std(s_pa) - 1.0 """ return self._compliant_series.std(ddof=ddof) @@ -1311,36 +458,6 @@ def var(self, *, ddof: int = 1) -> Any: Arguments: ddof: "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_var(s_native: IntoSeries) -> float: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.var() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_var`: - - >>> agnostic_var(s_pd) - np.float64(1.0) - - >>> agnostic_var(s_pl) - 1.0 - - >>> agnostic_var(s_pa) - 1.0 """ return self._compliant_series.var(ddof=ddof) @@ -1355,136 +472,6 @@ def clip( Returns: A new Series with values clipped to the specified bounds. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_clip_lower(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.clip(2).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_clip_lower`: - - >>> agnostic_clip_lower(s_pd) - 0 2 - 1 2 - 2 3 - dtype: int64 - - >>> agnostic_clip_lower(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 2 - 2 - 3 - ] - - >>> agnostic_clip_lower(s_pa) # doctest: +ELLIPSIS - - [ - [ - 2, - 2, - 3 - ] - ] - - We define another library agnostic function: - - >>> def agnostic_clip_upper(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.clip(upper_bound=2).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_clip_upper`: - - >>> agnostic_clip_upper(s_pd) - 0 1 - 1 2 - 2 2 - dtype: int64 - - >>> agnostic_clip_upper(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 1 - 2 - 2 - ] - - >>> agnostic_clip_upper(s_pa) # doctest: +ELLIPSIS - - [ - [ - 1, - 2, - 2 - ] - ] - - We can have both at the same time - - >>> data = [-1, 1, -3, 3, -5, 5] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_clip(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.clip(-1, 3).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_clip`: - - >>> agnostic_clip(s_pd) - 0 -1 - 1 1 - 2 -1 - 3 3 - 4 -1 - 5 3 - dtype: int64 - - >>> agnostic_clip(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (6,) - Series: '' [i64] - [ - -1 - 1 - -1 - 3 - -1 - 3 - ] - - >>> agnostic_clip_upper(s_pa) # doctest: +ELLIPSIS - - [ - [ - -1, - 1, - -3, - 2, - -5, - 2 - ] - ] """ return self._from_compliant_series( self._compliant_series.clip( @@ -1501,52 +488,6 @@ def is_in(self, other: Any) -> Self: Returns: A new Series with boolean values indicating if the elements are in the other sequence. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_is_in(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.is_in([3, 2, 8]).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_in`: - - >>> agnostic_is_in(s_pd) - 0 False - 1 True - 2 True - dtype: bool - - >>> agnostic_is_in(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [bool] - [ - false - true - true - ] - - >>> agnostic_is_in(s_pa) # doctest: +ELLIPSIS - - [ - [ - false, - true, - true - ] - ] """ return self._from_compliant_series( self._compliant_series.is_in(self._extract_native(other)) @@ -1557,49 +498,6 @@ def arg_true(self) -> Self: Returns: A new Series with the indices of elements that are True. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, None, None, 2] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_arg_true(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.is_null().arg_true().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_arg_true`: - - >>> agnostic_arg_true(s_pd) - 1 1 - 2 2 - dtype: int64 - - >>> agnostic_arg_true(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [u32] - [ - 1 - 2 - ] - - >>> agnostic_arg_true(s_pa) # doctest: +ELLIPSIS - - [ - [ - 1, - 2 - ] - ] """ return self._from_compliant_series(self._compliant_series.arg_true()) @@ -1613,55 +511,6 @@ def drop_nulls(self) -> Self: Returns: A new Series with null values removed. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [2, 4, None, 3, 5] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_drop_nulls(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.drop_nulls().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_drop_nulls`: - - >>> agnostic_drop_nulls(s_pd) - 0 2.0 - 1 4.0 - 3 3.0 - 4 5.0 - dtype: float64 - - >>> agnostic_drop_nulls(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [i64] - [ - 2 - 4 - 3 - 5 - ] - - >>> agnostic_drop_nulls(s_pa) # doctest: +ELLIPSIS - - [ - [ - 2, - 4, - 3, - 5 - ] - ] """ return self._from_compliant_series(self._compliant_series.drop_nulls()) @@ -1670,52 +519,6 @@ def abs(self) -> Self: Returns: A new Series with the absolute values of the original elements. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [2, -4, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.abs().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_abs`: - - >>> agnostic_abs(s_pd) - 0 2 - 1 4 - 2 3 - dtype: int64 - - >>> agnostic_abs(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 2 - 4 - 3 - ] - - >>> agnostic_abs(s_pa) # doctest: +ELLIPSIS - - [ - [ - 2, - 4, - 3 - ] - ] """ return self._from_compliant_series(self._compliant_series.abs()) @@ -1727,52 +530,6 @@ def cum_sum(self: Self, *, reverse: bool = False) -> Self: Returns: A new Series with the cumulative sum of non-null values. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [2, 4, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_cum_sum(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.cum_sum().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_sum`: - - >>> agnostic_cum_sum(s_pd) - 0 2 - 1 6 - 2 9 - dtype: int64 - - >>> agnostic_cum_sum(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 2 - 6 - 9 - ] - - >>> agnostic_cum_sum(s_pa) # doctest: +ELLIPSIS - - [ - [ - 2, - 6, - 9 - ] - ] """ return self._from_compliant_series( self._compliant_series.cum_sum(reverse=reverse) @@ -1788,52 +545,6 @@ def unique(self, *, maintain_order: bool = False) -> Self: Returns: A new Series with duplicate values removed. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [2, 4, 4, 6] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_unique(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.unique(maintain_order=True).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_unique`: - - >>> agnostic_unique(s_pd) - 0 2 - 1 4 - 2 6 - dtype: int64 - - >>> agnostic_unique(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 2 - 4 - 6 - ] - - >>> agnostic_unique(s_pa) # doctest: +ELLIPSIS - - [ - [ - 2, - 4, - 6 - ] - ] """ return self._from_compliant_series( self._compliant_series.unique(maintain_order=maintain_order) @@ -1853,52 +564,6 @@ def diff(self) -> Self: Returns: A new Series with the difference between each element and its predecessor. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [2, 4, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_diff(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.diff().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_diff`: - - >>> agnostic_diff(s_pd) - 0 NaN - 1 2.0 - 2 -1.0 - dtype: float64 - - >>> agnostic_diff(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - null - 2 - -1 - ] - - >>> agnostic_diff(s_pa) # doctest: +ELLIPSIS - - [ - [ - null, - 2, - -1 - ] - ] """ return self._from_compliant_series(self._compliant_series.diff()) @@ -1920,52 +585,6 @@ def shift(self, n: int) -> Self: do: s.shift(1).fill_null(0).cast(nw.Int64) - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [2, 4, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_shift(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.shift(1).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_shift`: - - >>> agnostic_shift(s_pd) - 0 NaN - 1 2.0 - 2 4.0 - dtype: float64 - - >>> agnostic_shift(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - null - 2 - 4 - ] - - >>> agnostic_shift(s_pa) # doctest: +ELLIPSIS - - [ - [ - null, - 2, - 4 - ] - ] """ return self._from_compliant_series(self._compliant_series.shift(n)) @@ -1993,55 +612,6 @@ def sample( The `sample` method returns a Series with a specified number of randomly selected items chosen from this Series. The results are not consistent across libraries. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3, 4] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_sample(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.sample(fraction=1.0, with_replacement=True).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sample`: - - >>> agnostic_sample(s_pd) # doctest: +SKIP - a - 2 3 - 1 2 - 3 4 - 3 4 - - >>> agnostic_sample(s_pl) # doctest: +SKIP - shape: (4,) - Series: '' [i64] - [ - 1 - 4 - 3 - 4 - ] - - >>> agnostic_sample(s_pa) # doctest: +SKIP - - [ - [ - 1, - 4, - 3, - 4 - ] - ] """ return self._from_compliant_series( self._compliant_series.sample( @@ -2076,52 +646,6 @@ def alias(self, name: str) -> Self: Returns: A new Series with the updated name. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data, name="foo") - >>> s_pl = pl.Series("foo", data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_alias(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.alias("bar").to_native() - - We can then pass any supported library such as pandas or Polars, or - PyArrow to `agnostic_alias`: - - >>> agnostic_alias(s_pd) - 0 1 - 1 2 - 2 3 - Name: bar, dtype: int64 - - >>> agnostic_alias(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: 'bar' [i64] - [ - 1 - 2 - 3 - ] - - >>> agnostic_alias(s_pa) # doctest: +ELLIPSIS - - [ - [ - 1, - 2, - 3 - ] - ] """ return self._from_compliant_series(self._compliant_series.alias(name=name)) @@ -2154,52 +678,6 @@ def rename(self, name: str) -> Self: Returns: A new Series with the updated name. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data, name="foo") - >>> s_pl = pl.Series("foo", data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_rename(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.rename("bar").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rename`: - - >>> agnostic_rename(s_pd) - 0 1 - 1 2 - 2 3 - Name: bar, dtype: int64 - - >>> agnostic_rename(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: 'bar' [i64] - [ - 1 - 2 - 3 - ] - - >>> agnostic_rename(s_pa) # doctest: +ELLIPSIS - - [ - [ - 1, - 2, - 3 - ] - ] """ return self.alias(name=name) @@ -2225,57 +703,6 @@ def replace_strict( Returns: A new Series with values replaced according to the mapping. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = {"a": [3, 0, 1, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define dataframe-agnostic functions: - - >>> def agnostic_replace_strict(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.replace_strict( - ... [0, 1, 2, 3], ["zero", "one", "two", "three"], return_dtype=nw.String - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_replace_strict`: - - >>> agnostic_replace_strict(df_pd["a"]) - 0 three - 1 zero - 2 one - 3 two - Name: a, dtype: object - - >>> agnostic_replace_strict(df_pl["a"]) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: 'a' [str] - [ - "three" - "zero" - "one" - "two" - ] - - >>> agnostic_replace_strict(df_pa["a"]) - - [ - [ - "three", - "zero", - "one", - "two" - ] - ] """ if new is None: if not isinstance(old, Mapping): @@ -2298,87 +725,6 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: Returns: A new sorted Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [5, None, 1, 2] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define library agnostic functions: - - >>> def agnostic_sort(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.sort().to_native() - - >>> def agnostic_sort_descending(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.sort(descending=True).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sort` and `agnostic_sort_descending`: - - >>> agnostic_sort(s_pd) - 1 NaN - 2 1.0 - 3 2.0 - 0 5.0 - dtype: float64 - - >>> agnostic_sort(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [i64] - [ - null - 1 - 2 - 5 - ] - - >>> agnostic_sort(s_pa) # doctest: +ELLIPSIS - - [ - [ - null, - 1, - 2, - 5 - ] - ] - - >>> agnostic_sort_descending(s_pd) - 1 NaN - 0 5.0 - 3 2.0 - 2 1.0 - dtype: float64 - - >>> agnostic_sort_descending(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [i64] - [ - null - 5 - 2 - 1 - ] - - >>> agnostic_sort_descending(s_pa) # doctest: +ELLIPSIS - - [ - [ - null, - 5, - 2, - 1 - ] - ] """ return self._from_compliant_series( self._compliant_series.sort(descending=descending, nulls_last=nulls_last) @@ -2394,52 +740,6 @@ def is_null(self) -> Self: Returns: A boolean Series indicating which values are null. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_null(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.is_null().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_null`: - - >>> agnostic_is_null(s_pd) - 0 False - 1 False - 2 True - dtype: bool - - >>> agnostic_is_null(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [bool] - [ - false - false - true - ] - - >>> agnostic_is_null(s_pa) # doctest:+ELLIPSIS - - [ - [ - false, - false, - true - ] - ] """ return self._from_compliant_series(self._compliant_series.is_null()) @@ -2453,46 +753,6 @@ def is_nan(self) -> Self: pandas handles null values differently from Polars and PyArrow. See [null_handling](../pandas_like_concepts/null_handling.md/) for reference. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [0.0, None, 2.0] - >>> s_pd = pd.Series(data, dtype="Float64") - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data], type=pa.float64()) - - >>> def agnostic_self_div_is_nan(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.is_nan().to_native() - - >>> print(agnostic_self_div_is_nan(s_pd)) - 0 False - 1 - 2 False - dtype: boolean - - >>> print(agnostic_self_div_is_nan(s_pl)) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [bool] - [ - false - null - false - ] - - >>> print(agnostic_self_div_is_nan(s_pa)) # doctest: +NORMALIZE_WHITESPACE - [ - [ - false, - null, - false - ] - ] """ return self._from_compliant_series(self._compliant_series.is_nan()) @@ -2516,83 +776,6 @@ def fill_null( Returns: A new Series with null values filled according to the specified value or strategy. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_fill_null(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.fill_null(5).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_fill_null`: - - >>> agnostic_fill_null(s_pd) - 0 1.0 - 1 2.0 - 2 5.0 - dtype: float64 - - >>> agnostic_fill_null(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 1 - 2 - 5 - ] - - >>> agnostic_fill_null(s_pa) # doctest:+ELLIPSIS - - [ - [ - 1, - 2, - 5 - ] - ] - - Using a strategy: - - >>> def agnostic_fill_null_with_strategy(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.fill_null(strategy="forward", limit=1).to_native() - - >>> agnostic_fill_null_with_strategy(s_pd) - 0 1.0 - 1 2.0 - 2 2.0 - dtype: float64 - - >>> agnostic_fill_null_with_strategy(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 1 - 2 - 2 - ] - - >>> agnostic_fill_null_with_strategy(s_pa) # doctest:+ELLIPSIS - - [ - [ - 1, - 2, - 2 - ] - ] """ if value is not None and strategy is not None: msg = "cannot specify both `value` and `strategy`" @@ -2626,58 +809,6 @@ def is_between( Returns: A boolean Series indicating which values are between the given bounds. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3, 4, 5] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_is_between(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.is_between(2, 4, "right").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_between`: - - >>> agnostic_is_between(s_pd) - 0 False - 1 False - 2 True - 3 True - 4 False - dtype: bool - - >>> agnostic_is_between(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (5,) - Series: '' [bool] - [ - false - false - true - true - false - ] - - >>> agnostic_is_between(s_pa) # doctest: +ELLIPSIS - - [ - [ - false, - false, - true, - true, - false - ] - ] """ return self._from_compliant_series( self._compliant_series.is_between( @@ -2692,36 +823,6 @@ def n_unique(self) -> int: Returns: Number of unique values in the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_n_unique(s_native: IntoSeries) -> int: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.n_unique() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_n_unique`: - - >>> agnostic_n_unique(s_pd) - 3 - - >>> agnostic_n_unique(s_pl) - 3 - - >>> agnostic_n_unique(s_pa) - 3 """ return self._compliant_series.n_unique() # type: ignore[no-any-return] @@ -2730,37 +831,6 @@ def to_numpy(self) -> np.ndarray: Returns: NumPy ndarray representation of the Series. - - Examples: - >>> import numpy as np - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data, name="a") - >>> s_pl = pl.Series("a", data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_to_numpy(s_native: IntoSeries) -> np.ndarray: - ... s = nw.from_native(s_native, series_only=True) - ... return s.to_numpy() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_numpy`: - - >>> agnostic_to_numpy(s_pd) - array([1, 2, 3]...) - - >>> agnostic_to_numpy(s_pl) - array([1, 2, 3]...) - - >>> agnostic_to_numpy(s_pa) - array([1, 2, 3]...) """ return self._compliant_series.to_numpy() @@ -2769,45 +839,6 @@ def to_pandas(self) -> pd.Series: Returns: A pandas Series containing the data from this Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data, name="a") - >>> s_pl = pl.Series("a", data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_to_pandas(s_native: IntoSeries) -> pd.Series: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.to_pandas() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_pandas`: - - >>> agnostic_to_pandas(s_pd) - 0 1 - 1 2 - 2 3 - Name: a, dtype: int64 - - >>> agnostic_to_pandas(s_pl) - 0 1 - 1 2 - 2 3 - Name: a, dtype: int64 - - >>> agnostic_to_pandas(s_pa) - 0 1 - 1 2 - 2 3 - Name: , dtype: int64 """ return self._compliant_series.to_pandas() @@ -2940,52 +971,6 @@ def filter(self, other: Any) -> Self: Returns: A new Series with elements that satisfy the condition. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [4, 10, 15, 34, 50] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_filter(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.filter(s > 10).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_filter`: - - >>> agnostic_filter(s_pd) - 2 15 - 3 34 - 4 50 - dtype: int64 - - >>> agnostic_filter(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 15 - 34 - 50 - ] - - >>> agnostic_filter(s_pa) # doctest: +ELLIPSIS - - [ - [ - 15, - 34, - 50 - ] - ] """ return self._from_compliant_series( self._compliant_series.filter(self._extract_native(other)) @@ -2997,55 +982,6 @@ def is_duplicated(self: Self) -> Self: Returns: A new Series with boolean values indicating duplicated rows. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3, 1] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_duplicated(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.is_duplicated().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_duplicated`: - - >>> agnostic_is_duplicated(s_pd) - 0 True - 1 False - 2 False - 3 True - dtype: bool - - >>> agnostic_is_duplicated(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [bool] - [ - true - false - false - true - ] - - >>> agnostic_is_duplicated(s_pa) # doctest: +ELLIPSIS - - [ - [ - true, - false, - false, - true - ] - ] """ return self._from_compliant_series(self._compliant_series.is_duplicated()) @@ -3054,37 +990,6 @@ def is_empty(self: Self) -> bool: Returns: A boolean indicating if the series is empty. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - Let's define a dataframe-agnostic function that filters rows in which "foo" - values are greater than 10, and then checks if the result is empty or not: - - >>> def agnostic_is_empty(s_native: IntoSeries) -> bool: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.filter(s > 10).is_empty() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_empty`: - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - >>> agnostic_is_empty(s_pd), agnostic_is_empty(s_pl), agnostic_is_empty(s_pa) - (True, True, True) - - >>> data = [100, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - >>> agnostic_is_empty(s_pd), agnostic_is_empty(s_pl), agnostic_is_empty(s_pa) - (False, False, False) """ return self._compliant_series.is_empty() # type: ignore[no-any-return] @@ -3093,54 +998,6 @@ def is_unique(self: Self) -> Self: Returns: A new Series with boolean values indicating unique rows. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3, 1] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_unique(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.is_unique().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_unique`: - - >>> agnostic_is_unique(s_pd) - 0 False - 1 True - 2 True - 3 False - dtype: bool - - >>> agnostic_is_unique(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [bool] - [ - false - true - true - false - ] - >>> agnostic_is_unique(s_pa) # doctest: +ELLIPSIS - - [ - [ - false, - true, - true, - false - ] - ] """ return self._from_compliant_series(self._compliant_series.is_unique()) @@ -3154,37 +1011,6 @@ def null_count(self: Self) -> int: Returns: The number of null values in the Series. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, None, None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function that returns the null count of - the series: - - >>> def agnostic_null_count(s_native: IntoSeries) -> int: - ... s = nw.from_native(s_native, series_only=True) - ... return s.null_count() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_null_count`: - - >>> agnostic_null_count(s_pd) - np.int64(2) - - >>> agnostic_null_count(s_pl) - 2 - - >>> agnostic_null_count(s_pa) - 2 """ return self._compliant_series.null_count() # type: ignore[no-any-return] @@ -3193,58 +1019,6 @@ def is_first_distinct(self: Self) -> Self: Returns: A new Series with boolean values indicating the first occurrence of each distinct value. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 1, 2, 3, 2] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_first_distinct(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.is_first_distinct().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_first_distinct`: - - >>> agnostic_is_first_distinct(s_pd) - 0 True - 1 False - 2 True - 3 True - 4 False - dtype: bool - - >>> agnostic_is_first_distinct(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (5,) - Series: '' [bool] - [ - true - false - true - true - false - ] - - >>> agnostic_is_first_distinct(s_pa) # doctest: +ELLIPSIS - - [ - [ - true, - false, - true, - true, - false - ] - ] """ return self._from_compliant_series(self._compliant_series.is_first_distinct()) @@ -3253,58 +1027,6 @@ def is_last_distinct(self: Self) -> Self: Returns: A new Series with boolean values indicating the last occurrence of each distinct value. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 1, 2, 3, 2] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_last_distinct(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.is_last_distinct().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_last_distinct`: - - >>> agnostic_is_last_distinct(s_pd) - 0 False - 1 True - 2 False - 3 True - 4 True - dtype: bool - - >>> agnostic_is_last_distinct(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (5,) - Series: '' [bool] - [ - false - true - false - true - true - ] - - >>> agnostic_is_last_distinct(s_pa) # doctest: +ELLIPSIS - - [ - [ - false, - true, - false, - true, - true - ] - ] """ return self._from_compliant_series(self._compliant_series.is_last_distinct()) @@ -3316,43 +1038,6 @@ def is_sorted(self: Self, *, descending: bool = False) -> bool: Returns: A boolean indicating if the Series is sorted. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> unsorted_data = [1, 3, 2] - >>> sorted_data = [3, 2, 1] - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_sorted(s_native: IntoSeries, descending: bool = False): - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.is_sorted(descending=descending) - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_sorted`: - - >>> agnostic_is_sorted(pd.Series(unsorted_data)) - False - - >>> agnostic_is_sorted(pd.Series(sorted_data), descending=True) - True - - >>> agnostic_is_sorted(pl.Series(unsorted_data)) - False - - >>> agnostic_is_sorted(pl.Series(sorted_data), descending=True) - True - - >>> agnostic_is_sorted(pa.chunked_array([unsorted_data])) - False - - >>> agnostic_is_sorted(pa.chunked_array([sorted_data]), descending=True) - True """ return self._compliant_series.is_sorted(descending=descending) # type: ignore[no-any-return] @@ -3378,54 +1063,6 @@ def value_counts( A DataFrame with two columns: - The original values as first column - Either count or proportion as second column, depending on normalize parameter. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 1, 2, 3, 2] - >>> s_pd = pd.Series(data, name="s") - >>> s_pl = pl.Series(values=data, name="s") - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_value_counts(s_native: IntoSeries) -> IntoDataFrame: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.value_counts(sort=True).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_value_counts`: - - >>> agnostic_value_counts(s_pd) - s count - 0 1 2 - 1 2 2 - 2 3 1 - - >>> agnostic_value_counts(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3, 2) - ┌─────┬───────┐ - │ s ┆ count │ - │ --- ┆ --- │ - │ i64 ┆ u32 │ - ╞═════╪═══════╡ - │ 1 ┆ 2 │ - │ 2 ┆ 2 │ - │ 3 ┆ 1 │ - └─────┴───────┘ - - >>> agnostic_value_counts(s_pa) - pyarrow.Table - : int64 - count: int64 - ---- - : [[1,2,3]] - count: [[2,2,1]] """ return self._dataframe( self._compliant_series.value_counts( @@ -3450,39 +1087,6 @@ def quantile( Returns: The quantile value. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = list(range(50)) - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_quantile(s_native: IntoSeries) -> list[float]: - ... s = nw.from_native(s_native, series_only=True) - ... return [ - ... s.quantile(quantile=q, interpolation="nearest") - ... for q in (0.1, 0.25, 0.5, 0.75, 0.9) - ... ] - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_quantile`: - - >>> agnostic_quantile(s_pd) - [np.int64(5), np.int64(12), np.int64(24), np.int64(37), np.int64(44)] - - >>> agnostic_quantile(s_pl) - [5.0, 12.0, 25.0, 37.0, 44.0] - - >>> agnostic_quantile(s_pa) - [5, 12, 24, 37, 44] """ return self._compliant_series.quantile( quantile=quantile, interpolation=interpolation @@ -3500,73 +1104,6 @@ def zip_with(self: Self, mask: Self, other: Self) -> Self: Returns: A new Series with values selected from self or other based on the mask. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3, 4, 5] - >>> other = [5, 4, 3, 2, 1] - >>> mask = [True, False, True, False, True] - - Let's define a dataframe-agnostic function: - - >>> def agnostic_zip_with( - ... s1_native: IntoSeriesT, mask_native: IntoSeriesT, s2_native: IntoSeriesT - ... ) -> IntoSeriesT: - ... s1 = nw.from_native(s1_native, series_only=True) - ... mask = nw.from_native(mask_native, series_only=True) - ... s2 = nw.from_native(s2_native, series_only=True) - ... return s1.zip_with(mask, s2).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_zip_with`: - - >>> agnostic_zip_with( - ... s1_native=pl.Series(data), - ... mask_native=pl.Series(mask), - ... s2_native=pl.Series(other), - ... ) # doctest: +NORMALIZE_WHITESPACE - shape: (5,) - Series: '' [i64] - [ - 1 - 4 - 3 - 2 - 5 - ] - - >>> agnostic_zip_with( - ... s1_native=pd.Series(data), - ... mask_native=pd.Series(mask), - ... s2_native=pd.Series(other), - ... ) - 0 1 - 1 4 - 2 3 - 3 2 - 4 5 - dtype: int64 - - >>> agnostic_zip_with( - ... s1_native=pa.chunked_array([data]), - ... mask_native=pa.chunked_array([mask]), - ... s2_native=pa.chunked_array([other]), - ... ) # doctest: +ELLIPSIS - - [ - [ - 1, - 4, - 3, - 2, - 5 - ] - ] """ return self._from_compliant_series( self._compliant_series.zip_with( @@ -3582,36 +1119,6 @@ def item(self: Self, index: int | None = None) -> Any: Returns: The scalar value of the Series or the element at the given index. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - Let's define a dataframe-agnostic function that returns item at given index - - >>> def agnostic_item(s_native: IntoSeries, index=None): - ... 
s = nw.from_native(s_native, series_only=True) - ... return s.item(index) - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_item`: - - >>> ( - ... agnostic_item(pl.Series("a", [1]), None), - ... agnostic_item(pd.Series([1]), None), - ... agnostic_item(pa.chunked_array([[1]]), None), - ... ) - (1, np.int64(1), 1) - - >>> ( - ... agnostic_item(pl.Series("a", [9, 8, 7]), -1), - ... agnostic_item(pl.Series([9, 8, 7]), -2), - ... agnostic_item(pa.chunked_array([[9, 8, 7]]), -3), - ... ) - (7, 8, 9) """ return self._compliant_series.item(index=index) @@ -3623,52 +1130,6 @@ def head(self: Self, n: int = 10) -> Self: Returns: A new Series containing the first n characters of each string. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = list(range(10)) - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function that returns the first 3 rows: - - >>> def agnostic_head(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.head(3).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_head`: - - >>> agnostic_head(s_pd) - 0 0 - 1 1 - 2 2 - dtype: int64 - - >>> agnostic_head(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 0 - 1 - 2 - ] - - >>> agnostic_head(s_pa) # doctest: +ELLIPSIS - - [ - [ - 0, - 1, - 2 - ] - ] """ return self._from_compliant_series(self._compliant_series.head(n)) @@ -3680,52 +1141,6 @@ def tail(self: Self, n: int = 10) -> Self: Returns: A new Series with the last n rows. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = list(range(10)) - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function that returns the last 3 rows: - - >>> def agnostic_tail(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.tail(3).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_tail`: - - >>> agnostic_tail(s_pd) - 7 7 - 8 8 - 9 9 - dtype: int64 - - >>> agnostic_tail(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 7 - 8 - 9 - ] - - >>> agnostic_tail(s_pa) # doctest: +ELLIPSIS - - [ - [ - 7, - 8, - 9 - ] - ] """ return self._from_compliant_series(self._compliant_series.tail(n)) @@ -3745,52 +1160,6 @@ def round(self: Self, decimals: int = 0) -> Self: 4.5 to 4.0, etc..). Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc..). - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1.12345, 2.56789, 3.901234] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function that rounds to the first decimal: - - >>> def agnostic_round(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.round(1).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_round`: - - >>> agnostic_round(s_pd) - 0 1.1 - 1 2.6 - 2 3.9 - dtype: float64 - - >>> agnostic_round(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [f64] - [ - 1.1 - 2.6 - 3.9 - ] - - >>> agnostic_round(s_pa) # doctest: +ELLIPSIS - - [ - [ - 1.1, - 2.6, - 3.9 - ] - ] """ return self._from_compliant_series(self._compliant_series.round(decimals)) @@ -3809,83 +1178,6 @@ def to_dummies( Notes: pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(data, name="a") - >>> s_pl = pl.Series("a", data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_to_dummies( - ... s_native: IntoSeries, drop_first: bool = False - ... ) -> IntoDataFrame: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.to_dummies(drop_first=drop_first).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_dummies`: - - >>> agnostic_to_dummies(s_pd) - a_1 a_2 a_3 - 0 1 0 0 - 1 0 1 0 - 2 0 0 1 - - >>> agnostic_to_dummies(s_pd, drop_first=True) - a_2 a_3 - 0 0 0 - 1 1 0 - 2 0 1 - - >>> agnostic_to_dummies(s_pl) - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ a_1 ┆ a_2 ┆ a_3 │ - │ --- ┆ --- ┆ --- │ - │ i8 ┆ i8 ┆ i8 │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 0 ┆ 0 │ - │ 0 ┆ 1 ┆ 0 │ - │ 0 ┆ 0 ┆ 1 │ - └─────┴─────┴─────┘ - - >>> agnostic_to_dummies(s_pl, drop_first=True) - shape: (3, 2) - ┌─────┬─────┐ - │ a_2 ┆ a_3 │ - │ --- ┆ --- │ - │ i8 ┆ i8 │ - ╞═════╪═════╡ - │ 0 ┆ 0 │ - │ 1 ┆ 0 │ - │ 0 ┆ 1 │ - └─────┴─────┘ - - >>> agnostic_to_dummies(s_pa) - pyarrow.Table - _1: int8 - _2: int8 - _3: int8 - ---- - _1: [[1,0,0]] - _2: [[0,1,0]] - _3: [[0,0,1]] - >>> agnostic_to_dummies(s_pa, drop_first=True) - pyarrow.Table - _2: int8 - _3: int8 - ---- - _2: [[0,1,0]] - _3: [[0,0,1]] """ return self._dataframe( self._compliant_series.to_dummies(separator=separator, drop_first=drop_first), @@ -3901,50 +1193,6 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: Returns: A new Series with every nth value starting from the offset. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3, 4] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function in which gather every 2 rows, - starting from a offset of 1: - - >>> def agnostic_gather_every(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.gather_every(n=2, offset=1).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_gather_every`: - - >>> agnostic_gather_every(s_pd) - 1 2 - 3 4 - dtype: int64 - - >>> agnostic_gather_every(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i64] - [ - 2 - 4 - ] - - >>> agnostic_gather_every(s_pa) # doctest:+ELLIPSIS - - [ - [ - 2, - 4 - ] - ] """ return self._from_compliant_series( self._compliant_series.gather_every(n=n, offset=offset) @@ -3955,54 +1203,6 @@ def to_arrow(self: Self) -> pa.Array: Returns: A PyArrow Array containing the data from the Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2, 3, 4] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function that converts to arrow: - - >>> def agnostic_to_arrow(s_native: IntoSeries) -> pa.Array: - ... s = nw.from_native(s_native, series_only=True) - ... return s.to_arrow() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_arrow`: - - >>> agnostic_to_arrow(s_pd) # doctest:+NORMALIZE_WHITESPACE - - [ - 1, - 2, - 3, - 4 - ] - - >>> agnostic_to_arrow(s_pl) # doctest:+NORMALIZE_WHITESPACE - - [ - 1, - 2, - 3, - 4 - ] - - >>> agnostic_to_arrow(s_pa) # doctest:+NORMALIZE_WHITESPACE - - [ - 1, - 2, - 3, - 4 - ] """ return self._compliant_series.to_arrow() @@ -4013,49 +1213,6 @@ def mode(self: Self) -> Self: Returns: A new Series containing the mode(s) (values that appear most frequently). 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 1, 2, 2, 3] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_mode(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.mode().sort().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_mode`: - - >>> agnostic_mode(s_pd) - 0 1 - 1 2 - dtype: int64 - - >>> agnostic_mode(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i64] - [ - 1 - 2 - ] - - >>> agnostic_mode(s_pa) # doctest:+ELLIPSIS - - [ - [ - 1, - 2 - ] - ] """ return self._from_compliant_series(self._compliant_series.mode()) @@ -4069,52 +1226,6 @@ def is_finite(self: Self) -> Self: Returns: Expression of `Boolean` data type. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [float("nan"), float("inf"), 2.0, None] - - We define a library agnostic function: - - >>> def agnostic_is_finite(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.is_finite().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_finite`: - - >>> agnostic_is_finite(pd.Series(data)) - 0 False - 1 False - 2 True - 3 False - dtype: bool - - >>> agnostic_is_finite(pl.Series(data)) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [bool] - [ - false - false - true - null - ] - - >>> agnostic_is_finite(pa.chunked_array([data])) # doctest: +ELLIPSIS - - [ - [ - false, - false, - true, - null - ] - ] """ return self._from_compliant_series(self._compliant_series.is_finite()) @@ -4126,53 +1237,6 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self: Returns: A new Series with the cumulative count of non-null values. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["x", "k", None, "d"] - - We define a library agnostic function: - - >>> def agnostic_cum_count(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.cum_count(reverse=True).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_count`: - - >>> agnostic_cum_count(pd.Series(data)) - 0 3 - 1 2 - 2 1 - 3 1 - dtype: int64 - - >>> agnostic_cum_count(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [u32] - [ - 3 - 2 - 1 - 1 - ] - - >>> agnostic_cum_count(pa.chunked_array([data])) # doctest:+ELLIPSIS - - [ - [ - 3, - 2, - 1, - 1 - ] - ] - """ return self._from_compliant_series( self._compliant_series.cum_count(reverse=reverse) @@ -4186,53 +1250,6 @@ def cum_min(self: Self, *, reverse: bool = False) -> Self: Returns: A new Series with the cumulative min of non-null values. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [3, 1, None, 2] - - We define a library agnostic function: - - >>> def agnostic_cum_min(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.cum_min().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_min`: - - >>> agnostic_cum_min(pd.Series(data)) - 0 3.0 - 1 1.0 - 2 NaN - 3 1.0 - dtype: float64 - - >>> agnostic_cum_min(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [i64] - [ - 3 - 1 - null - 1 - ] - - >>> agnostic_cum_min(pa.chunked_array([data])) # doctest:+ELLIPSIS - - [ - [ - 3, - 1, - null, - 1 - ] - ] - """ return self._from_compliant_series( self._compliant_series.cum_min(reverse=reverse) @@ -4246,53 +1263,6 @@ def cum_max(self: Self, *, reverse: bool = False) -> Self: Returns: A new Series with the cumulative max of non-null values. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 3, None, 2] - - We define a library agnostic function: - - >>> def agnostic_cum_max(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.cum_max().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_max`: - - >>> agnostic_cum_max(pd.Series(data)) - 0 1.0 - 1 3.0 - 2 NaN - 3 3.0 - dtype: float64 - - >>> agnostic_cum_max(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [i64] - [ - 1 - 3 - null - 3 - ] - - >>> agnostic_cum_max(pa.chunked_array([data])) # doctest:+ELLIPSIS - - [ - [ - 1, - 3, - null, - 3 - ] - ] - """ return self._from_compliant_series( self._compliant_series.cum_max(reverse=reverse) @@ -4306,53 +1276,6 @@ def cum_prod(self: Self, *, reverse: bool = False) -> Self: Returns: A new Series with the cumulative product of non-null values. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 3, None, 2] - - We define a library agnostic function: - - >>> def agnostic_cum_prod(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.cum_prod().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_prod`: - - >>> agnostic_cum_prod(pd.Series(data)) - 0 1.0 - 1 3.0 - 2 NaN - 3 6.0 - dtype: float64 - - >>> agnostic_cum_prod(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [i64] - [ - 1 - 3 - null - 6 - ] - - >>> agnostic_cum_prod(pa.chunked_array([data])) # doctest:+ELLIPSIS - - [ - [ - 1, - 3, - null, - 6 - ] - ] - """ return self._from_compliant_series( self._compliant_series.cum_prod(reverse=reverse) @@ -4388,55 +1311,6 @@ def rolling_sum( Returns: A new series. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1.0, 2.0, 3.0, 4.0] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_rolling_sum(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.rolling_sum(window_size=2).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_sum`: - - >>> agnostic_rolling_sum(s_pd) - 0 NaN - 1 3.0 - 2 5.0 - 3 7.0 - dtype: float64 - - >>> agnostic_rolling_sum(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [f64] - [ - null - 3.0 - 5.0 - 7.0 - ] - - >>> agnostic_rolling_sum(s_pa) # doctest:+ELLIPSIS - - [ - [ - null, - 3, - 5, - 7 - ] - ] """ window_size, min_periods = _validate_rolling_arguments( window_size=window_size, min_periods=min_periods @@ -4483,55 +1357,6 @@ def rolling_mean( Returns: A new series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1.0, 2.0, 3.0, 4.0] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_rolling_mean(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.rolling_mean(window_size=2).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_mean`: - - >>> agnostic_rolling_mean(s_pd) - 0 NaN - 1 1.5 - 2 2.5 - 3 3.5 - dtype: float64 - - >>> agnostic_rolling_mean(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [f64] - [ - null - 1.5 - 2.5 - 3.5 - ] - - >>> agnostic_rolling_mean(s_pa) # doctest:+ELLIPSIS - - [ - [ - null, - 1.5, - 2.5, - 3.5 - ] - ] """ window_size, min_periods = _validate_rolling_arguments( window_size=window_size, min_periods=min_periods @@ -4580,55 +1405,6 @@ def rolling_var( Returns: A new series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1.0, 3.0, 1.0, 4.0] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_rolling_var(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.rolling_var(window_size=2, min_periods=1).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_var`: - - >>> agnostic_rolling_var(s_pd) - 0 NaN - 1 2.0 - 2 2.0 - 3 4.5 - dtype: float64 - - >>> agnostic_rolling_var(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [f64] - [ - null - 2.0 - 2.0 - 4.5 - ] - - >>> agnostic_rolling_var(s_pa) # doctest:+ELLIPSIS - - [ - [ - nan, - 2, - 2, - 4.5 - ] - ] """ window_size, min_periods = _validate_rolling_arguments( window_size=window_size, min_periods=min_periods @@ -4675,55 +1451,6 @@ def rolling_std( Returns: A new series. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1.0, 3.0, 1.0, 4.0] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_rolling_std(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.rolling_std(window_size=2, min_periods=1).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_std`: - - >>> agnostic_rolling_std(s_pd) - 0 NaN - 1 1.414214 - 2 1.414214 - 3 2.121320 - dtype: float64 - - >>> agnostic_rolling_std(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [f64] - [ - null - 1.414214 - 1.414214 - 2.12132 - ] - - >>> agnostic_rolling_std(s_pa) # doctest:+ELLIPSIS - - [ - [ - nan, - 1.4142135623730951, - 1.4142135623730951, - 2.1213203435596424 - ] - ] """ window_size, min_periods = _validate_rolling_arguments( window_size=window_size, min_periods=min_periods @@ -4776,56 +1503,6 @@ def rank( Returns: A new series with rank data as values. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - >>> - >>> data = [3, 6, 1, 1, 6] - - We define a dataframe-agnostic function that computes the dense rank for - the data: - - >>> def agnostic_dense_rank(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.rank(method="dense").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dense_rank`: - - >>> agnostic_dense_rank(pd.Series(data)) - 0 2.0 - 1 3.0 - 2 1.0 - 3 1.0 - 4 3.0 - dtype: float64 - - >>> agnostic_dense_rank(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE - shape: (5,) - Series: '' [u32] - [ - 2 - 3 - 1 - 1 - 3 - ] - - >>> agnostic_dense_rank(pa.chunked_array([data])) # doctest:+ELLIPSIS - - [ - [ - 2, - 3, - 1, - 1, - 3 - ] - ] """ supported_rank_methods = {"average", "min", "max", "dense", "ordinal"} if method not in supported_rank_methods: diff --git a/narwhals/series_cat.py b/narwhals/series_cat.py index 73f899d13..3957df7b5 100644 --- a/narwhals/series_cat.py +++ b/narwhals/series_cat.py @@ -22,52 +22,6 @@ def get_categories(self: Self) -> SeriesT: Returns: A new Series containing the unique categories. - - Examples: - Let's create some series: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["apple", "mango", "mango"] - >>> s_pd = pd.Series(data, dtype="category") - >>> s_pl = pl.Series(data, dtype=pl.Categorical) - >>> s_pa = pa.chunked_array([data]).dictionary_encode() - - We define a dataframe-agnostic function to get unique categories - from column 'fruits': - - >>> def agnostic_get_categories(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.cat.get_categories().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_get_categories`: - - >>> agnostic_get_categories(s_pd) - 0 apple - 1 mango - dtype: object - - >>> agnostic_get_categories(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [str] - [ - "apple" - "mango" - ] - - >>> agnostic_get_categories(s_pa) # doctest: +ELLIPSIS - - [ - [ - "apple", - "mango" - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.cat.get_categories() diff --git a/narwhals/series_dt.py b/narwhals/series_dt.py index 5fea4ff5c..b8a1fdab4 100644 --- a/narwhals/series_dt.py +++ b/narwhals/series_dt.py @@ -26,50 +26,6 @@ def date(self: Self) -> SeriesT: Raises: NotImplementedError: If pandas default backend is being used. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)] - >>> s_pd = pd.Series(dates).convert_dtypes(dtype_backend="pyarrow") - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_date(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.dt.date().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_date`: - - >>> agnostic_date(s_pd) - 0 2012-01-07 - 1 2023-03-10 - dtype: date32[day][pyarrow] - - >>> agnostic_date(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [date] - [ - 2012-01-07 - 2023-03-10 - ] - - >>> agnostic_date(s_pa) # doctest: +ELLIPSIS - - [ - [ - 2012-01-07, - 2023-03-10 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.date() @@ -80,50 +36,6 @@ def year(self: Self) -> SeriesT: Returns: A new Series containing the year component of each datetime value. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [datetime(2012, 1, 7), datetime(2023, 3, 10)] - >>> s_pd = pd.Series(dates) - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_year(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.year().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_year`: - - >>> agnostic_year(s_pd) - 0 2012 - 1 2023 - dtype: int... - - >>> agnostic_year(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i32] - [ - 2012 - 2023 - ] - - >>> agnostic_year(s_pa) # doctest: +ELLIPSIS - - [ - [ - 2012, - 2023 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.year() @@ -134,49 +46,6 @@ def month(self: Self) -> SeriesT: Returns: A new Series containing the month component of each datetime value. 
- - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [datetime(2023, 2, 1), datetime(2023, 8, 3)] - >>> s_pd = pd.Series(dates) - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_month(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.month().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_month`: - - >>> agnostic_month(s_pd) - 0 2 - 1 8 - dtype: int... - >>> agnostic_month(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i8] - [ - 2 - 8 - ] - - >>> agnostic_month(s_pa) # doctest: +ELLIPSIS - - [ - [ - 2, - 8 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.month() @@ -187,50 +56,6 @@ def day(self: Self) -> SeriesT: Returns: A new Series containing the day component of each datetime value. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [datetime(2022, 1, 1), datetime(2022, 1, 5)] - >>> s_pd = pd.Series(dates) - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_day(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.day().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_day`: - - >>> agnostic_day(s_pd) - 0 1 - 1 5 - dtype: int... 
- - >>> agnostic_day(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i8] - [ - 1 - 5 - ] - - >>> agnostic_day(s_pa) # doctest: +ELLIPSIS - - [ - [ - 1, - 5 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.day() @@ -241,50 +66,6 @@ def hour(self: Self) -> SeriesT: Returns: A new Series containing the hour component of each datetime value. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)] - >>> s_pd = pd.Series(dates) - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_hour(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.hour().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_hour`: - - >>> agnostic_hour(s_pd) - 0 5 - 1 9 - dtype: int... - - >>> agnostic_hour(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i8] - [ - 5 - 9 - ] - - >>> agnostic_hour(s_pa) # doctest: +ELLIPSIS - - [ - [ - 5, - 9 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.hour() @@ -295,50 +76,6 @@ def minute(self: Self) -> SeriesT: Returns: A new Series containing the minute component of each datetime value. 
- - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)] - >>> s_pd = pd.Series(dates) - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_minute(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.minute().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_minute`: - - >>> agnostic_minute(s_pd) - 0 3 - 1 12 - dtype: int... - - >>> agnostic_minute(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i8] - [ - 3 - 12 - ] - - >>> agnostic_minute(s_pa) # doctest: +ELLIPSIS - - [ - [ - 3, - 12 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.minute() @@ -349,50 +86,6 @@ def second(self: Self) -> SeriesT: Returns: A new Series containing the second component of each datetime value. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [datetime(2022, 1, 1, 5, 3, 10), datetime(2022, 1, 5, 9, 12, 4)] - >>> s_pd = pd.Series(dates) - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_second(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.second().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_second`: - - >>> agnostic_second(s_pd) - 0 10 - 1 4 - dtype: int... 
- - >>> agnostic_second(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i8] - [ - 10 - 4 - ] - - >>> agnostic_second(s_pa) # doctest: +ELLIPSIS - - [ - [ - 10, - 4 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.second() @@ -403,65 +96,6 @@ def millisecond(self: Self) -> SeriesT: Returns: A new Series containing the millisecond component of each datetime value. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [ - ... datetime(2023, 5, 21, 12, 55, 10, 400000), - ... datetime(2023, 5, 21, 12, 55, 10, 600000), - ... datetime(2023, 5, 21, 12, 55, 10, 800000), - ... datetime(2023, 5, 21, 12, 55, 11, 0), - ... datetime(2023, 5, 21, 12, 55, 11, 200000), - ... ] - >>> s_pd = pd.Series(dates) - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_millisecond(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.millisecond().alias("datetime").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_millisecond`: - - >>> agnostic_millisecond(s_pd) - 0 400 - 1 600 - 2 800 - 3 0 - 4 200 - Name: datetime, dtype: int... - - >>> agnostic_millisecond(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (5,) - Series: 'datetime' [i32] - [ - 400 - 600 - 800 - 0 - 200 - ] - - >>> agnostic_millisecond(s_pa) # doctest: +ELLIPSIS - - [ - [ - 400, - 600, - 800, - 0, - 200 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.millisecond() @@ -472,65 +106,6 @@ def microsecond(self: Self) -> SeriesT: Returns: A new Series containing the microsecond component of each datetime value. 
- - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [ - ... datetime(2023, 5, 21, 12, 55, 10, 400000), - ... datetime(2023, 5, 21, 12, 55, 10, 600000), - ... datetime(2023, 5, 21, 12, 55, 10, 800000), - ... datetime(2023, 5, 21, 12, 55, 11, 0), - ... datetime(2023, 5, 21, 12, 55, 11, 200000), - ... ] - >>> s_pd = pd.Series(dates) - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_microsecond(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.microsecond().alias("datetime").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_microsecond`: - - >>> agnostic_microsecond(s_pd) - 0 400000 - 1 600000 - 2 800000 - 3 0 - 4 200000 - Name: datetime, dtype: int... - - >>> agnostic_microsecond(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (5,) - Series: 'datetime' [i32] - [ - 400000 - 600000 - 800000 - 0 - 200000 - ] - - >>> agnostic_microsecond(s_pa) # doctest: +ELLIPSIS - - [ - [ - 400000, - 600000, - 800000, - 0, - 200000 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.microsecond() @@ -541,53 +116,6 @@ def nanosecond(self: Self) -> SeriesT: Returns: A new Series containing the nanosecond component of each datetime value. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> dates = [ - ... datetime(2022, 1, 1, 5, 3, 10, 500000), - ... datetime(2022, 1, 5, 9, 12, 4, 60000), - ... 
] - >>> s_pd = pd.Series(dates) - >>> s_pl = pl.Series(dates) - >>> s_pa = pa.chunked_array([dates]) - - We define a library agnostic function: - - >>> def agnostic_nanosecond(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.nanosecond().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_nanosecond`: - - >>> agnostic_nanosecond(s_pd) - 0 500000000 - 1 60000000 - dtype: int... - - >>> agnostic_nanosecond(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i32] - [ - 500000000 - 60000000 - ] - - >>> agnostic_nanosecond(s_pa) # doctest: +ELLIPSIS - - [ - [ - 500000000, - 60000000 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.nanosecond() @@ -598,51 +126,6 @@ def ordinal_day(self: Self) -> SeriesT: Returns: A new Series containing the ordinal day (day of year) for each datetime value. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_ordinal_day(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.dt.ordinal_day().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_ordinal_day`: - - >>> agnostic_ordinal_day(s_pd) - 0 1 - 1 216 - dtype: int32 - - >>> agnostic_ordinal_day(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i16] - [ - 1 - 216 - ] - - - >>> agnostic_ordinal_day(s_pa) # doctest: +ELLIPSIS - - [ - [ - 1, - 216 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.ordinal_day() @@ -655,46 +138,6 @@ def weekday(self: Self) -> SeriesT: A new Series containing the week day for each datetime value. Returns the ISO weekday number where monday = 1 and sunday = 7 - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - >>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_weekday(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.weekday().to_native() - - We can then pass either pandas, Polars, PyArrow, and other supported libraries to `agnostic_weekday`: - - >>> agnostic_weekday(s_pd) - 0 3 - 1 1 - dtype: int32 - >>> agnostic_weekday(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i8] - [ - 3 - 1 - ] - >>> agnostic_weekday(s_pa) # doctest: +ELLIPSIS - - [ - [ - 3, - 1 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.weekday() @@ -710,50 +153,6 @@ def total_minutes(self: Self) -> SeriesT: Returns: A new Series containing the total number of minutes for each timedelta value. 
- - Examples: - >>> from datetime import timedelta - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [timedelta(minutes=10), timedelta(minutes=20, seconds=40)] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_total_minutes(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.total_minutes().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_total_minutes`: - - >>> agnostic_total_minutes(s_pd) - 0 10 - 1 20 - dtype: int... - - >>> agnostic_total_minutes(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i64] - [ - 10 - 20 - ] - - >>> agnostic_total_minutes(s_pa) # doctest: +ELLIPSIS - - [ - [ - 10, - 20 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.total_minutes() @@ -769,50 +168,6 @@ def total_seconds(self: Self) -> SeriesT: Returns: A new Series containing the total number of seconds for each timedelta value. - - Examples: - >>> from datetime import timedelta - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_total_seconds(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.dt.total_seconds().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_total_seconds`: - - >>> agnostic_total_seconds(s_pd) - 0 10 - 1 20 - dtype: int... - - >>> agnostic_total_seconds(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i64] - [ - 10 - 20 - ] - - >>> agnostic_total_seconds(s_pa) # doctest: +ELLIPSIS - - [ - [ - 10, - 20 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.total_seconds() @@ -828,53 +183,6 @@ def total_milliseconds(self: Self) -> SeriesT: Returns: A new Series containing the total number of milliseconds for each timedelta value. - - Examples: - >>> from datetime import timedelta - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [ - ... timedelta(milliseconds=10), - ... timedelta(milliseconds=20, microseconds=40), - ... ] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_total_milliseconds(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.total_milliseconds().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_total_milliseconds`: - - >>> agnostic_total_milliseconds(s_pd) - 0 10 - 1 20 - dtype: int... 
- - >>> agnostic_total_milliseconds(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i64] - [ - 10 - 20 - ] - - >>> agnostic_total_milliseconds(s_pa) # doctest: +ELLIPSIS - - [ - [ - 10, - 20 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.total_milliseconds() @@ -890,53 +198,6 @@ def total_microseconds(self: Self) -> SeriesT: The function outputs the total microseconds in the int dtype by default, however, pandas may change the dtype to float when there are missing values, consider using `fill_null()` in this case. - - Examples: - >>> from datetime import timedelta - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [ - ... timedelta(microseconds=10), - ... timedelta(milliseconds=1, microseconds=200), - ... ] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_total_microseconds(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.dt.total_microseconds().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_total_microseconds`: - - >>> agnostic_total_microseconds(s_pd) - 0 10 - 1 1200 - dtype: int... - - >>> agnostic_total_microseconds(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i64] - [ - 10 - 1200 - ] - - >>> agnostic_total_microseconds(s_pa) # doctest: +ELLIPSIS - - [ - [ - 10, - 1200 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.total_microseconds() @@ -952,40 +213,6 @@ def total_nanoseconds(self: Self) -> SeriesT: Returns: A new Series containing the total number of nanoseconds for each timedelta value. 
- - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"] - >>> s_pd = pd.to_datetime(pd.Series(data)) - >>> s_pl = pl.Series(data).str.to_datetime(time_unit="ns") - - We define a library agnostic function: - - >>> def agnostic_total_nanoseconds(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.diff().dt.total_nanoseconds().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_total_nanoseconds`: - - >>> agnostic_total_nanoseconds(s_pd) - 0 NaN - 1 1.0 - dtype: float64 - - >>> agnostic_total_nanoseconds(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [i64] - [ - null - 1 - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.total_nanoseconds() @@ -1030,57 +257,6 @@ def to_string(self: Self, format: str) -> SeriesT: # noqa: A002 If you have an application where this is not enough, please open an issue and let us know. - - Examples: - >>> from datetime import datetime - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [ - ... datetime(2020, 3, 1), - ... datetime(2020, 4, 1), - ... datetime(2020, 5, 1), - ... ] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_to_string(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.dt.to_string("%Y/%m/%d").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_string`: - - >>> agnostic_to_string(s_pd) - 0 2020/03/01 - 1 2020/04/01 - 2 2020/05/01 - dtype: object - - >>> agnostic_to_string(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [str] - [ - "2020/03/01" - "2020/04/01" - "2020/05/01" - ] - - >>> agnostic_to_string(s_pa) # doctest: +ELLIPSIS - - [ - [ - "2020/03/01", - "2020/04/01", - "2020/05/01" - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.to_string(format) @@ -1094,53 +270,6 @@ def replace_time_zone(self: Self, time_zone: str | None) -> SeriesT: Returns: A new Series with the specified time zone. - - Examples: - >>> from datetime import datetime, timezone - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [ - ... datetime(2024, 1, 1, tzinfo=timezone.utc), - ... datetime(2024, 1, 2, tzinfo=timezone.utc), - ... ] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_replace_time_zone(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.dt.replace_time_zone("Asia/Kathmandu").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_replace_time_zone`: - - >>> agnostic_replace_time_zone(s_pd) - 0 2024-01-01 00:00:00+05:45 - 1 2024-01-02 00:00:00+05:45 - dtype: datetime64[ns, Asia/Kathmandu] - - >>> agnostic_replace_time_zone(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [datetime[μs, Asia/Kathmandu]] - [ - 2024-01-01 00:00:00 +0545 - 2024-01-02 00:00:00 +0545 - ] - - >>> agnostic_replace_time_zone(s_pa) - - [ - [ - 2023-12-31 18:15:00.000000Z, - 2024-01-01 18:15:00.000000Z - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.replace_time_zone(time_zone) @@ -1157,53 +286,6 @@ def convert_time_zone(self: Self, time_zone: str) -> SeriesT: Returns: A new Series with the specified time zone. - - Examples: - >>> from datetime import datetime, timezone - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [ - ... datetime(2024, 1, 1, tzinfo=timezone.utc), - ... datetime(2024, 1, 2, tzinfo=timezone.utc), - ... ] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_convert_time_zone(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.dt.convert_time_zone("Asia/Kathmandu").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_convert_time_zone`: - - >>> agnostic_convert_time_zone(s_pd) - 0 2024-01-01 05:45:00+05:45 - 1 2024-01-02 05:45:00+05:45 - dtype: datetime64[ns, Asia/Kathmandu] - - >>> agnostic_convert_time_zone(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [datetime[μs, Asia/Kathmandu]] - [ - 2024-01-01 05:45:00 +0545 - 2024-01-02 05:45:00 +0545 - ] - - >>> agnostic_convert_time_zone(s_pa) - - [ - [ - 2024-01-01 00:00:00.000000Z, - 2024-01-02 00:00:00.000000Z - ] - ] """ if time_zone is None: msg = "Target `time_zone` cannot be `None` in `convert_time_zone`. Please use `replace_time_zone(None)` if you want to remove the time zone." @@ -1221,53 +303,6 @@ def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> Series Returns: A new Series with timestamps in the specified time unit. - - Examples: - >>> from datetime import date - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [date(2001, 1, 1), None, date(2001, 1, 3)] - >>> s_pd = pd.Series(data, dtype="datetime64[ns]") - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_timestamp(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.dt.timestamp("ms").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_timestamp`: - - >>> agnostic_timestamp(s_pd) - 0 9.783072e+11 - 1 NaN - 2 9.784800e+11 - dtype: float64 - - >>> agnostic_timestamp(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [i64] - [ - 978307200000 - null - 978480000000 - ] - - >>> agnostic_timestamp(s_pa) - - [ - [ - 978307200000, - null, - 978480000000 - ] - ] """ if time_unit not in {"ns", "us", "ms"}: msg = ( diff --git a/narwhals/series_list.py b/narwhals/series_list.py index 19de071e8..5b518a0a3 100644 --- a/narwhals/series_list.py +++ b/narwhals/series_list.py @@ -25,53 +25,6 @@ def len(self: Self) -> SeriesT: Returns: A new series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [[1, 2], [3, 4, None], None, []] - - Let's define a dataframe-agnostic function: - - >>> def agnostic_list_len(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.list.len().to_native() - - We can then pass pandas / PyArrow / Polars / any other supported library: - - >>> agnostic_list_len( - ... pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) - ... 
) # doctest: +SKIP - 0 2 - 1 3 - 2 - 3 0 - dtype: int32[pyarrow] - - >>> agnostic_list_len(pl.Series(data)) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [u32] - [ - 2 - 3 - null - 0 - ] - - >>> agnostic_list_len(pa.chunked_array([data])) # doctest: +ELLIPSIS - - [ - [ - 2, - 3, - null, - 0 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.list.len() diff --git a/narwhals/series_str.py b/narwhals/series_str.py index 737bf09df..57a2cda42 100644 --- a/narwhals/series_str.py +++ b/narwhals/series_str.py @@ -22,58 +22,6 @@ def len_chars(self: Self) -> SeriesT: Returns: A new Series containing the length of each string in characters. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["foo", "Café", "345", "東京", None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_len_chars(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.str.len_chars().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_len_chars`: - - >>> agnostic_len_chars(s_pd) - 0 3.0 - 1 4.0 - 2 3.0 - 3 2.0 - 4 NaN - dtype: float64 - - >>> agnostic_len_chars(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (5,) - Series: '' [u32] - [ - 3 - 4 - 3 - 2 - null - ] - - >>> agnostic_len_chars(s_pa) # doctest: +ELLIPSIS - - [ - [ - 3, - 4, - 3, - 2, - null - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.len_chars() @@ -92,50 +40,6 @@ def replace( Returns: A new Series with the regex/literal pattern replaced with the specified value. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["123abc", "abc abc123"] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_replace(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... s = s.str.replace("abc", "") - ... return s.to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_replace`: - - >>> agnostic_replace(s_pd) - 0 123 - 1 abc123 - dtype: object - - >>> agnostic_replace(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [str] - [ - "123" - " abc123" - ] - - >>> agnostic_replace(s_pa) # doctest: +ELLIPSIS - - [ - [ - "123", - " abc123" - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.replace( @@ -155,50 +59,6 @@ def replace_all( Returns: A new Series with all occurrences of pattern replaced with the specified value. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["123abc", "abc abc123"] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_replace_all(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... s = s.str.replace_all("abc", "") - ... 
return s.to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_replace_all`: - - >>> agnostic_replace_all(s_pd) - 0 123 - 1 123 - dtype: object - - >>> agnostic_replace_all(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [str] - [ - "123" - " 123" - ] - - >>> agnostic_replace_all(s_pa) # doctest: +ELLIPSIS - - [ - [ - "123", - " 123" - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.replace_all( @@ -214,50 +74,6 @@ def strip_chars(self: Self, characters: str | None = None) -> SeriesT: Returns: A new Series with leading and trailing characters removed. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["apple", "\nmango"] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_strip_chars(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... s = s.str.strip_chars() - ... return s.to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_strip_chars`: - - >>> agnostic_strip_chars(s_pd) - 0 apple - 1 mango - dtype: object - - >>> agnostic_strip_chars(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [str] - [ - "apple" - "mango" - ] - - >>> agnostic_strip_chars(s_pa) # doctest: +ELLIPSIS - - [ - [ - "apple", - "mango" - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.strip_chars(characters) @@ -271,52 +87,6 @@ def starts_with(self: Self, prefix: str) -> SeriesT: Returns: A new Series with boolean values indicating if each string starts with the prefix. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["apple", "mango", None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_starts_with(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.str.starts_with("app").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_starts_with`: - - >>> agnostic_starts_with(s_pd) - 0 True - 1 False - 2 None - dtype: object - - >>> agnostic_starts_with(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [bool] - [ - true - false - null - ] - - >>> agnostic_starts_with(s_pa) # doctest: +ELLIPSIS - - [ - [ - true, - false, - null - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.starts_with(prefix) @@ -330,52 +100,6 @@ def ends_with(self: Self, suffix: str) -> SeriesT: Returns: A new Series with boolean values indicating if each string ends with the suffix. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["apple", "mango", None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_ends_with(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.str.ends_with("ngo").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_ends_with`: - - >>> agnostic_ends_with(s_pd) - 0 False - 1 True - 2 None - dtype: object - - >>> agnostic_ends_with(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [bool] - [ - false - true - null - ] - - >>> agnostic_ends_with(s_pa) # doctest: +ELLIPSIS - - [ - [ - false, - true, - null - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.ends_with(suffix) @@ -391,57 +115,6 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> SeriesT: Returns: A new Series with boolean values indicating if each string contains the pattern. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["cat", "dog", "rabbit and parrot", "dove", None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_contains(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.str.contains("parrot|dove").to_native() - - We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_contains`: - - >>> agnostic_contains(s_pd) - 0 False - 1 False - 2 True - 3 True - 4 None - dtype: object - - >>> agnostic_contains(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (5,) - Series: '' [bool] - [ - false - false - true - true - null - ] - - >>> agnostic_contains(s_pa) # doctest: +ELLIPSIS - - [ - [ - false, - false, - true, - true, - null - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.contains(pattern, literal=literal) @@ -457,89 +130,6 @@ def slice(self: Self, offset: int, length: int | None = None) -> SeriesT: Returns: A new Series containing subslices of each string. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["pear", None, "papaya", "dragonfruit"] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.str.slice(4, length=3).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_slice`: - - >>> agnostic_slice(s_pd) # doctest: +NORMALIZE_WHITESPACE - 0 - 1 None - 2 ya - 3 onf - dtype: object - - >>> agnostic_slice(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [str] - [ - "" - null - "ya" - "onf" - ] - - >>> agnostic_slice(s_pa) # doctest: +ELLIPSIS - - [ - [ - "", - null, - "ya", - "onf" - ] - ] - - Using negative indexes: - - >>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.str.slice(-3).to_native() - - >>> agnostic_slice(s_pd) # doctest: +NORMALIZE_WHITESPACE - 0 ear - 1 None - 2 aya - 3 uit - dtype: object - - >>> agnostic_slice(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [str] - [ - "ear" - null - "aya" - "uit" - ] - - >>> agnostic_slice(s_pa) # doctest: +ELLIPSIS - - [ - [ - "ear", - null, - "aya", - "uit" - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.slice( @@ -560,55 +150,6 @@ def head(self: Self, n: int = 5) -> SeriesT: 1. When the `n` input is negative, `head` returns characters up to the n-th from the end of the string. For example, if `n = -3`, then all characters except the last three are returned. 2. If the length of the string has fewer than `n` characters, the full string is returned. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["Atatata", "taata", "taatatata", "zukkyun"] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_head(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.str.head().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_head`: - - >>> agnostic_head(s_pd) - 0 Atata - 1 taata - 2 taata - 3 zukky - dtype: object - - >>> agnostic_head(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [str] - [ - "Atata" - "taata" - "taata" - "zukky" - ] - - >>> agnostic_head(s_pa) # doctest: +ELLIPSIS - - [ - [ - "Atata", - "taata", - "taata", - "zukky" - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.slice(offset=0, length=n) @@ -627,55 +168,6 @@ def tail(self: Self, n: int = 5) -> SeriesT: 1. 
When the `n` input is negative, `tail` returns characters starting from the n-th from the beginning of the string. For example, if `n = -3`, then all characters except the first three are returned. 2. If the length of the string has fewer than `n` characters, the full string is returned. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["Atatata", "taata", "taatatata", "zukkyun"] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_tail(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.str.tail().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_tail`: - - >>> agnostic_tail(s_pd) - 0 atata - 1 taata - 2 atata - 3 kkyun - dtype: object - - >>> agnostic_tail(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [str] - [ - "atata" - "taata" - "atata" - "kkyun" - ] - - >>> agnostic_tail(s_pa) # doctest: +ELLIPSIS - - [ - [ - "atata", - "taata", - "atata", - "kkyun" - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.slice(offset=-n, length=None) @@ -691,52 +183,6 @@ def to_uppercase(self) -> SeriesT: The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'. For more info see: https://github.com/apache/arrow/issues/34599 There may be other unicode-edge-case-related variations across implementations. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["apple", "mango", None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_to_uppercase(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.str.to_uppercase().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_uppercase`: - - >>> agnostic_to_uppercase(s_pd) - 0 APPLE - 1 MANGO - 2 None - dtype: object - - >>> agnostic_to_uppercase(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [str] - [ - "APPLE" - "MANGO" - null - ] - - >>> agnostic_to_uppercase(s_pa) # doctest: +ELLIPSIS - - [ - [ - "APPLE", - "MANGO", - null - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.to_uppercase() @@ -747,52 +193,6 @@ def to_lowercase(self) -> SeriesT: Returns: A new Series with values converted to lowercase. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["APPLE", "MANGO", None] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_to_lowercase(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.str.to_lowercase().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_lowercase`: - - >>> agnostic_to_lowercase(s_pd) - 0 apple - 1 mango - 2 None - dtype: object - - >>> agnostic_to_lowercase(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: '' [str] - [ - "apple" - "mango" - null - ] - - >>> agnostic_to_lowercase(s_pa) # doctest: +ELLIPSIS - - [ - [ - "apple", - "mango", - null - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.to_lowercase() @@ -817,49 +217,6 @@ def to_datetime(self: Self, format: str | None = None) -> SeriesT: # noqa: A002 Returns: A new Series with datetime dtype. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = ["2020-01-01", "2020-01-02"] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a dataframe-agnostic function: - - >>> def agnostic_to_datetime(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.str.to_datetime(format="%Y-%m-%d").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_datetime`: - - >>> agnostic_to_datetime(s_pd) - 0 2020-01-01 - 1 2020-01-02 - dtype: datetime64[ns] - - >>> agnostic_to_datetime(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [datetime[μs]] - [ - 2020-01-01 00:00:00 - 2020-01-02 00:00:00 - ] - - >>> agnostic_to_datetime(s_pa) # doctest: +ELLIPSIS - - [ - [ - 2020-01-01 00:00:00.000000, - 2020-01-02 00:00:00.000000 - ] - ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.to_datetime(format=format) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index e8fd5a921..171c52912 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -171,45 +171,6 @@ def lazy(self) -> LazyFrame[Any]: Returns: A new LazyFrame. - - Examples: - Construct pandas, Polars and PyArrow DataFrames: - - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_lazy(df_native: IntoFrame) -> IntoFrame: - ... df = nw.from_native(df_native) - ... 
return df.lazy().to_native() - - Note that then, pandas and pyarrow dataframe stay eager, but Polars DataFrame - becomes a Polars LazyFrame: - - >>> agnostic_lazy(df_pd) - foo bar ham - 0 1 6.0 a - 1 2 7.0 b - 2 3 8.0 c - >>> agnostic_lazy(df_pl) - - >>> agnostic_lazy(df_pa) - pyarrow.Table - foo: int64 - bar: double - ham: string - ---- - foo: [[1,2,3]] - bar: [[6,7,8]] - ham: [["a","b","c"]] """ return super().lazy() # type: ignore[return-value] @@ -232,40 +193,6 @@ def to_dict( Returns: A mapping from column name to values / Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> data = { - ... "A": [1, 2, 3, 4, 5], - ... "fruits": ["banana", "banana", "apple", "apple", "banana"], - ... "B": [5, 4, 3, 2, 1], - ... "animals": ["beetle", "fly", "beetle", "beetle", "beetle"], - ... "optional": [28, 300, None, 2, -30], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_to_dict( - ... df_native: IntoDataFrame, - ... ) -> dict[str, list[int | str | float | None]]: - ... df = nw.from_native(df_native) - ... 
return df.to_dict(as_series=False) - - We can then pass either pandas, Polars or PyArrow to `agnostic_to_dict`: - - >>> agnostic_to_dict(df_pd) - {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28.0, 300.0, nan, 2.0, -30.0]} - >>> agnostic_to_dict(df_pl) - {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]} - >>> agnostic_to_dict(df_pa) - {'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]} """ return super().to_dict(as_series=as_series) # type: ignore[return-value] @@ -274,57 +201,6 @@ def is_duplicated(self: Self) -> Series: Returns: A new Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - >>> data = { - ... "a": [1, 2, 3, 1], - ... "b": ["x", "y", "z", "x"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_duplicated(df_native: IntoDataFrame) -> IntoSeries: - ... df = nw.from_native(df_native, eager_only=True) - ... 
return df.is_duplicated().to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_is_duplicated`: - - >>> agnostic_is_duplicated(df_pd) - 0 True - 1 False - 2 False - 3 True - dtype: bool - - >>> agnostic_is_duplicated(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [bool] - [ - true - false - false - true - ] - >>> agnostic_is_duplicated(df_pa) # doctest: +ELLIPSIS - - [ - [ - true, - false, - false, - true - ] - ] """ return super().is_duplicated() # type: ignore[return-value] @@ -333,57 +209,6 @@ def is_unique(self: Self) -> Series: Returns: A new Series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - >>> data = { - ... "a": [1, 2, 3, 1], - ... "b": ["x", "y", "z", "x"], - ... } - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_unique(df_native: IntoDataFrame) -> IntoSeries: - ... df = nw.from_native(df_native, eager_only=True) - ... return df.is_unique().to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_is_unique`: - - >>> agnostic_is_unique(df_pd) - 0 False - 1 True - 2 True - 3 False - dtype: bool - - >>> agnostic_is_unique(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [bool] - [ - false - true - true - false - ] - >>> agnostic_is_unique(df_pa) # doctest: +ELLIPSIS - - [ - [ - false, - true, - true, - false - ] - ] """ return super().is_unique() # type: ignore[return-value] @@ -418,59 +243,6 @@ def collect(self) -> DataFrame[Any]: Returns: DataFrame - - Examples: - >>> import narwhals as nw - >>> import polars as pl - >>> import dask.dataframe as dd - >>> data = { - ... "a": ["a", "b", "a", "b", "b", "c"], - ... 
"b": [1, 2, 3, 4, 5, 6], - ... "c": [6, 5, 4, 3, 2, 1], - ... } - >>> lf_pl = pl.LazyFrame(data) - >>> lf_dask = dd.from_dict(data, npartitions=2) - - >>> lf = nw.from_native(lf_pl) - >>> lf # doctest:+ELLIPSIS - ┌─────────────────────────────┐ - | Narwhals LazyFrame | - |-----------------------------| - |>> df = lf.group_by("a").agg(nw.all().sum()).collect() - >>> df.to_native().sort("a") - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ a ┆ 4 ┆ 10 │ - │ b ┆ 11 ┆ 10 │ - │ c ┆ 6 ┆ 1 │ - └─────┴─────┴─────┘ - - >>> lf = nw.from_native(lf_dask) - >>> lf - ┌───────────────────────────────────┐ - | Narwhals LazyFrame | - |-----------------------------------| - |Dask DataFrame Structure: | - | a b c| - |npartitions=2 | - |0 string int64 int64| - |3 ... ... ...| - |5 ... ... ...| - |Dask Name: frompandas, 1 expression| - |Expr=df | - └───────────────────────────────────┘ - >>> df = lf.group_by("a").agg(nw.col("b", "c").sum()).collect() - >>> df.to_native() - a b c - 0 a 4 10 - 1 b 11 10 - 2 c 6 1 """ return super().collect() # type: ignore[return-value] @@ -520,50 +292,6 @@ def to_frame(self) -> DataFrame[Any]: Returns: A DataFrame containing this Series as a single column. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 2] - >>> s_pd = pd.Series(data, name="a") - >>> s_pl = pl.Series("a", data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_to_frame(s_native: IntoSeries) -> IntoDataFrame: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.to_frame().to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_to_frame`: - - >>> agnostic_to_frame(s_pd) - a - 0 1 - 1 2 - - >>> agnostic_to_frame(s_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - └─────┘ - - >>> agnostic_to_frame(s_pa) - pyarrow.Table - : int64 - ---- - : [[1,2]] """ return super().to_frame() # type: ignore[return-value] @@ -589,54 +317,6 @@ def value_counts( A DataFrame with two columns: - The original values as first column - Either count or proportion as second column, depending on normalize parameter. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from narwhals.typing import IntoSeries - - >>> data = [1, 1, 2, 3, 2] - >>> s_pd = pd.Series(data, name="s") - >>> s_pl = pl.Series(values=data, name="s") - >>> s_pa = pa.chunked_array([data]) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_value_counts(s_native: IntoSeries) -> IntoDataFrame: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.value_counts(sort=True).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_value_counts`: - - >>> agnostic_value_counts(s_pd) - s count - 0 1 2 - 1 2 2 - 2 3 1 - - >>> agnostic_value_counts(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3, 2) - ┌─────┬───────┐ - │ s ┆ count │ - │ --- ┆ --- │ - │ i64 ┆ u32 │ - ╞═════╪═══════╡ - │ 1 ┆ 2 │ - │ 2 ┆ 2 │ - │ 3 ┆ 1 │ - └─────┴───────┘ - - >>> agnostic_value_counts(s_pa) - pyarrow.Table - : int64 - count: int64 - ---- - : [[1,2,3]] - count: [[2,2,1]] """ return super().value_counts( # type: ignore[return-value] sort=sort, parallel=parallel, name=name, normalize=normalize @@ -693,40 +373,6 @@ def ewm_mean( Returns: Series - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1, 2, 3] - >>> s_pd = pd.Series(name="a", data=data) - >>> s_pl = pl.Series(name="a", values=data) - - We define a library agnostic function: - - >>> def agnostic_ewm_mean(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.ewm_mean(com=1, ignore_nulls=False).to_native() - - We can then pass any supported library such as pandas or Polars - to `agnostic_ewm_mean`: - - >>> agnostic_ewm_mean(s_pd) - 0 1.000000 - 1 1.666667 - 2 2.428571 - Name: a, dtype: float64 - - >>> agnostic_ewm_mean(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3,) - Series: 'a' [f64] - [ - 1.0 - 1.666667 - 2.428571 - ] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -776,55 +422,6 @@ def rolling_sum( Returns: A new series. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1.0, 2.0, 3.0, 4.0] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_rolling_sum(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.rolling_sum(window_size=2).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_sum`: - - >>> agnostic_rolling_sum(s_pd) - 0 NaN - 1 3.0 - 2 5.0 - 3 7.0 - dtype: float64 - - >>> agnostic_rolling_sum(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [f64] - [ - null - 3.0 - 5.0 - 7.0 - ] - - >>> agnostic_rolling_sum(s_pa) # doctest:+ELLIPSIS - - [ - [ - null, - 3, - 5, - 7 - ] - ] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -870,55 +467,6 @@ def rolling_mean( Returns: A new series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1.0, 2.0, 3.0, 4.0] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_rolling_mean(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... 
return s.rolling_mean(window_size=2).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_mean`: - - >>> agnostic_rolling_mean(s_pd) - 0 NaN - 1 1.5 - 2 2.5 - 3 3.5 - dtype: float64 - - >>> agnostic_rolling_mean(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [f64] - [ - null - 1.5 - 2.5 - 3.5 - ] - - >>> agnostic_rolling_mean(s_pa) # doctest:+ELLIPSIS - - [ - [ - null, - 1.5, - 2.5, - 3.5 - ] - ] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -966,55 +514,6 @@ def rolling_var( Returns: A new series. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1.0, 3.0, 1.0, 4.0] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_rolling_var(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.rolling_var(window_size=2, min_periods=1).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_var`: - - >>> agnostic_rolling_var(s_pd) - 0 NaN - 1 2.0 - 2 2.0 - 3 4.5 - dtype: float64 - - >>> agnostic_rolling_var(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [f64] - [ - null - 2.0 - 2.0 - 4.5 - ] - - >>> agnostic_rolling_var(s_pa) # doctest:+ELLIPSIS - - [ - [ - nan, - 2, - 2, - 4.5 - ] - ] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -1063,55 +562,6 @@ def rolling_std( Returns: A new series. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT - - >>> data = [1.0, 3.0, 1.0, 4.0] - >>> s_pd = pd.Series(data) - >>> s_pl = pl.Series(data) - >>> s_pa = pa.chunked_array([data]) - - We define a library agnostic function: - - >>> def agnostic_rolling_std(s_native: IntoSeriesT) -> IntoSeriesT: - ... s = nw.from_native(s_native, series_only=True) - ... return s.rolling_std(window_size=2, min_periods=1).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_std`: - - >>> agnostic_rolling_std(s_pd) - 0 NaN - 1 1.414214 - 2 1.414214 - 3 2.121320 - dtype: float64 - - >>> agnostic_rolling_std(s_pl) # doctest:+NORMALIZE_WHITESPACE - shape: (4,) - Series: '' [f64] - [ - null - 1.414214 - 1.414214 - 2.12132 - ] - - >>> agnostic_rolling_std(s_pa) # doctest:+ELLIPSIS - - [ - [ - nan, - 1.4142135623730951, - 1.4142135623730951, - 2.1213203435596424 - ] - ] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -1184,43 +634,6 @@ def ewm_mean( Returns: Expr - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2, 3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - - We define a library agnostic function: - - >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").ewm_mean(com=1, ignore_nulls=False) - ... 
).to_native() - - We can then pass either pandas or Polars to `func`: - - >>> my_library_agnostic_function(df_pd) - a - 0 1.000000 - 1 1.666667 - 2 2.428571 - - >>> my_library_agnostic_function(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3, 1) - ┌──────────┐ - │ a │ - │ --- │ - │ f64 │ - ╞══════════╡ - │ 1.0 │ - │ 1.666667 │ - │ 2.428571 │ - └──────────┘ """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -1270,54 +683,6 @@ def rolling_sum( Returns: A new expression. - - Examples: - >>> import narwhals as nw - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> @nw.narwhalify - ... def agnostic_rolling_sum(df): - ... return df.with_columns( - ... b=nw.col("a").rolling_sum(window_size=3, min_periods=1) - ... ) - - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: - - >>> agnostic_rolling_sum(df_pd) - a b - 0 1.0 1.0 - 1 2.0 3.0 - 2 NaN 3.0 - 3 4.0 6.0 - - >>> agnostic_rolling_sum(df_pl) - shape: (4, 2) - ┌──────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪═════╡ - │ 1.0 ┆ 1.0 │ - │ 2.0 ┆ 3.0 │ - │ null ┆ 3.0 │ - │ 4.0 ┆ 6.0 │ - └──────┴─────┘ - - >>> agnostic_rolling_sum(df_pa) # doctest:+ELLIPSIS - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[1,3,3,6]] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -1363,54 +728,6 @@ def rolling_mean( Returns: A new expression. 
- - Examples: - >>> import narwhals as nw - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> @nw.narwhalify - ... def agnostic_rolling_mean(df): - ... return df.with_columns( - ... b=nw.col("a").rolling_mean(window_size=3, min_periods=1) - ... ) - - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: - - >>> agnostic_rolling_mean(df_pd) - a b - 0 1.0 1.0 - 1 2.0 1.5 - 2 NaN 1.5 - 3 4.0 3.0 - - >>> agnostic_rolling_mean(df_pl) - shape: (4, 2) - ┌──────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪═════╡ - │ 1.0 ┆ 1.0 │ - │ 2.0 ┆ 1.5 │ - │ null ┆ 1.5 │ - │ 4.0 ┆ 3.0 │ - └──────┴─────┘ - - >>> agnostic_rolling_mean(df_pa) # doctest:+ELLIPSIS - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[1,1.5,1.5,3]] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -1458,55 +775,6 @@ def rolling_var( Returns: A new expression. - - Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_var(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_var(window_size=3, min_periods=1) - ... 
).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: - - >>> agnostic_rolling_var(df_pd) - a b - 0 1.0 NaN - 1 2.0 0.5 - 2 NaN 0.5 - 3 4.0 2.0 - - >>> agnostic_rolling_var(df_pl) # doctest:+SKIP - shape: (4, 2) - ┌──────┬──────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪══════╡ - │ 1.0 ┆ null │ - │ 2.0 ┆ 0.5 │ - │ null ┆ 0.5 │ - │ 4.0 ┆ 2.0 │ - └──────┴──────┘ - - >>> agnostic_rolling_var(df_pa) # doctest:+ELLIPSIS - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[nan,0.5,0.5,2]] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -1552,55 +820,6 @@ def rolling_std( Returns: A new expression. - - Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_std(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_std(window_size=3, min_periods=1) - ... 
).to_native() - - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: - - >>> agnostic_rolling_std(df_pd) - a b - 0 1.0 NaN - 1 2.0 0.707107 - 2 NaN 0.707107 - 3 4.0 1.414214 - - >>> agnostic_rolling_std(df_pl) # doctest:+SKIP - shape: (4, 2) - ┌──────┬──────────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪══════════╡ - │ 1.0 ┆ null │ - │ 2.0 ┆ 0.707107 │ - │ null ┆ 0.707107 │ - │ 4.0 ┆ 1.414214 │ - └──────┴──────────┘ - - >>> agnostic_rolling_std(df_pa) # doctest:+ELLIPSIS - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[nan,0.7071067811865476,0.7071067811865476,1.4142135623730951]] """ from narwhals.exceptions import NarwhalsUnstableWarning from narwhals.utils import find_stacklevel @@ -1625,28 +844,6 @@ class Schema(NwSchema): schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None The schema definition given by column names and their associated. *instantiated* Narwhals data type. Accepts a mapping or an iterable of tuples. - - Examples: - Define a schema by passing *instantiated* data types. - - >>> import narwhals as nw - >>> schema = nw.Schema({"foo": nw.Int8(), "bar": nw.String()}) - >>> schema - Schema({'foo': Int8, 'bar': String}) - - Access the data type associated with a specific column name. - - >>> schema["foo"] - Int8 - - Access various schema properties using the `names`, `dtypes`, and `len` methods. - - >>> schema.names() - ['foo', 'bar'] - >>> schema.dtypes() - [Int8, String] - >>> schema.len() - 2 """ @@ -2271,21 +1468,6 @@ def narwhalify( Returns: Decorated function. - - Examples: - Instead of writing - - >>> import narwhals as nw - >>> def agnostic_group_by_sum(df): - ... df = nw.from_native(df, pass_through=True) - ... df = df.group_by("a").agg(nw.col("b").sum()) - ... return nw.to_native(df) - - you can just write - - >>> @nw.narwhalify - ... def agnostic_group_by_sum(df): - ... 
return df.group_by("a").agg(nw.col("b").sum()) """ pass_through = validate_strict_and_pass_though( strict, pass_through, pass_through_default=True, emit_deprecation_warning=False @@ -2346,53 +1528,6 @@ def all() -> Expr: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.all() * 2).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_all`: - - >>> agnostic_all(df_pd) - a b - 0 2 8 - 1 4 10 - 2 6 12 - - >>> agnostic_all(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 2 ┆ 8 │ - │ 4 ┆ 10 │ - │ 6 ┆ 12 │ - └─────┴─────┘ - - >>> agnostic_all(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[2,4,6]] - b: [[8,10,12]] """ return _stableify(nw.all()) @@ -2405,49 +1540,6 @@ def col(*names: str | Iterable[str]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_col(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a") * nw.col("b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_col`: - - >>> agnostic_col(df_pd) - a - 0 3 - 1 8 - - >>> agnostic_col(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 3 │ - │ 8 │ - └─────┘ - - >>> agnostic_col(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[3,8]] """ return _stableify(nw.col(*names)) @@ -2464,48 +1556,6 @@ def nth(*indices: int | Sequence[int]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_nth(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.nth(0) * 2).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_nth`: - - >>> agnostic_nth(df_pd) - a - 0 2 - 1 4 - - >>> agnostic_nth(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 4 │ - └─────┘ - - >>> agnostic_nth(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,4]] """ return _stableify(nw.nth(*indices)) @@ -2515,45 +1565,6 @@ def len() -> Expr: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [5, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.len()).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_len`: - - >>> agnostic_len(df_pd) - len - 0 2 - >>> agnostic_len(df_pl) - shape: (1, 1) - ┌─────┐ - │ len │ - │ --- │ - │ u32 │ - ╞═════╡ - │ 2 │ - └─────┘ - >>> agnostic_len(df_pa) - pyarrow.Table - len: int64 - ---- - len: [[2]] """ return _stableify(nw.len()) @@ -2568,51 +1579,6 @@ def lit(value: Any, dtype: DType | type[DType] | None = None) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_lit(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns(nw.lit(3)).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_lit`: - - >>> agnostic_lit(df_pd) - a literal - 0 1 3 - 1 2 3 - - >>> agnostic_lit(df_pl) - shape: (2, 2) - ┌─────┬─────────┐ - │ a ┆ literal │ - │ --- ┆ --- │ - │ i64 ┆ i32 │ - ╞═════╪═════════╡ - │ 1 ┆ 3 │ - │ 2 ┆ 3 │ - └─────┴─────────┘ - - >>> agnostic_lit(df_pa) - pyarrow.Table - a: int64 - literal: int64 - ---- - a: [[1,2]] - literal: [[3,3]] """ return _stableify(nw.lit(value, dtype)) @@ -2628,47 +1594,6 @@ def min(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [5, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT: - ... 
df = nw.from_native(df_native) - ... return df.select(nw.min("b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_min`: - - >>> agnostic_min(df_pd) - b - 0 5 - - >>> agnostic_min(df_pl) - shape: (1, 1) - ┌─────┐ - │ b │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 5 │ - └─────┘ - - >>> agnostic_min(df_pa) - pyarrow.Table - b: int64 - ---- - b: [[5]] """ return _stableify(nw.min(*columns)) @@ -2684,47 +1609,6 @@ def max(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [5, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.max("a")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_max`: - - >>> agnostic_max(df_pd) - a - 0 2 - - >>> agnostic_max(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - └─────┘ - - >>> agnostic_max(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2]] """ return _stableify(nw.max(*columns)) @@ -2740,47 +1624,6 @@ def mean(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 8, 3]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe agnostic function: - - >>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.mean("a")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_mean`: - - >>> agnostic_mean(df_pd) - a - 0 4.0 - - >>> agnostic_mean(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 4.0 │ - └─────┘ - - >>> agnostic_mean(df_pa) - pyarrow.Table - a: double - ---- - a: [[4]] """ return _stableify(nw.mean(*columns)) @@ -2798,47 +1641,6 @@ def median(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [4, 5, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe agnostic function: - - >>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.median("a")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_median`: - - >>> agnostic_median(df_pd) - a - 0 4.0 - - >>> agnostic_median(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 4.0 │ - └─────┘ - - >>> agnostic_median(df_pa) - pyarrow.Table - a: double - ---- - a: [[4]] """ return _stableify(nw.median(*columns)) @@ -2854,47 +1656,6 @@ def sum(*columns: str) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.sum("a")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_sum`: - - >>> agnostic_sum(df_pd) - a - 0 3 - - >>> agnostic_sum(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 3 │ - └─────┘ - - >>> agnostic_sum(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[3]] """ return _stableify(nw.sum(*columns)) @@ -2911,50 +1672,6 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [5, 10, None]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_sum_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.sum_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_sum_horizontal`: - - >>> agnostic_sum_horizontal(df_pd) - a - 0 6.0 - 1 12.0 - 2 3.0 - - >>> agnostic_sum_horizontal(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 6 │ - │ 12 │ - │ 3 │ - └─────┘ - - >>> agnostic_sum_horizontal(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[6,12,3]] """ return _stableify(nw.sum_horizontal(*exprs)) @@ -2968,64 +1685,6 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [False, False, True, True, False, None], - ... "b": [False, True, True, None, None, None], - ... 
} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_all_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select("a", "b", all=nw.all_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_all_horizontal`: - - >>> agnostic_all_horizontal(df_pd) - a b all - 0 False False False - 1 False True False - 2 True True True - 3 True - 4 False False - 5 - - >>> agnostic_all_horizontal(df_pl) - shape: (6, 3) - ┌───────┬───────┬───────┐ - │ a ┆ b ┆ all │ - │ --- ┆ --- ┆ --- │ - │ bool ┆ bool ┆ bool │ - ╞═══════╪═══════╪═══════╡ - │ false ┆ false ┆ false │ - │ false ┆ true ┆ false │ - │ true ┆ true ┆ true │ - │ true ┆ null ┆ null │ - │ false ┆ null ┆ false │ - │ null ┆ null ┆ null │ - └───────┴───────┴───────┘ - - >>> agnostic_all_horizontal(df_pa) - pyarrow.Table - a: bool - b: bool - all: bool - ---- - a: [[false,false,true,true,false,null]] - b: [[false,true,true,null,null,null]] - all: [[false,false,true,null,false,null]] """ return _stableify(nw.all_horizontal(*exprs)) @@ -3039,64 +1698,6 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [False, False, True, True, False, None], - ... "b": [False, True, True, None, None, None], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_any_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select("a", "b", any=nw.any_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_any_horizontal`: - - >>> agnostic_any_horizontal(df_pd) - a b any - 0 False False False - 1 False True True - 2 True True True - 3 True True - 4 False - 5 - - >>> agnostic_any_horizontal(df_pl) - shape: (6, 3) - ┌───────┬───────┬───────┐ - │ a ┆ b ┆ any │ - │ --- ┆ --- ┆ --- │ - │ bool ┆ bool ┆ bool │ - ╞═══════╪═══════╪═══════╡ - │ false ┆ false ┆ false │ - │ false ┆ true ┆ true │ - │ true ┆ true ┆ true │ - │ true ┆ null ┆ true │ - │ false ┆ null ┆ null │ - │ null ┆ null ┆ null │ - └───────┴───────┴───────┘ - - >>> agnostic_any_horizontal(df_pa) - pyarrow.Table - a: bool - b: bool - any: bool - ---- - a: [[false,false,true,true,false,null]] - b: [[false,true,true,null,null,null]] - any: [[false,true,true,true,null,null]] """ return _stableify(nw.any_horizontal(*exprs)) @@ -3110,56 +1711,6 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 8, 3], - ... "b": [4, 5, None], - ... "c": ["x", "y", "z"], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function that computes the horizontal mean of "a" - and "b" columns: - - >>> def agnostic_mean_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.mean_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_mean_horizontal`: - - >>> agnostic_mean_horizontal(df_pd) - a - 0 2.5 - 1 6.5 - 2 3.0 - - >>> agnostic_mean_horizontal(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2.5 │ - │ 6.5 │ - │ 3.0 │ - └─────┘ - - >>> agnostic_mean_horizontal(df_pa) - pyarrow.Table - a: double - ---- - a: [[2.5,6.5,3]] """ return _stableify(nw.mean_horizontal(*exprs)) @@ -3176,53 +1727,6 @@ def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 8, 3], - ... "b": [4, 5, None], - ... "c": ["x", "y", "z"], - ... } - - We define a dataframe-agnostic function that computes the horizontal min of "a" - and "b" columns: - - >>> def agnostic_min_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.min_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_min_horizontal`: - - >>> agnostic_min_horizontal(pd.DataFrame(data)) - a - 0 1.0 - 1 5.0 - 2 3.0 - - >>> agnostic_min_horizontal(pl.DataFrame(data)) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 5 │ - │ 3 │ - └─────┘ - - >>> agnostic_min_horizontal(pa.table(data)) - pyarrow.Table - a: int64 - ---- - a: [[1,5,3]] """ return _stableify(nw.min_horizontal(*exprs)) @@ -3239,53 +1743,6 @@ def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 8, 3], - ... 
"b": [4, 5, None], - ... "c": ["x", "y", "z"], - ... } - - We define a dataframe-agnostic function that computes the horizontal max of "a" - and "b" columns: - - >>> def agnostic_max_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.max_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_max_horizontal`: - - >>> agnostic_max_horizontal(pd.DataFrame(data)) - a - 0 4.0 - 1 8.0 - 2 3.0 - - >>> agnostic_max_horizontal(pl.DataFrame(data)) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 4 │ - │ 8 │ - │ 3 │ - └─────┘ - - >>> agnostic_max_horizontal(pa.table(data)) - pyarrow.Table - a: int64 - ---- - a: [[4,8,3]] """ return _stableify(nw.max_horizontal(*exprs)) @@ -3336,123 +1793,6 @@ def concat( Raises: TypeError: The items to concatenate should either all be eager, or all lazy - - Examples: - Let's take an example of vertical concatenation: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> data_2 = {"a": [5, 2], "b": [1, 4]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def agnostic_vertical_concat(df1, df2): - ... 
return nw.concat([df1, df2], how="vertical") - - >>> agnostic_vertical_concat(df_pd_1, df_pd_2) - a b - 0 1 4 - 1 2 5 - 2 3 6 - 0 5 1 - 1 2 4 - >>> agnostic_vertical_concat(df_pl_1, df_pl_2) - shape: (5, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - │ 5 ┆ 1 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - - Let's look at case a for horizontal concatenation: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> data_2 = {"c": [5, 2], "d": [1, 4]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Defining a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def agnostic_horizontal_concat(df1, df2): - ... return nw.concat([df1, df2], how="horizontal") - - >>> agnostic_horizontal_concat(df_pd_1, df_pd_2) - a b c d - 0 1 4 5.0 1.0 - 1 2 5 2.0 4.0 - 2 3 6 NaN NaN - - >>> agnostic_horizontal_concat(df_pl_1, df_pl_2) - shape: (3, 4) - ┌─────┬─────┬──────┬──────┐ - │ a ┆ b ┆ c ┆ d │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪══════╪══════╡ - │ 1 ┆ 4 ┆ 5 ┆ 1 │ - │ 2 ┆ 5 ┆ 2 ┆ 4 │ - │ 3 ┆ 6 ┆ null ┆ null │ - └─────┴─────┴──────┴──────┘ - - Let's look at case a for diagonal concatenation: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> data_1 = {"a": [1, 2], "b": [3.5, 4.5]} - >>> data_2 = {"a": [3, 4], "z": ["x", "y"]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Defining a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def agnostic_diagonal_concat(df1, df2): - ... 
return nw.concat([df1, df2], how="diagonal") - - >>> agnostic_diagonal_concat(df_pd_1, df_pd_2) - a b z - 0 1 3.5 NaN - 1 2 4.5 NaN - 0 3 NaN x - 1 4 NaN y - - >>> agnostic_diagonal_concat(df_pl_1, df_pl_2) - shape: (4, 3) - ┌─────┬──────┬──────┐ - │ a ┆ b ┆ z │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str │ - ╞═════╪══════╪══════╡ - │ 1 ┆ 3.5 ┆ null │ - │ 2 ┆ 4.5 ┆ null │ - │ 3 ┆ null ┆ x │ - │ 4 ┆ null ┆ y │ - └─────┴──────┴──────┘ """ return _stableify(nw.concat(items, how=how)) @@ -3478,62 +1818,6 @@ def concat_str( Returns: A new expression. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 2, 3], - ... "b": ["dogs", "cats", None], - ... "c": ["play", "swim", "walk"], - ... } - - We define a dataframe-agnostic function that computes the horizontal string - concatenation of different columns - - >>> def agnostic_concat_str(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.concat_str( - ... [ - ... nw.col("a") * 2, - ... nw.col("b"), - ... nw.col("c"), - ... ], - ... separator=" ", - ... ).alias("full_sentence") - ... 
).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_concat_str`: - - >>> agnostic_concat_str(pd.DataFrame(data)) - full_sentence - 0 2 dogs play - 1 4 cats swim - 2 None - - >>> agnostic_concat_str(pl.DataFrame(data)) - shape: (3, 1) - ┌───────────────┐ - │ full_sentence │ - │ --- │ - │ str │ - ╞═══════════════╡ - │ 2 dogs play │ - │ 4 cats swim │ - │ null │ - └───────────────┘ - - >>> agnostic_concat_str(pa.table(data)) - pyarrow.Table - full_sentence: string - ---- - full_sentence: [["2 dogs play","4 cats swim",null]] """ return _stableify( nw.concat_str(exprs, *more_exprs, separator=separator, ignore_nulls=ignore_nulls) @@ -3578,57 +1862,6 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: Returns: A "when" object, which `.then` can be called on. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [5, 10, 15]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_when_then_otherwise(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") - ... 
).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_when_then_otherwise`: - - >>> agnostic_when_then_otherwise(df_pd) - a b a_when - 0 1 5 5 - 1 2 10 5 - 2 3 15 6 - - >>> agnostic_when_then_otherwise(df_pl) - shape: (3, 3) - ┌─────┬─────┬────────┐ - │ a ┆ b ┆ a_when │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i32 │ - ╞═════╪═════╪════════╡ - │ 1 ┆ 5 ┆ 5 │ - │ 2 ┆ 10 ┆ 5 │ - │ 3 ┆ 15 ┆ 6 │ - └─────┴─────┴────────┘ - - >>> agnostic_when_then_otherwise(df_pa) - pyarrow.Table - a: int64 - b: int64 - a_when: int64 - ---- - a: [[1,2,3]] - b: [[5,10,15]] - a_when: [[5,5,6]] """ return When.from_when(nw_when(*predicates)) @@ -3651,54 +1884,6 @@ def new_series( Returns: A new Series - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT, IntoSeriesT - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's define a dataframe-agnostic function: - - >>> def agnostic_new_series(df_native: IntoFrameT) -> IntoSeriesT: - ... values = [4, 1, 2, 3] - ... native_namespace = nw.get_native_namespace(df_native) - ... return nw.new_series( - ... name="a", - ... values=values, - ... dtype=nw.Int32, - ... native_namespace=native_namespace, - ... ).to_native() - - We can then pass any supported eager library, such as pandas / Polars / PyArrow: - - >>> agnostic_new_series(pd.DataFrame(data)) - 0 4 - 1 1 - 2 2 - 3 3 - Name: a, dtype: int32 - >>> agnostic_new_series(pl.DataFrame(data)) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: 'a' [i32] - [ - 4 - 1 - 2 - 3 - ] - >>> agnostic_new_series(pa.table(data)) - - [ - [ - 4, - 1, - 2, - 3 - ] - ] """ return _stableify( # type: ignore[no-any-return] _new_series_impl( @@ -3722,38 +1907,6 @@ def from_arrow( Returns: A new DataFrame. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's define a dataframe-agnostic function which creates a PyArrow - Table. - - >>> def agnostic_to_arrow(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return nw.from_arrow(df, native_namespace=pa).to_native() - - Let's see what happens when passing pandas / Polars input: - - >>> agnostic_to_arrow(pd.DataFrame(data)) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] - >>> agnostic_to_arrow(pl.DataFrame(data)) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] """ return _stableify( # type: ignore[no-any-return] nw_from_arrow(native_frame, native_namespace=native_namespace) @@ -3783,45 +1936,6 @@ def from_dict( Returns: A new DataFrame. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data: - - >>> def agnostic_from_dict(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = {"c": [5, 2], "d": [1, 4]} - ... native_namespace = nw.get_native_namespace(df_native) - ... 
return nw.from_dict(new_data, native_namespace=native_namespace).to_native() - - Let's see what happens when passing pandas, Polars or PyArrow input: - - >>> agnostic_from_dict(pd.DataFrame(data)) - c d - 0 5 1 - 1 2 4 - >>> agnostic_from_dict(pl.DataFrame(data)) - shape: (2, 2) - ┌─────┬─────┐ - │ c ┆ d │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 5 ┆ 1 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - >>> agnostic_from_dict(pa.table(data)) - pyarrow.Table - c: int64 - d: int64 - ---- - c: [[5,2]] - d: [[1,4]] """ return _stableify( # type: ignore[no-any-return] _from_dict_impl( @@ -3854,123 +1968,6 @@ def from_numpy( Returns: A new DataFrame. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> import numpy as np - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2], "b": [3, 4]} - - Let's create a new dataframe of the same class as the dataframe we started with, from a NumPy ndarray of new data: - - >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) - ... df = nw.from_native(df_native) - ... native_namespace = nw.get_native_namespace(df) - ... 
return nw.from_numpy(new_data, native_namespace=native_namespace).to_native() - - Let's see what happens when passing pandas, Polars or PyArrow input: - - >>> agnostic_from_numpy(pd.DataFrame(data)) - column_0 column_1 column_2 - 0 5 2 1 - 1 1 4 3 - >>> agnostic_from_numpy(pl.DataFrame(data)) - shape: (2, 3) - ┌──────────┬──────────┬──────────┐ - │ column_0 ┆ column_1 ┆ column_2 │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞══════════╪══════════╪══════════╡ - │ 5 ┆ 2 ┆ 1 │ - │ 1 ┆ 4 ┆ 3 │ - └──────────┴──────────┴──────────┘ - >>> agnostic_from_numpy(pa.table(data)) - pyarrow.Table - column_0: int64 - column_1: int64 - column_2: int64 - ---- - column_0: [[5,1]] - column_1: [[2,4]] - column_2: [[1,3]] - - Let's specify the column names: - - >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) - ... schema = ["c", "d", "e"] - ... df = nw.from_native(df_native) - ... native_namespace = nw.get_native_namespace(df) - ... return nw.from_numpy( - ... new_data, native_namespace=native_namespace, schema=schema - ... ).to_native() - - Let's see the modified outputs: - - >>> agnostic_from_numpy(pd.DataFrame(data)) - c d e - 0 5 2 1 - 1 1 4 3 - >>> agnostic_from_numpy(pl.DataFrame(data)) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ c ┆ d ┆ e │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ 5 ┆ 2 ┆ 1 │ - │ 1 ┆ 4 ┆ 3 │ - └─────┴─────┴─────┘ - >>> agnostic_from_numpy(pa.table(data)) - pyarrow.Table - c: int64 - d: int64 - e: int64 - ---- - c: [[5,1]] - d: [[2,4]] - e: [[1,3]] - - Let's modify the function so that it specifies the schema: - - >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) - ... schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()} - ... df = nw.from_native(df_native) - ... native_namespace = nw.get_native_namespace(df) - ... return nw.from_numpy( - ... 
new_data, native_namespace=native_namespace, schema=schema - ... ).to_native() - - Let's see the outputs: - - >>> agnostic_from_numpy(pd.DataFrame(data)) - c d e - 0 5 2.0 1 - 1 1 4.0 3 - >>> agnostic_from_numpy(pl.DataFrame(data)) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ c ┆ d ┆ e │ - │ --- ┆ --- ┆ --- │ - │ i16 ┆ f32 ┆ i8 │ - ╞═════╪═════╪═════╡ - │ 5 ┆ 2.0 ┆ 1 │ - │ 1 ┆ 4.0 ┆ 3 │ - └─────┴─────┴─────┘ - >>> agnostic_from_numpy(pa.table(data)) - pyarrow.Table - c: int16 - d: float - e: int8 - ---- - c: [[5,1]] - d: [[2,4]] - e: [[1,3]] """ return _stableify( # type: ignore[no-any-return] _from_numpy_impl( @@ -3996,45 +1993,6 @@ def read_csv( Returns: DataFrame. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from types import ModuleType - - Let's create an agnostic function that reads a csv file with a specified native namespace: - - >>> def agnostic_read_csv(native_namespace: ModuleType) -> IntoDataFrame: - ... return nw.read_csv("file.csv", native_namespace=native_namespace).to_native() - - Then we can read the file by passing pandas, Polars or PyArrow namespaces: - - >>> agnostic_read_csv(native_namespace=pd) # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 - >>> agnostic_read_csv(native_namespace=pl) # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_read_csv(native_namespace=pa) # doctest:+SKIP - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] """ return _stableify( # type: ignore[no-any-return] _read_csv_impl(source, native_namespace=native_namespace, **kwargs) @@ -4058,38 +2016,6 @@ def scan_csv( Returns: LazyFrame. 
- - Examples: - >>> import dask.dataframe as dd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> from types import ModuleType - - Let's create an agnostic function that lazily reads a csv file with a specified native namespace: - - >>> def agnostic_scan_csv(native_namespace: ModuleType) -> IntoFrame: - ... return nw.scan_csv("file.csv", native_namespace=native_namespace).to_native() - - Then we can read the file by passing, for example, Polars or Dask namespaces: - - >>> agnostic_scan_csv(native_namespace=pl).collect() # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_scan_csv(native_namespace=dd).compute() # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 """ return _stableify( # type: ignore[no-any-return] _scan_csv_impl(source, native_namespace=native_namespace, **kwargs) @@ -4110,47 +2036,6 @@ def read_parquet( Returns: DataFrame. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from types import ModuleType - - Let's create an agnostic function that reads a parquet file with a specified native namespace: - - >>> def agnostic_read_parquet(native_namespace: ModuleType) -> IntoDataFrame: - ... return nw.read_parquet( - ... "file.parquet", native_namespace=native_namespace - ... 
).to_native() - - Then we can read the file by passing pandas, Polars or PyArrow namespaces: - - >>> agnostic_read_parquet(native_namespace=pd) # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 - >>> agnostic_read_parquet(native_namespace=pl) # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_read_parquet(native_namespace=pa) # doctest:+SKIP - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] """ return _stableify( # type: ignore[no-any-return] _read_parquet_impl(source, native_namespace=native_namespace, **kwargs) @@ -4174,40 +2059,6 @@ def scan_parquet( Returns: LazyFrame. - - Examples: - >>> import dask.dataframe as dd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> from types import ModuleType - - Let's create an agnostic function that lazily reads a parquet file with a specified native namespace: - - >>> def agnostic_scan_parquet(native_namespace: ModuleType) -> IntoFrame: - ... return nw.scan_parquet( - ... "file.parquet", native_namespace=native_namespace - ... 
).to_native() - - Then we can read the file by passing, for example, Polars or Dask namespaces: - - >>> agnostic_scan_parquet(native_namespace=pl).collect() # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_scan_parquet(native_namespace=dd).compute() # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 """ return _stableify( # type: ignore[no-any-return] _scan_parquet_impl(source, native_namespace=native_namespace, **kwargs) diff --git a/narwhals/stable/v1/_dtypes.py b/narwhals/stable/v1/_dtypes.py index 00fe06141..df5728d4f 100644 --- a/narwhals/stable/v1/_dtypes.py +++ b/narwhals/stable/v1/_dtypes.py @@ -40,33 +40,6 @@ class Datetime(NwDatetime): Notes: Adapted from [Polars implementation](https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L398-L457) - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> import narwhals as nw - >>> from datetime import datetime, timedelta - >>> data = [datetime(2024, 12, 9) + timedelta(days=n) for n in range(5)] - >>> ser_pd = ( - ... pd.Series(data) - ... .dt.tz_localize("Africa/Accra") - ... .astype("datetime64[ms, Africa/Accra]") - ... ) - >>> ser_pl = ( - ... pl.Series(data).cast(pl.Datetime("ms")).dt.replace_time_zone("Africa/Accra") - ... ) - >>> ser_pa = pc.assume_timezone( - ... pa.chunked_array([data], type=pa.timestamp("ms")), "Africa/Accra" - ... 
) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Datetime(time_unit='ms', time_zone='Africa/Accra') - >>> nw.from_native(ser_pl, series_only=True).dtype - Datetime(time_unit='ms', time_zone='Africa/Accra') - >>> nw.from_native(ser_pa, series_only=True).dtype - Datetime(time_unit='ms', time_zone='Africa/Accra') """ def __hash__(self) -> int: @@ -81,24 +54,6 @@ class Duration(NwDuration): Notes: Adapted from [Polars implementation](https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L460-L502) - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa - >>> import narwhals as nw - >>> from datetime import timedelta - >>> data = [timedelta(seconds=d) for d in range(1, 4)] - >>> ser_pd = pd.Series(data).astype("timedelta64[ms]") - >>> ser_pl = pl.Series(data).cast(pl.Duration("ms")) - >>> ser_pa = pa.chunked_array([data], type=pa.duration("ms")) - - >>> nw.from_native(ser_pd, series_only=True).dtype - Duration(time_unit='ms') - >>> nw.from_native(ser_pl, series_only=True).dtype - Duration(time_unit='ms') - >>> nw.from_native(ser_pa, series_only=True).dtype - Duration(time_unit='ms') """ def __hash__(self) -> int: diff --git a/narwhals/translate.py b/narwhals/translate.py index 9ad868016..f76177e7c 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -812,17 +812,6 @@ def get_native_namespace( Returns: Native module. - - Examples: - >>> import polars as pl - >>> import pandas as pd - >>> import narwhals as nw - >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]})) - >>> nw.get_native_namespace(df) - - >>> df = nw.from_native(pl.DataFrame({"a": [1, 2, 3]})) - >>> nw.get_native_namespace(df) - """ if hasattr(obj, "__native_namespace__"): return obj.__native_namespace__() @@ -902,21 +891,6 @@ def narwhalify( Returns: Decorated function. - - Examples: - Instead of writing - - >>> import narwhals as nw - >>> def agnostic_group_by_sum(df): - ... 
df = nw.from_native(df, pass_through=True) - ... df = df.group_by("a").agg(nw.col("b").sum()) - ... return nw.to_native(df) - - you can just write - - >>> @nw.narwhalify - ... def agnostic_group_by_sum(df): - ... return df.group_by("a").agg(nw.col("b").sum()) """ from narwhals.utils import validate_strict_and_pass_though @@ -985,19 +959,6 @@ def to_py_scalar(scalar_like: Any) -> Any: Raises: ValueError: If the object is not convertible to a scalar. - - Examples: - >>> import narwhals as nw - >>> import pandas as pd - >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]})) - >>> nw.to_py_scalar(df["a"].item(0)) - 1 - >>> import pyarrow as pa - >>> df = nw.from_native(pa.table({"a": [1, 2, 3]})) - >>> nw.to_py_scalar(df["a"].item(0)) - 1 - >>> nw.to_py_scalar(1) - 1 """ if scalar_like is None: return None diff --git a/narwhals/utils.py b/narwhals/utils.py index 509a0e36a..240140938 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -130,14 +130,6 @@ def is_pandas(self) -> bool: Returns: Boolean. - - Examples: - >>> import pandas as pd - >>> import narwhals as nw - >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_pandas() - True """ return self is Implementation.PANDAS @@ -146,14 +138,6 @@ def is_pandas_like(self) -> bool: Returns: Boolean. - - Examples: - >>> import pandas as pd - >>> import narwhals as nw - >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_pandas_like() - True """ return self in { Implementation.PANDAS, @@ -166,14 +150,6 @@ def is_polars(self) -> bool: Returns: Boolean. - - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_polars() - True """ return self is Implementation.POLARS @@ -182,14 +158,6 @@ def is_cudf(self) -> bool: Returns: Boolean. 
- - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_cudf() - False """ return self is Implementation.CUDF # pragma: no cover @@ -198,14 +166,6 @@ def is_modin(self) -> bool: Returns: Boolean. - - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_modin() - False """ return self is Implementation.MODIN # pragma: no cover @@ -214,14 +174,6 @@ def is_pyspark(self) -> bool: Returns: Boolean. - - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_pyspark() - False """ return self is Implementation.PYSPARK # pragma: no cover @@ -230,14 +182,6 @@ def is_pyarrow(self) -> bool: Returns: Boolean. - - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_pyarrow() - False """ return self is Implementation.PYARROW # pragma: no cover @@ -246,14 +190,6 @@ def is_dask(self) -> bool: Returns: Boolean. - - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_dask() - False """ return self is Implementation.DASK # pragma: no cover @@ -262,14 +198,6 @@ def is_duckdb(self) -> bool: Returns: Boolean. - - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_duckdb() - False """ return self is Implementation.DUCKDB # pragma: no cover @@ -278,14 +206,6 @@ def is_ibis(self) -> bool: Returns: Boolean. 
- - Examples: - >>> import polars as pl - >>> import narwhals as nw - >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) - >>> df = nw.from_native(df_native) - >>> df.implementation.is_ibis() - False """ return self is Implementation.IBIS # pragma: no cover @@ -427,19 +347,6 @@ def maybe_align_index( rely on the Index. For non-pandas-like inputs, this only checks that `lhs` and `rhs` are the same length. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4]) - >>> s_pd = pd.Series([6, 7], index=[4, 3]) - >>> df = nw.from_native(df_pd) - >>> s = nw.from_native(s_pd, series_only=True) - >>> nw.to_native(nw.maybe_align_index(df, s)) - a - 4 2 - 3 1 """ from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.series import PandasLikeSeries @@ -520,19 +427,6 @@ def maybe_get_index(obj: DataFrame[Any] | LazyFrame[Any] | Series[Any]) -> Any | If you're designing a new library, we highly encourage you to not rely on the Index. For non-pandas-like inputs, this returns `None`. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) - >>> df = nw.from_native(df_pd) - >>> nw.maybe_get_index(df) - RangeIndex(start=0, stop=2, step=1) - >>> series_pd = pd.Series([1, 2]) - >>> series = nw.from_native(series_pd, series_only=True) - >>> nw.maybe_get_index(series) - RangeIndex(start=0, stop=2, step=1) """ obj_any = cast(Any, obj) native_obj = obj_any.to_native() @@ -575,18 +469,6 @@ def maybe_set_index( rely on the Index. For non-pandas-like inputs, this is a no-op. 
- - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) - >>> df = nw.from_native(df_pd) - >>> nw.to_native(nw.maybe_set_index(df, "b")) # doctest: +NORMALIZE_WHITESPACE - a - b - 4 1 - 5 2 """ from narwhals.translate import to_native @@ -649,21 +531,6 @@ def maybe_reset_index(obj: FrameOrSeriesT) -> FrameOrSeriesT: If you're designing a new library, we highly encourage you to not rely on the Index. For non-pandas-like inputs, this is a no-op. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=([6, 7])) - >>> df = nw.from_native(df_pd) - >>> nw.to_native(nw.maybe_reset_index(df)) - a b - 0 1 4 - 1 2 5 - >>> series_pd = pd.Series([1, 2]) - >>> series = nw.from_native(series_pd, series_only=True) - >>> nw.maybe_get_index(series) - RangeIndex(start=0, stop=2, step=1) """ obj_any = cast(Any, obj) native_obj = obj_any.to_native() @@ -714,23 +581,6 @@ def maybe_convert_dtypes( Notes: For non-pandas-like inputs, this is a no-op. Also, `args` and `kwargs` just get passed down to the underlying library as-is. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> import numpy as np - >>> df_pd = pd.DataFrame( - ... { - ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), - ... "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")), - ... } - ... ) - >>> df = nw.from_native(df_pd) - >>> nw.to_native(nw.maybe_convert_dtypes(df)).dtypes # doctest: +NORMALIZE_WHITESPACE - a Int32 - b boolean - dtype: object """ obj_any = cast(Any, obj) native_obj = obj_any.to_native() @@ -794,27 +644,6 @@ def is_ordered_categorical(series: Series[Any]) -> bool: Returns: Whether the Series is an ordered categorical. 
- - Examples: - >>> import narwhals as nw - >>> import pandas as pd - >>> import polars as pl - >>> data = ["x", "y"] - >>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True)) - >>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical")) - - Let's define a library-agnostic function: - - >>> @nw.narwhalify - ... def func(s): - ... return nw.is_ordered_categorical(s) - - Then, we can pass any supported library to `func`: - - >>> func(s_pd) - True - >>> func(s_pl) - True """ from narwhals._interchange.series import InterchangeSeries @@ -870,12 +699,6 @@ def generate_temporary_column_name(n_bytes: int, columns: list[str]) -> str: Raises: AssertionError: If a unique token cannot be generated after 100 attempts. - - Examples: - >>> import narwhals as nw - >>> columns = ["abc", "xyz"] - >>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns - True """ counter = 0 while True:
diff --git a/utils/add_docstring_examples.py b/utils/add_docstring_examples.py
new file mode 100644
index 000000000..4384a4190
--- /dev/null
+++ b/utils/add_docstring_examples.py
@@ -0,0 +1,123 @@
+"""Add docstring examples to docstrings.
+
+In order to keep Narwhals lightweight, we keep lengthy docstring examples
+in `docs/docstring_examples`. These then get dynamically added to the
+docstrings in CI before running doctests and before publishing docs.
+
+To run it locally and add docstring examples to all tracked files, you
+can run (on mac/linux):
+
+    git ls-files narwhals | xargs python utils/add_docstring_examples.py
+"""
+
+from __future__ import annotations
+
+import ast
+import importlib
+import sys
+from ast import NodeVisitor
+
+
+def visit_node(
+    node: ast.FunctionDef | ast.ClassDef, examples: dict[str, str]
+) -> tuple[str, bool] | None:
+    """Look up the stored docstring example for a function or class.
+
+    Arguments:
+        node: Function or class definition to inspect.
+        examples: Mapping from function/class name to the example text stored
+            for it.
+
+    Returns:
+        - If the node has a docstring, and there is a docstring example stored
+          for its name, then return a tuple with that docstring example and a
+          boolean indicating whether the docstring already contains an
+          `Examples:` section.
+        - Else, return None.
+    """
+    if (
+        node.name in examples
+        and node.body
+        and isinstance(expr := node.body[0], ast.Expr)
+        and isinstance(value := expr.value, ast.Constant)
+        and isinstance(docstring := value.value, str)
+        and value.end_lineno is not None
+    ):
+        return examples[node.name], "Examples:" in docstring
+    return None
+
+
+class Visitor(NodeVisitor):
+    """Collect, per file, the docstring examples that need to be inserted.
+
+    `additions` maps the 0-indexed line on which a docstring ends to the
+    example text to insert there; `already_has_docstring_example` maps the
+    same line to whether that docstring already has an `Examples:` section.
+    """
+
+    def __init__(self, file: str) -> None:
+        self.file = file
+        self.additions: dict[int, str] = {}
+        self.already_has_docstring_example: dict[int, bool] = {}
+        # `narwhals/foo.py` -> `docs.docstring_examples.foo`. Raises
+        # ModuleNotFoundError / AttributeError if nothing is stored for `file`.
+        self.examples: dict[str, str] = importlib.import_module(
+            self.file.replace("narwhals", "docs.docstring_examples")
+            .replace("/", ".")
+            .removesuffix(".py")
+        ).EXAMPLES
+
+    def _record(self, node: ast.FunctionDef | ast.ClassDef) -> None:
+        """Store the example for `node`, if there is one, keyed by line."""
+        result = visit_node(node, self.examples)
+        if result is None:
+            return
+        end_lineno = node.body[0].end_lineno
+        if end_lineno is None:
+            return
+        example, has_examples_section = result
+        # Subtract 1 as end_lineno is 1-indexed.
+        index = end_lineno - 1
+        self.additions[index] = example
+        self.already_has_docstring_example[index] = has_examples_section
+
+    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:  # noqa: N802
+        self._record(node)
+        self.generic_visit(node)
+
+    def visit_ClassDef(self, node: ast.ClassDef) -> None:  # noqa: N802
+        self._record(node)
+        self.generic_visit(node)
+
+
+if __name__ == "__main__":
+    files = sys.argv[1:]
+    for file in files:
+        if not file.endswith(".py"):
+            # Skip non-Python files.
+            continue
+        # Docstring examples contain non-ASCII table borders, so don't rely
+        # on the platform's default encoding.
+        with open(file, encoding="utf-8") as fd:
+            content = fd.read()
+        tree = ast.parse(content)
+        try:
+            visitor = Visitor(file)
+        except (AttributeError, ModuleNotFoundError):
+            # There are no docstrings examples to replace
+            # for this file.
+            continue
+        visitor.visit(tree)
+        if visitor.additions:
+            lines = content.splitlines()
+            for lineno, addition in visitor.additions.items():
+                line = lines[lineno]
+                indent = len(line) - len(line.lstrip())
+                rewritten_line = line.rstrip().removesuffix('"""') + "\n"
+                if not visitor.already_has_docstring_example[lineno]:
+                    rewritten_line += " " * indent + "Examples:\n"
+                rewritten_line += addition.lstrip("\n")
+                rewritten_line += '"""\n'
+                lines[lineno] = rewritten_line
+            with open(file, "w", encoding="utf-8") as fd:
+                fd.write("\n".join(lines) + "\n")