From 7889e9dd70ee8bd2cbe55044b6e880a04a626e16 Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Thu, 27 Jun 2024 16:25:06 +0200 Subject: [PATCH] Enh: extend `narwhalify` (#328) * WIP enh: extend narwhalify * validate single backend * rm narhwalify method * pyproject rollback * feedback adjusted --- docs/api-reference/narwhals.md | 1 - docs/basics/column.md | 16 +- docs/basics/complete_example.md | 15 +- docs/basics/dataframe.md | 44 ++- narwhals/__init__.py | 2 - narwhals/_pandas_like/dataframe.py | 6 +- narwhals/_pandas_like/series.py | 12 + narwhals/dataframe.py | 6 +- narwhals/series.py | 432 ++++++++++++++--------------- narwhals/translate.py | 174 ++++-------- noxfile.py | 2 +- tests/translate/narwhalify_test.py | 85 +++--- 12 files changed, 384 insertions(+), 411 deletions(-) diff --git a/docs/api-reference/narwhals.md b/docs/api-reference/narwhals.md index e2246c268..23267ec4c 100644 --- a/docs/api-reference/narwhals.md +++ b/docs/api-reference/narwhals.md @@ -20,7 +20,6 @@ Here are the top-level functions available in Narwhals. - mean - min - narwhalify - - narwhalify_method - sum - sum_horizontal - show_versions diff --git a/docs/basics/column.md b/docs/basics/column.md index 26582262a..18039eb60 100644 --- a/docs/basics/column.md +++ b/docs/basics/column.md @@ -127,9 +127,9 @@ of using expressions, we'll extract a `Series`. ```python exec="1" source="above" session="ex2" import narwhals as nw -def my_func(df): - df_s = nw.from_native(df, eager_only=True) - return df_s['a'].mean() +@nw.narwhalify +def my_func(df_any): + return df_any['a'].mean() ``` === "pandas" @@ -148,8 +148,8 @@ def my_func(df): print(my_func(df)) ``` -Note that, this time, we couldn't use `@nw.narwhalify`, as the final step in -our function wasn't `nw.to_native`, so we had to explicitly use `nw.from_native` -as the first step. 
In general, we recommend using the decorator where possible, -as it looks a lot cleaner, and only using `nw.from_native` / `nw.to_native` explicitly -when you need them. +Note that, even though the output of our function is neither a dataframe nor a series, we can +still use `narwhalify`. + +In general, we recommend using the decorator where possible, as it looks a lot cleaner, +and only using `nw.from_native` / `nw.to_native` explicitly when you need them. diff --git a/docs/basics/complete_example.md b/docs/basics/complete_example.md index cfbae39b4..b795af27c 100644 --- a/docs/basics/complete_example.md +++ b/docs/basics/complete_example.md @@ -17,14 +17,13 @@ stored them in attributes `self.means` and `self.std_devs`. ## Transform method We're going to take in a dataframe, and return a dataframe of the same type. -Therefore, we use `@nw.narwhalify_method` (the counterpart to `@nw.narwhalify` which is -meant to be used for methods): +Therefore, we use `@nw.narwhalify`: ```python import narwhals as nw class StandardScaler: - @nw.narwhalify_method + @nw.narwhalify def transform(self, df): return df.with_columns( (nw.col(col) - self._means[col]) / self._std_devs[col] @@ -45,16 +44,15 @@ To be able to get `Series` out of our `DataFrame`, we'll pass `eager_only=True` This is because Polars doesn't have a concept of lazy `Series`, and so Narwhals doesn't either. -Note how here, we're not returning a dataframe to the user - we just take a dataframe in, and -store some internal state. Therefore, we use `nw.from_native` explicitly, as opposed to using the -utility `@nw.narwhalify_method` decorator. +We can specify that in the `@nw.narwhalify` decorator by setting `eager_only=True`, and +the argument will be propagated to `nw.from_native`.
```python import narwhals as nw class StandardScaler: + @nw.narwhalify(eager_only=True) def fit(self, df_any): - df = nw.from_native(df_any, eager_only=True) self._means = {col: df[col].mean() for col in df.columns} self._std_devs = {col: df[col].std() for col in df.columns} ``` @@ -66,12 +64,13 @@ Here is our dataframe-agnostic standard scaler: import narwhals as nw class StandardScaler: + @nw.narwhalify(eager_only=True) def fit(self, df_any): df = nw.from_native(df_any, eager_only=True) self._means = {col: df[col].mean() for col in df.columns} self._std_devs = {col: df[col].std() for col in df.columns} - @nw.narwhalify_method + @nw.narwhalify def transform(self, df): return df.with_columns( (nw.col(col) - self._means[col]) / self._std_devs[col] diff --git a/docs/basics/dataframe.md b/docs/basics/dataframe.md index a8bb1556a..016b8d008 100644 --- a/docs/basics/dataframe.md +++ b/docs/basics/dataframe.md @@ -113,9 +113,8 @@ Let's try it out: ## Example 3: horizontal sum -Expressions can be free-standing functions which accept other -expressions as inputs. For example, we can compute a horizontal -sum using `nw.sum_horizontal`. +Expressions can be free-standing functions which accept other expressions as inputs. +For example, we can compute a horizontal sum using `nw.sum_horizontal`. Make a Python file with the following content: ```python exec="1" source="above" session="df_ex3" @@ -150,3 +149,42 @@ Let's try it out: df = pl.LazyFrame({'a': [1, 1, 2], 'b': [4, 5, 6]}) print(func(df).collect()) ``` + +## Example 4: multiple inputs + +`nw.narwhalify` can be used to decorate functions that take multiple inputs as well and +return a non dataframe/series-like object. + +For example, let's compute how many rows are left in a dataframe after filtering it based +on a series. 
+ +Make a Python file with the following content: +```python exec="1" source="above" session="df_ex4" +import narwhals as nw + +@nw.narwhalify(eager_only=True) +def func(df: nw.DataFrame, s: nw.Series, col_name: str): + return df.filter(nw.col(col_name).is_in(s)).shape[0] +``` + +We require `eager_only=True` here because lazyframe doesn't support `.shape`. + +Let's try it out: + +=== "pandas" + ```python exec="true" source="material-block" result="python" session="df_ex4" + import pandas as pd + + df = pd.DataFrame({'a': [1, 1, 2, 2, 3], 'b': [4, 5, 6, 7, 8]}) + s = pd.Series([1, 3]) + print(func(df, s.to_numpy(), 'a')) + ``` + +=== "Polars (eager)" + ```python exec="true" source="material-block" result="python" session="df_ex4" + import polars as pl + + df = pl.DataFrame({'a': [1, 1, 2, 2, 3], 'b': [4, 5, 6, 7, 8]}) + s = pl.Series([1, 3]) + print(func(df, s.to_numpy(), 'a')) + ``` diff --git a/narwhals/__init__.py b/narwhals/__init__.py index e81f4236c..56db0ddfb 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -32,7 +32,6 @@ from narwhals.translate import from_native from narwhals.translate import get_native_namespace from narwhals.translate import narwhalify -from narwhals.translate import narwhalify_method from narwhals.translate import to_native from narwhals.utils import maybe_align_index from narwhals.utils import maybe_convert_dtypes @@ -78,6 +77,5 @@ "Datetime", "Date", "narwhalify", - "narwhalify_method", "show_versions", ] diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 7e6cf76ba..8e49e04f2 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -15,9 +15,9 @@ from narwhals._pandas_like.utils import translate_dtype from narwhals._pandas_like.utils import validate_dataframe_comparand from narwhals._pandas_like.utils import validate_indices -from narwhals.translate import get_cudf -from narwhals.translate import get_modin -from narwhals.translate import 
get_pandas +from narwhals.dependencies import get_cudf +from narwhals.dependencies import get_modin +from narwhals.dependencies import get_pandas from narwhals.utils import flatten if TYPE_CHECKING: diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 22a449eee..d1b0c0c95 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -11,6 +11,8 @@ from narwhals._pandas_like.utils import to_datetime from narwhals._pandas_like.utils import translate_dtype from narwhals._pandas_like.utils import validate_column_comparand +from narwhals.dependencies import get_cudf +from narwhals.dependencies import get_modin from narwhals.dependencies import get_pandas from narwhals.utils import parse_version @@ -98,6 +100,16 @@ def __narwhals_namespace__(self) -> PandasNamespace: return PandasNamespace(self._implementation) + def __native_namespace__(self) -> Any: + if self._implementation == "pandas": + return get_pandas() + if self._implementation == "modin": # pragma: no cover + return get_modin() + if self._implementation == "cudf": # pragma: no cover + return get_cudf() + msg = f"Expected pandas/modin/cudf, got: {type(self._implementation)}" # pragma: no cover + raise AssertionError(msg) + def __narwhals_series__(self) -> Self: return self diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 17d2f8544..d0056dbfd 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -10,11 +10,11 @@ from typing import overload from narwhals._pandas_like.dataframe import PandasDataFrame +from narwhals.dependencies import get_cudf +from narwhals.dependencies import get_modin +from narwhals.dependencies import get_pandas from narwhals.dependencies import get_polars from narwhals.dtypes import to_narwhals_dtype -from narwhals.translate import get_cudf -from narwhals.translate import get_modin -from narwhals.translate import get_pandas from narwhals.utils import parse_version from narwhals.utils import 
validate_same_library diff --git a/narwhals/series.py b/narwhals/series.py index d6a6af982..8e68ee2dd 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -4,12 +4,12 @@ from typing import Any from typing import Literal +from narwhals.dependencies import get_cudf +from narwhals.dependencies import get_modin +from narwhals.dependencies import get_pandas +from narwhals.dependencies import get_polars from narwhals.dtypes import to_narwhals_dtype from narwhals.dtypes import translate_dtype -from narwhals.translate import get_cudf -from narwhals.translate import get_modin -from narwhals.translate import get_pandas -from narwhals.translate import get_polars if TYPE_CHECKING: import numpy as np @@ -66,6 +66,11 @@ def __getitem__(self, idx: int | slice) -> Any: return self._series[idx] return self._from_series(self._series[idx]) + def __native_namespace__(self) -> Any: + if self._is_polars: + return get_polars() + return self._series.__native_namespace__() + def __narwhals_namespace__(self) -> Any: if self._is_polars: return get_polars() @@ -86,9 +91,9 @@ def shape(self) -> tuple[int]: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.shape + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.shape We can then pass either pandas or Polars to `func`: @@ -142,8 +147,9 @@ def len(self) -> int: Let's define a dataframe-agnostic function that computes the len of the series: - >>> def func(s): - ... return nw.from_native(s, allow_series=True).len() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.len() We can then pass either pandas or Polars to `func`: @@ -169,9 +175,9 @@ def dtype(self) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.dtype + >>> @nw.narwhalify + ... def func(s_any): + ... 
return s_any.dtype We can then pass either pandas or Polars to `func`: @@ -197,9 +203,9 @@ def name(self) -> str: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.name + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.name We can then pass either pandas or Polars to `func`: @@ -230,10 +236,9 @@ def cast( We define a dataframe-agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.cast(nw.Int64) - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.cast(nw.Int64) We can then pass either pandas or Polars to `func`: @@ -269,10 +274,9 @@ def to_frame(self) -> DataFrame: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... df = s.to_frame() - ... return nw.to_native(df) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.to_frame() We can then pass either pandas or Polars to `func`: @@ -311,9 +315,9 @@ def mean(self) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.mean() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.mean() We can then pass either pandas or Polars to `func`: @@ -341,9 +345,9 @@ def any(self) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.any() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.any() We can then pass either pandas or Polars to `func`: @@ -368,9 +372,9 @@ def all(self) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.all() + >>> @nw.narwhalify + ... def func(s_any): + ... 
return s_any.all() We can then pass either pandas or Polars to `func`: @@ -396,9 +400,9 @@ def min(self) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.min() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.min() We can then pass either pandas or Polars to `func`: @@ -423,9 +427,9 @@ def max(self) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.max() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.max() We can then pass either pandas or Polars to `func`: @@ -450,9 +454,9 @@ def sum(self) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.sum() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.sum() We can then pass either pandas or Polars to `func`: @@ -481,9 +485,9 @@ def std(self, *, ddof: int = 1) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... return s.std() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.std() We can then pass either pandas or Polars to `func`: @@ -510,9 +514,9 @@ def is_in(self, other: Any) -> Self: We define a library agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.is_in([3, 2, 8]) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.is_in([3, 2, 8]) We can then pass either pandas or Polars to `func`: @@ -553,9 +557,9 @@ def drop_nulls(self) -> Self: Now define a dataframe-agnostic function with a `column` argument for the column to evaluate : - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.drop_nulls() + >>> @nw.narwhalify + ... def func(s_any): + ... 
return s_any.drop_nulls() Then we can pass either Series (polars or pandas) to `func`: @@ -591,9 +595,9 @@ def cum_sum(self) -> Self: We define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.cum_sum() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.cum_sum() We can then pass either pandas or Polars to `func`: @@ -627,9 +631,9 @@ def unique(self) -> Self: Let's define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.unique() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.unique() We can then pass either pandas or Polars to `func`: @@ -661,7 +665,7 @@ def diff(self) -> Self: do: ```python - s.diff().fill_null(0).cast(nw.Int64) + s_any.diff().fill_null(0).cast(nw.Int64) ``` Examples: @@ -674,9 +678,9 @@ def diff(self) -> Self: We define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.diff() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.diff() We can then pass either pandas or Polars to `func`: @@ -712,7 +716,7 @@ def shift(self, n: int) -> Self: do: ```python - s.shift(1).fill_null(0).cast(nw.Int64) + s_any.shift(1).fill_null(0).cast(nw.Int64) ``` Examples: @@ -725,9 +729,9 @@ def shift(self, n: int) -> Self: We define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.shift(1) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.shift(1) We can then pass either pandas or Polars to `func`: @@ -779,9 +783,9 @@ def sample( We define a library agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.sample(fraction=1.0, with_replacement=True) + >>> @nw.narwhalify + ... def func(s_any): + ... 
return s_any.sample(fraction=1.0, with_replacement=True) We can then pass either pandas or Polars to `func`: @@ -822,9 +826,9 @@ def alias(self, name: str) -> Self: We define a library agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.alias("bar") + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.alias("bar") We can then pass either pandas or Polars to `func`: @@ -861,13 +865,13 @@ def sort(self, *, descending: bool = False) -> Self: We define library agnostic functions: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.sort() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.sort() - >>> @nw.narwhalify(series_only=True) - ... def func_descend(s): - ... return s.sort(descending=True) + >>> @nw.narwhalify + ... def func_descend(s_any): + ... return s_any.sort(descending=True) We can then pass either pandas or Polars to `func`: @@ -922,9 +926,9 @@ def is_null(self) -> Self: We define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.is_null() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.is_null() We can then pass either pandas or Polars to `func`: @@ -965,9 +969,9 @@ def fill_null(self, value: Any) -> Self: We define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.fill_null(5) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.fill_null(5) We can then pass either pandas or Polars to `func`: @@ -1013,9 +1017,9 @@ def is_between( We define a library agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.is_between(2, 4, "right") + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.is_between(2, 4, "right") We can then pass either pandas or Polars to `func`: @@ -1055,9 +1059,9 @@ def n_unique(self) -> int: We define a library agnostic function: - >>> def func(s_any): - ... 
s = nw.from_native(s_any, series_only=True) - ... return s.n_unique() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.n_unique() We can then pass either pandas or Polars to `func`: @@ -1082,10 +1086,9 @@ def to_numpy(self) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... df = s.to_numpy() - ... return df + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.to_numpy() We can then pass either pandas or Polars to `func`: @@ -1110,10 +1113,9 @@ def to_pandas(self) -> Any: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... df = s.to_pandas() - ... return df + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.to_pandas() We can then pass either pandas or Polars to `func`: @@ -1211,9 +1213,9 @@ def filter(self, other: Any) -> Series: We define a library agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.filter(s > 10) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.filter(s_any > 10) We can then pass either pandas or Polars to `func`: @@ -1247,9 +1249,9 @@ def is_duplicated(self: Self) -> Series: Let's define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.is_duplicated() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.is_duplicated() We can then pass either pandas or Polars to `func`: @@ -1283,9 +1285,9 @@ def is_empty(self: Self) -> bool: Let's define a dataframe-agnostic function that filters rows in which "foo" values are greater than 10, and then checks if the result is empty or not: - >>> def func(s_any): - ... series = nw.from_native(s_any, allow_series=True) - ... return series.filter(series > 10).is_empty() + >>> @nw.narwhalify + ... def func(s_any): + ... 
return s_any.filter(s_any > 10).is_empty() We can then pass either pandas or Polars to `func`: @@ -1314,9 +1316,9 @@ def is_unique(self: Self) -> Series: Let's define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.is_unique() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.is_unique() We can then pass either pandas or Polars to `func`: @@ -1357,9 +1359,9 @@ def null_count(self: Self) -> int: Let's define a dataframe-agnostic function that returns the null count of the series: - >>> def func(s_any): - ... series = nw.from_native(s_any, allow_series=True) - ... return series.null_count() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.null_count() We can then pass either pandas or Polars to `func`: >>> func(s_pd) # doctest:+SKIP @@ -1383,9 +1385,9 @@ def is_first_distinct(self: Self) -> Series: Let's define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.is_first_distinct() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.is_first_distinct() We can then pass either pandas or Polars to `func`: @@ -1423,9 +1425,9 @@ def is_last_distinct(self: Self) -> Series: Let's define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.is_last_distinct() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.is_last_distinct() We can then pass either pandas or Polars to `func`: @@ -1466,9 +1468,9 @@ def is_sorted(self: Self, *, descending: bool = False) -> bool: Let's define a dataframe-agnostic function: - >>> def func(s_any, descending=False): - ... series = nw.from_native(s_any, allow_series=True) - ... return series.is_sorted(descending=descending) + >>> @nw.narwhalify + ... def func(s_any, descending=False): + ... 
return s_any.is_sorted(descending=descending) We can then pass either pandas or Polars to `func`: @@ -1503,9 +1505,9 @@ def value_counts( Let's define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.value_counts(sort=True) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.value_counts(sort=True) We can then pass either pandas or Polars to `func`: @@ -1558,10 +1560,10 @@ def quantile( Let's define a dataframe-agnostic function: - >>> def func(s_any): - ... series = nw.from_native(s_any, allow_series=True) + >>> @nw.narwhalify + ... def func(s_any): ... return [ - ... series.quantile(quantile=q, interpolation="nearest") + ... s_any.quantile(quantile=q, interpolation="nearest") ... for q in (0.1, 0.25, 0.5, 0.75, 0.9) ... ] @@ -1592,12 +1594,9 @@ def zip_with(self, mask: Any, other: Any) -> Self: Let's define a dataframe-agnostic function: - >>> def func(s1_any, mask_any, s2_any): - ... s1 = nw.from_native(s1_any, allow_series=True) - ... mask = nw.from_native(mask_any, series_only=True) - ... s2 = nw.from_native(s2_any, series_only=True) - ... s = s1.zip_with(mask, s2) - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s1_any, mask_any, s2_any): + ... return s1_any.zip_with(mask_any, s2_any) We can then pass either pandas or Polars to `func`: @@ -1638,9 +1637,9 @@ def item(self: Self, index: int | None = None) -> Any: Let's define a dataframe-agnostic function that returns item at given index - >>> def func(s_any, index=None): - ... s = nw.from_native(s_any, series_only=True) - ... return s.item(index) + >>> @nw.narwhalify + ... def func(s_any, index=None): + ... return s_any.item(index) We can then pass either pandas or Polars to `func`: @@ -1670,9 +1669,9 @@ def head(self: Self, n: int = 10) -> Self: Let's define a dataframe-agnostic function that returns the first 3 rows: - >>> @nw.narwhalify(allow_series=True) - ... def func(s): - ... return s.head(3) + >>> @nw.narwhalify + ... 
def func(s_any): + ... return s_any.head(3) We can then pass either pandas or Polars to `func`: @@ -1712,9 +1711,9 @@ def tail(self: Self, n: int = 10) -> Self: Let's define a dataframe-agnostic function that returns the last 3 rows: - >>> @nw.narwhalify(allow_series=True) - ... def func(s): - ... return s.tail(3) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.tail(3) We can then pass either pandas or Polars to `func`: @@ -1760,9 +1759,9 @@ def round(self: Self, decimals: int = 0) -> Self: Let's define a dataframe-agnostic function that rounds to the first decimal: - >>> @nw.narwhalify(allow_series=True) - ... def func(s): - ... return s.round(1) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.round(1) We can then pass either pandas or Polars to `func`: @@ -1859,7 +1858,7 @@ def starts_with(self, prefix: str) -> Series: We define a dataframe-agnostic function: - >>> @nw.narwhalify(allow_series=True) + >>> @nw.narwhalify ... def func(series): ... return series.str.starts_with("app") @@ -1899,7 +1898,7 @@ def ends_with(self, suffix: str) -> Series: We define a dataframe-agnostic function: - >>> @nw.narwhalify(allow_series=True) + >>> @nw.narwhalify ... def func(series): ... return series.str.ends_with("ngo") @@ -1941,9 +1940,9 @@ def contains(self, pattern: str, *, literal: bool = False) -> Series: We define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.str.contains("parrot|dove") + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.str.contains("parrot|dove") We can then pass either pandas or Polars to `func`: @@ -1989,9 +1988,9 @@ def slice(self, offset: int, length: int | None = None) -> Series: We define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.str.slice(4, length=3) + >>> @nw.narwhalify + ... def func(s_any): + ... 
return s_any.str.slice(4, length=3) We can then pass either pandas or Polars to `func`: @@ -2014,9 +2013,9 @@ def slice(self, offset: int, length: int | None = None) -> Series: Using negative indexes: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.str.slice(-3) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.str.slice(-3) >>> func(s_pd) # doctest: +NORMALIZE_WHITESPACE 0 ear @@ -2061,9 +2060,9 @@ def head(self, n: int = 5) -> Series: We define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.str.head() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.str.head() We can then pass either pandas or Polars to `func`: @@ -2107,9 +2106,9 @@ def tail(self, n: int = 5) -> Series: We define a dataframe-agnostic function: - >>> @nw.narwhalify(series_only=True) - ... def func(s): - ... return s.str.tail() + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.str.tail() We can then pass either pandas or Polars to `func`: @@ -2151,10 +2150,9 @@ def year(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.year() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.year() We can then pass either pandas or Polars to `func`: @@ -2187,10 +2185,9 @@ def month(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.month() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.month() We can then pass either pandas or Polars to `func`: @@ -2223,10 +2220,9 @@ def day(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.day() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... 
return s_any.dt.day() We can then pass either pandas or Polars to `func`: @@ -2259,10 +2255,9 @@ def hour(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.hour() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.hour() We can then pass either pandas or Polars to `func`: @@ -2295,10 +2290,9 @@ def minute(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.minute() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.minute() We can then pass either pandas or Polars to `func`: @@ -2331,10 +2325,9 @@ def second(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.second() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.second() We can then pass either pandas or Polars to `func`: @@ -2374,10 +2367,9 @@ def millisecond(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.millisecond().alias("datetime") - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.millisecond().alias("datetime") We can then pass either pandas or Polars to `func`: @@ -2423,10 +2415,9 @@ def microsecond(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.microsecond().alias("datetime") - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... 
return s_any.dt.microsecond().alias("datetime") We can then pass either pandas or Polars to `func`: @@ -2468,10 +2459,9 @@ def nanosecond(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.nanosecond() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.nanosecond() We can then pass either pandas or Polars to `func`: @@ -2504,10 +2494,9 @@ def ordinal_day(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.ordinal_day() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.ordinal_day() We can then pass either pandas or Polars to `func`: @@ -2545,10 +2534,9 @@ def total_minutes(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.total_minutes() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.total_minutes() We can then pass either pandas or Polars to `func`: @@ -2586,10 +2574,9 @@ def total_seconds(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.total_seconds() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.total_seconds() We can then pass either pandas or Polars to `func`: @@ -2630,10 +2617,9 @@ def total_milliseconds(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.total_milliseconds() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... 
return s_any.dt.total_milliseconds() We can then pass either pandas or Polars to `func`: @@ -2674,10 +2660,9 @@ def total_microseconds(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.dt.total_microseconds() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.total_microseconds() We can then pass either pandas or Polars to `func`: @@ -2715,10 +2700,9 @@ def total_nanoseconds(self) -> Series: We define a library agnostic function: - >>> def func(s_any): - ... s = nw.from_native(s_any, series_only=True) - ... s = s.diff().dt.total_nanoseconds() - ... return nw.to_native(s) + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.diff().dt.total_nanoseconds() We can then pass either pandas or Polars to `func`: @@ -2755,9 +2739,9 @@ def to_string(self, format: str) -> Series: # noqa: A002 We define a dataframe-agnostic function: - >>> @nw.narwhalify(allow_series=True) - ... def func(s): - ... return s.dt.to_string("%Y/%m/%d") + >>> @nw.narwhalify + ... def func(s_any): + ... return s_any.dt.to_string("%Y/%m/%d") We can then pass either pandas or Polars to `func`: diff --git a/narwhals/translate.py b/narwhals/translate.py index 19c0bba11..ef6f514e9 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -1,6 +1,5 @@ from __future__ import annotations -import inspect from functools import wraps from typing import TYPE_CHECKING from typing import Any @@ -240,14 +239,24 @@ def get_native_namespace(obj: Any) -> Any: def narwhalify( func: Callable[..., Any] | None = None, *, - strict: bool = True, - eager_only: bool | None = None, - series_only: bool | None = None, - allow_series: bool | None = None, + strict: bool = False, + eager_only: bool | None = False, + series_only: bool | None = False, + allow_series: bool | None = True, ) -> Callable[..., Any]: """ Decorate function so it becomes dataframe-agnostic. 
+ `narwhalify` will try to convert any dataframe/series-like object into the respective + narwhals DataFrame/Series, while leaving the other parameters as they are. + + Similarly, if the output of the function is a narwhals DataFrame or Series, it will be + converted back to the original dataframe/series type, while if the output is another + type it will be left as is. + + By setting `strict=True`, every input and every output will be required to be a + dataframe/series-like object. + Instead of writing ```python @@ -281,139 +290,54 @@ def func(df): Arguments: func: Function to wrap in a `from_native`-`to_native` block. - strict: Whether to raise if object can't be converted (default) or - to just leave it as-is. + strict: Whether to raise if object can't be converted or to just leave it as-is + (default). eager_only: Whether to only allow eager objects. series_only: Whether to only allow series. allow_series: Whether to allow series (default is only dataframe / lazyframe). - - See Also: - narwhalify_method: If you want to narwhalify a class method, use that instead.
""" def decorator(func: Callable[..., Any]) -> Callable[..., Any]: @wraps(func) def wrapper(*args: Any, **kwargs: Any) -> Any: - if args: - df_any = args[0] - elif kwargs: - params = list(inspect.signature(func).parameters.keys()) - first_key = params[0] - df_any = kwargs[first_key] - else: - raise TypeError("Expected function which takes at least one argument.") - df = from_native( - df_any, - strict=strict, - eager_only=eager_only, - series_only=series_only, - allow_series=allow_series, - ) - if args: - result = func(df, *args[1:], **kwargs) - else: - kwargs[first_key] = df - result = func(**kwargs) - return to_native(result, strict=strict) - - return wrapper - - # If func is None, it means the decorator is used with arguments - if func is None: - return decorator - else: - # If func is not None, it means the decorator is used without arguments - return decorator(func) - - -def narwhalify_method( - func: Callable[..., Any] | None = None, - *, - strict: bool = True, - eager_only: bool | None = None, - series_only: bool | None = None, - allow_series: bool | None = None, -) -> Callable[..., Any]: - """ - Decorate method so it becomes dataframe-agnostic. - - Instead of writing + args = [ + from_native( + arg, + strict=strict, + eager_only=eager_only, + series_only=series_only, + allow_series=allow_series, + ) + for arg in args + ] # type: ignore[assignment] + + kwargs = { + name: from_native( + value, + strict=strict, + eager_only=eager_only, + series_only=series_only, + allow_series=allow_series, + ) + for name, value in kwargs.items() + } + + backends = { + b() + for v in [*args, *kwargs.values()] + if (b := getattr(v, "__native_namespace__", None)) + } + + if len(backends) > 1: + msg = "Found multiple backends. Make sure that all dataframe/series inputs come from the same backend." 
+ raise ValueError(msg) + + result = func(*args, **kwargs) - ```python - import narwhals as nw - - - class Foo: - def func(self, df_any): - df = nw.from_native(df_any, strict=False) - df = df.group_by("a").agg(nw.col("b").sum()) - return nw.to_native(df) - ``` - - you can just write - - ```python - import narwhals as nw - - - class Foo: - @nw.narwhalify_method - def func(self, df): - return df.group_by("a").agg(nw.col("b").sum()) - ``` - - You can also pass in extra arguments, e.g. - - ```python - @nw.narhwalify_method(eager_only=True) - ``` - - that will get passed down to `nw.from_native`. - - Arguments: - func: Function to wrap in a `from_native`-`to_native` block. - strict: Whether to raise if object can't be converted (default) or - to just leave it as-is. - eager_only: Whether to only allow eager objects. - series_only: Whether to only allow series. - allow_series: Whether to allow series (default is only dataframe / lazyframe). - """ - - def decorator(func: Callable[..., Any]) -> Callable[..., Any]: - @wraps(func) - def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: - if args: - df_any = args[0] - elif kwargs: - params = list(inspect.signature(func).parameters.keys()) - if params[0] not in ("cls", "self"): - msg = ( - "`@nw.narwhalify_method` is meant to be called on class methods, " - "where the first argument is typically `cls` or `self` - however, yours " - f"is: {params[0]}." 
- ) - raise TypeError(msg) - first_key = params[1] - df_any = kwargs[first_key] - else: - raise TypeError("Expected function which takes at least one argument.") - df = from_native( - df_any, - strict=strict, - eager_only=eager_only, - series_only=series_only, - allow_series=allow_series, - ) - if args: - result = func(self, df, *args[1:], **kwargs) - else: - kwargs[first_key] = df - result = func(self, **kwargs) return to_native(result, strict=strict) return wrapper - # If func is None, it means the decorator is used with arguments if func is None: return decorator else: diff --git a/noxfile.py b/noxfile.py index 23d05d952..7d958fd05 100644 --- a/noxfile.py +++ b/noxfile.py @@ -8,7 +8,7 @@ def run_common(session: Session, coverage_threshold: float) -> None: - session.install("-r", "requirements-dev.txt") + session.install("-e" ".", "-r", "requirements-dev.txt") session.run( "pytest", diff --git a/tests/translate/narwhalify_test.py b/tests/translate/narwhalify_test.py index f12b8f3ed..b34f6e707 100644 --- a/tests/translate/narwhalify_test.py +++ b/tests/translate/narwhalify_test.py @@ -1,10 +1,16 @@ +from __future__ import annotations + +from contextlib import nullcontext as does_not_raise from typing import Any import pandas as pd +import polars as pl import pytest import narwhals as nw +data = {"a": [2, 3, 4]} + def test_narwhalify() -> None: @nw.narwhalify @@ -13,78 +19,91 @@ def func(df: nw.DataFrame) -> nw.DataFrame: df = pd.DataFrame({"a": [1, 2, 3]}) result = func(df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) - result = func(df=df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) - - -def test_narwhalify_called() -> None: - @nw.narwhalify() - def func(df: nw.DataFrame, a: int = 1) -> nw.DataFrame: - return df.with_columns(nw.all() + a) - - df = pd.DataFrame({"a": [1, 2, 3]}) - result = func(df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) + pd.testing.assert_frame_equal(result, 
pd.DataFrame(data)) result = func(df=df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) - result = func(a=1, df=df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) + pd.testing.assert_frame_equal(result, pd.DataFrame(data)) def test_narwhalify_method() -> None: class Foo: - @nw.narwhalify_method + @nw.narwhalify def func(self, df: nw.DataFrame, a: int = 1) -> nw.DataFrame: return df.with_columns(nw.all() + a) df = pd.DataFrame({"a": [1, 2, 3]}) result = Foo().func(df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) + pd.testing.assert_frame_equal(result, pd.DataFrame(data)) result = Foo().func(a=1, df=df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) + pd.testing.assert_frame_equal(result, pd.DataFrame(data)) def test_narwhalify_method_called() -> None: class Foo: - @nw.narwhalify_method(eager_only=True) + @nw.narwhalify def func(self, df: nw.DataFrame, a: int = 1) -> nw.DataFrame: return df.with_columns(nw.all() + a) df = pd.DataFrame({"a": [1, 2, 3]}) result = Foo().func(df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) + pd.testing.assert_frame_equal(result, pd.DataFrame(data)) result = Foo().func(df=df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) + pd.testing.assert_frame_equal(result, pd.DataFrame(data)) result = Foo().func(a=1, df=df) - pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [2, 3, 4]})) + pd.testing.assert_frame_equal(result, pd.DataFrame(data)) def test_narwhalify_method_invalid() -> None: class Foo: - @nw.narwhalify_method(eager_only=True) + @nw.narwhalify(strict=True, eager_only=True) def func(self) -> nw.DataFrame: # pragma: no cover return self # type: ignore[return-value] - @nw.narwhalify(eager_only=True) + @nw.narwhalify(strict=True, eager_only=True) def fun2(self, df: Any) -> nw.DataFrame: # pragma: no cover return df # type: ignore[no-any-return] with pytest.raises(TypeError): 
Foo().func() - @nw.narwhalify_method(eager_only=True) - def func(_df: Any, a: int = 1) -> nw.DataFrame: # pragma: no cover - return a # type: ignore[return-value] - - with pytest.raises(TypeError, match="is meant to be called"): - func(pd.DataFrame(), a=pd.DataFrame()) - def test_narwhalify_invalid() -> None: - @nw.narwhalify(eager_only=True) + @nw.narwhalify(strict=True) def func() -> nw.DataFrame: # pragma: no cover return None # type: ignore[return-value] with pytest.raises(TypeError): func() + + +@pytest.mark.parametrize( + ("arg1", "arg2", "context"), + [ + (pd.DataFrame(data), pd.Series(data["a"]), does_not_raise()), + (pl.DataFrame(data), pl.Series(data["a"]), does_not_raise()), + ( + pd.DataFrame(data), + pl.DataFrame(data), + pytest.raises( + ValueError, + match="Found multiple backends. Make sure that all dataframe/series inputs come from the same backend.", + ), + ), + ( + pl.DataFrame(data), + pd.Series(data["a"]), + pytest.raises( + ValueError, + match="Found multiple backends. Make sure that all dataframe/series inputs come from the same backend.", + ), + ), + ], +) +def test_narwhalify_backends(arg1: Any, arg2: Any, context: Any) -> None: + @nw.narwhalify + def func( + arg1: Any, arg2: Any, extra: int = 1 + ) -> tuple[Any, Any, int]: # pragma: no cover + return arg1, arg2, extra + + with context: + func(arg1, arg2)