Skip to content

Commit

Permalink
fixed expr.md
Browse files Browse the repository at this point in the history
  • Loading branch information
DeaMariaLeon committed Oct 27, 2024
1 parent a8facf1 commit b454de5
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
- is_null
- is_unique
- len
- map_batches
- max
- mean
- min
Expand Down
5 changes: 2 additions & 3 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:

def mode(self: Self) -> Self:
    # Builds the resulting expression by handing this expression and the
    # method name "mode" to `reuse_series_implementation`.
    # NOTE(review): presumably that helper applies the Series-level `mode`
    # to each evaluated column — confirm against its definition.
    return reuse_series_implementation(self, "mode")

def map_batches(
self: Self,
function: Callable[[Any], Any],
Expand All @@ -396,11 +396,10 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:

return self.__class__(
func,
depth=self._depth + 1, # correct depth or self.depth?
depth=self._depth + 1,
function_name=self._function_name + "->map_batches",
root_names=self._root_names,
output_names=self._output_names,
#implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
)
Expand Down
54 changes: 54 additions & 0 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,60 @@ def map_batches(
*args: Any,
**kwargs: Any,
) -> Self:
"""
Apply a custom Python function to a whole Series or sequence of Series.
The output of this custom function is presumed to be either a Series,
or a NumPy array (in which case it will be automatically converted into
a Series).
Arguments:
function: Function to apply to the Series. It should return either a Series
or a NumPy array.
return_dtype: Dtype of the output Series.
If not set, the dtype will be inferred based on the first non-null value
that is returned by the function.
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(
... nw.col("a", "b")
... .map_batches(lambda s: s.to_numpy() + 1, return_dtype=nw.Float64)
... .sum()
... )
We can then pass any supported library such as pandas, Polars, or PyArrow to `func`:
>>> func(df_pd)
     a     b
0  9.0  18.0
>>> func(df_pl)
shape: (1, 2)
┌─────┬──────┐
│ a   ┆ b    │
│ --- ┆ ---  │
│ f64 ┆ f64  │
╞═════╪══════╡
│ 9.0 ┆ 18.0 │
└─────┴──────┘
>>> func(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[9]]
b: [[18]]
"""
return self.__class__(
lambda plx: self._call(plx).map_batches(
function=function, return_dtype=return_dtype
Expand Down

0 comments on commit b454de5

Please sign in to comment.