From 185e889c162311a5e0e2b894d6e446efd4ba0510 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 19 Mar 2024 09:47:09 +0000 Subject: [PATCH 1/6] fixup for cudf? --- narwhals/pandas_like/group_by.py | 2 +- narwhals/pandas_like/utils.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/narwhals/pandas_like/group_by.py b/narwhals/pandas_like/group_by.py index be0495796..fcc67a245 100644 --- a/narwhals/pandas_like/group_by.py +++ b/narwhals/pandas_like/group_by.py @@ -174,7 +174,7 @@ def agg_generic( # noqa: PLR0913 to_remove: list[int] = [] for i, expr in enumerate(exprs): if is_simple_aggregation(expr): - dfs.append(evaluate_simple_aggregation(expr, grouped)) + dfs.append(evaluate_simple_aggregation(expr, grouped, group_by_keys)) to_remove.append(i) exprs = [expr for i, expr in enumerate(exprs) if i not in to_remove] diff --git a/narwhals/pandas_like/utils.py b/narwhals/pandas_like/utils.py index c916263d8..f9d1aef02 100644 --- a/narwhals/pandas_like/utils.py +++ b/narwhals/pandas_like/utils.py @@ -217,7 +217,7 @@ def is_simple_aggregation(expr: PandasExpr) -> bool: ) -def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any) -> Any: +def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) -> Any: """ Use fastpath for simple aggregations if possible. @@ -232,7 +232,13 @@ def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any) -> Any: Returns naive DataFrame. """ if expr._depth == 0: - return grouped.size()["size"].rename(expr._output_names[0]) # type: ignore[index] + ser = grouped.size() + if len(ser.shape) > 1: + # dataframe + ser = ser.drop(columns=keys) + ser = ser[ser.columns[0]] + ser.name = expr._output_names[0] # type: ignore[index] + return ser if expr._root_names is None or expr._output_names is None: msg = "Expected expr to have root_names and output_names set, but they are None. Please report a bug." raise AssertionError(msg) From c81acd39230c9705a4d281d52566bedbdd8e9b7f Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 19 Mar 2024 21:29:44 +0000 Subject: [PATCH 2/6] wip --- narwhals/pandas_like/utils.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/narwhals/pandas_like/utils.py b/narwhals/pandas_like/utils.py index f9d1aef02..705d6966b 100644 --- a/narwhals/pandas_like/utils.py +++ b/narwhals/pandas_like/utils.py @@ -232,13 +232,10 @@ def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) Returns naive DataFrame. """ if expr._depth == 0: - ser = grouped.size() - if len(ser.shape) > 1: - # dataframe - ser = ser.drop(columns=keys) - ser = ser[ser.columns[0]] - ser.name = expr._output_names[0] # type: ignore[index] - return ser + # e.g. agg(pl.len()) + df = grouped.size() + df = df.drop(columns=keys) if len(df.shape) > 1 else df.to_frame("size") + return df.rename(columns={"size": expr._output_names[0]}) # type: ignore[index] if expr._root_names is None or expr._output_names is None: msg = "Expected expr to have root_names and output_names set, but they are None. Please report a bug." raise AssertionError(msg) From 1c55212c900ffea7c4c120e2df03b9a850495bc2 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 19 Mar 2024 21:30:02 +0000 Subject: [PATCH 3/6] wip --- narwhals/pandas_like/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/pandas_like/utils.py b/narwhals/pandas_like/utils.py index 705d6966b..7d0a6c809 100644 --- a/narwhals/pandas_like/utils.py +++ b/narwhals/pandas_like/utils.py @@ -233,7 +233,7 @@ def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) """ if expr._depth == 0: # e.g. agg(pl.len()) - df = grouped.size() + df = getattr(grouped, expr._function_name)() df = df.drop(columns=keys) if len(df.shape) > 1 else df.to_frame("size") return df.rename(columns={"size": expr._output_names[0]}) # type: ignore[index] if expr._root_names is None or expr._output_names is None: From 1d614da0068a706f7dcb5baf59e543c015deb574 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 19 Mar 2024 21:30:20 +0000 Subject: [PATCH 4/6] wip --- narwhals/pandas_like/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/pandas_like/utils.py b/narwhals/pandas_like/utils.py index 7d0a6c809..bdfbb5955 100644 --- a/narwhals/pandas_like/utils.py +++ b/narwhals/pandas_like/utils.py @@ -233,7 +233,7 @@ def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) """ if expr._depth == 0: # e.g. agg(pl.len()) - df = getattr(grouped, expr._function_name)() + df = getattr(grouped, expr._function_name.replace("len", "size"))() df = df.drop(columns=keys) if len(df.shape) > 1 else df.to_frame("size") return df.rename(columns={"size": expr._output_names[0]}) # type: ignore[index] if expr._root_names is None or expr._output_names is None: From 8b2734d6d4941842bcd1ecad34d5a6a9d7d5896f Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 19 Mar 2024 21:34:06 +0000 Subject: [PATCH 5/6] modin can use fastpath too --- narwhals/pandas_like/group_by.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/narwhals/pandas_like/group_by.py b/narwhals/pandas_like/group_by.py index fcc67a245..9253683e2 100644 --- a/narwhals/pandas_like/group_by.py +++ b/narwhals/pandas_like/group_by.py @@ -58,7 +58,9 @@ def agg( raise ValueError(msg) output_names.extend(expr._output_names) - if implementation == "pandas" and not os.environ.get("NARWHALS_FORCE_GENERIC"): + if implementation in ("pandas", "modin") and not os.environ.get( + "NARWHALS_FORCE_GENERIC" + ): return agg_pandas( grouped, exprs, From 4ff71b2565b982efdcce92ccbdde81c6f71b73a4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 19 Mar 2024 21:44:39 +0000 Subject: [PATCH 6/6] modin can use fastpath too --- narwhals/pandas_like/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/narwhals/pandas_like/utils.py b/narwhals/pandas_like/utils.py index bdfbb5955..9486e94fb 100644 --- a/narwhals/pandas_like/utils.py +++ b/narwhals/pandas_like/utils.py @@ -234,7 +234,11 @@ def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) if expr._depth == 0: # e.g. agg(pl.len()) df = getattr(grouped, expr._function_name.replace("len", "size"))() - df = df.drop(columns=keys) if len(df.shape) > 1 else df.to_frame("size") + df = ( + df.drop(columns=keys) + if len(df.shape) > 1 + else df.reset_index(drop=True).to_frame("size") + ) return df.rename(columns={"size": expr._output_names[0]}) # type: ignore[index] if expr._root_names is None or expr._output_names is None: msg = "Expected expr to have root_names and output_names set, but they are None. Please report a bug."