From 7dc0368a4f3f2b3bc6fe771e87b78c9a62abfb2f Mon Sep 17 00:00:00 2001 From: raisa <> Date: Thu, 28 Mar 2024 16:15:45 +0000 Subject: [PATCH 1/2] add vertical dataframe concat --- narwhals/functions.py | 8 +++++--- narwhals/pandas_like/namespace.py | 19 +++++++++++------- narwhals/pandas_like/utils.py | 33 ++++++++++++++++++++++++++++++- tests/test_common.py | 32 ++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 11 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index d07693b33..ac443af05 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -12,10 +12,12 @@ def concat( - items: Iterable[DataFrame | LazyFrame], *, how: Literal["horizontal"] + items: Iterable[DataFrame | LazyFrame], *, how: Literal["horizontal", "vertical"] ) -> DataFrame | LazyFrame: - if how != "horizontal": - raise NotImplementedError("Only horizontal concatenation is supported") + if how not in ("horizontal", "vertical"): + raise NotImplementedError( + "Only horizontal and vertical concatenations are supported" + ) if not items: raise ValueError("No items to concatenate") items = list(items) diff --git a/narwhals/pandas_like/namespace.py b/narwhals/pandas_like/namespace.py index 594592769..8cc7e40b5 100644 --- a/narwhals/pandas_like/namespace.py +++ b/narwhals/pandas_like/namespace.py @@ -13,6 +13,7 @@ from narwhals.pandas_like.utils import horizontal_concat from narwhals.pandas_like.utils import parse_into_exprs from narwhals.pandas_like.utils import series_from_iterable +from narwhals.pandas_like.utils import vertical_concat from narwhals.utils import flatten_str if TYPE_CHECKING: @@ -186,10 +187,14 @@ def concat( if len(kind) > 1: msg = "Can only concat DataFrames or LazyFrames, not mixtures of the two" raise TypeError(msg) - if how != "horizontal": - msg = "Only horizontal concatenation is supported for now" - raise TypeError(msg) - return PandasDataFrame( - horizontal_concat(dfs, implementation=self._implementation), - implementation=self._implementation, - ) + if how == "horizontal": + return PandasDataFrame( + horizontal_concat(dfs, implementation=self._implementation), + implementation=self._implementation, + ) + if how == "vertical": + return PandasDataFrame( + vertical_concat(dfs, implementation=self._implementation), + implementation=self._implementation, + ) + raise NotImplementedError diff --git a/narwhals/pandas_like/utils.py b/narwhals/pandas_like/utils.py index 6c2c507b7..f0f37440e 100644 --- a/narwhals/pandas_like/utils.py +++ b/narwhals/pandas_like/utils.py @@ -255,7 +255,7 @@ def evaluate_simple_aggregation(expr: PandasExpr, grouped: Any, keys: list[str]) def horizontal_concat(dfs: list[Any], implementation: str) -> Any: """ - Concatenate (native) DataFrames. + Concatenate (native) DataFrames horizontally. Should be in namespace. """ @@ -275,6 +275,37 @@ def horizontal_concat(dfs: list[Any], implementation: str) -> Any: raise TypeError(msg) +def vertical_concat(dfs: list[Any], implementation: str) -> Any: + """ + Concatenate (native) DataFrames vertically. + + Should be in namespace. + """ + if not dfs: + msg = "No dataframes to concatenate" + raise TypeError(msg) + cols = set(dfs[0].columns) + for df in dfs: + cols_current = set(df.columns) + if cols_current != cols: + msg = "Unable to vstack, column names don't match" + raise TypeError(msg) + if implementation == "pandas": + import pandas as pd + + return pd.concat(dfs, axis=0, copy=False) + if implementation == "cudf": + import cudf + + return cudf.concat(dfs, axis=0) + if implementation == "modin": + import modin.pandas as mpd + + return mpd.concat(dfs, axis=0) + msg = f"Unknown implementation: {implementation}" + raise TypeError(msg) + + def dataframe_from_dict(data: dict[str, Any], implementation: str) -> Any: """Return native dataframe.""" if implementation == "pandas": diff --git a/tests/test_common.py b/tests/test_common.py index 02d77cb77..a029a722d 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -17,6 +17,8 @@ df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) df_pandas_na = pd.DataFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) df_lazy_na = pl.LazyFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) +df_right_pandas = pd.DataFrame({"c": [6, 12, -1], "d": [0, -4, 2]}) +df_right_lazy = pl.LazyFrame({"c": [6, 12, -1], "d": [0, -4, 2]}) if os.environ.get("CI", None): import modin.pandas as mpd @@ -365,3 +367,33 @@ def test_drop_nulls(df_raw: Any) -> None: result = nw.to_native(df.select(nw.col("a").drop_nulls())) expected = {"a": [3, 2]} compare_dicts(result, expected) + + +@pytest.mark.parametrize( + ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)] +) +def test_concat_horizontal(df_raw: Any, df_raw_right: Any) -> None: + df_left = nw.LazyFrame(df_raw) + df_right = nw.LazyFrame(df_raw_right) + result = nw.concat([df_left, df_right], how="horizontal") + result_native = nw.to_native(result) + expected = { + "a": [1, 3, 2], + "b": [4, 4, 6], + "z": [7.0, 8, 9], + "c": [6, 12, -1], + "d": [0, -4, 2], + } + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize( + ("df_raw", "df_raw_right"), [(df_pandas, df_right_pandas), (df_lazy, df_right_lazy)] +) +def test_concat_vertical(df_raw: Any, df_raw_right: Any) -> None: + df_left = nw.LazyFrame(df_raw).rename({"a": "c", "b": "d"}).drop("z") + df_right = nw.LazyFrame(df_raw_right) + result = nw.concat([df_left, df_right], how="vertical") + result_native = nw.to_native(result) + expected = {"c": [1, 3, 2, 6, 12, -1], "d": [4, 4, 6, 0, -4, 2]} + compare_dicts(result_native, expected) From a1bf55e96f47938cd142bb64a5d4891d2817d931 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:29:44 +0000 Subject: [PATCH 2/2] add default --- narwhals/functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index ac443af05..02a551775 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -12,7 +12,9 @@ def concat( - items: Iterable[DataFrame | LazyFrame], *, how: Literal["horizontal", "vertical"] + items: Iterable[DataFrame | LazyFrame], + *, + how: Literal["horizontal", "vertical"] = "vertical", ) -> DataFrame | LazyFrame: if how not in ("horizontal", "vertical"): raise NotImplementedError(