From 9abe7d25064057351864b8e646466a6cf3a52ee2 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 9 Jan 2025 16:19:13 +0000 Subject: [PATCH] fix: duckdb join was failing if column names contained spaces (#1775) * fix: duckdb column names with spaces * test * sort was raising too --- narwhals/_duckdb/dataframe.py | 12 +++++----- tests/frame/join_test.py | 44 +++++++++++++++++------------------ tests/frame/sort_test.py | 16 ++++++------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index e1c0f994c..33cfc19d2 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -243,7 +243,7 @@ def join( assert right_on is not None # noqa: S101 conditions = [ - f"lhs.{left} = rhs.{right}" for left, right in zip(left_on, right_on) + f'lhs."{left}" = rhs."{right}"' for left, right in zip(left_on, right_on) ] condition = " and ".join(conditions) rel = self._native_frame.set_alias("lhs").join( @@ -251,16 +251,16 @@ def join( ) if how in ("inner", "left", "cross"): - select = [f"lhs.{x}" for x in self._native_frame.columns] + select = [f'lhs."{x}"' for x in self._native_frame.columns] for col in other._native_frame.columns: if col in self._native_frame.columns and ( right_on is None or col not in right_on ): - select.append(f"rhs.{col} as {col}{suffix}") + select.append(f'rhs."{col}" as "{col}{suffix}"') elif right_on is None or col not in right_on: select.append(col) else: # semi - select = [f"lhs.{x}" for x in self._native_frame.columns] + select = ["lhs.*"] res = rel.select(", ".join(select)).set_alias(original_alias) return self._from_native_frame(res) @@ -317,9 +317,9 @@ def sort( result = self._native_frame.order( ",".join( ( - f"{col} {desc} nulls last" + f'"{col}" {desc} nulls last' if nulls_last - else f"{col} {desc} nulls first" + else f'"{col}" {desc} nulls first' for col, desc in zip(flat_by, descending_str) ) ) diff --git a/tests/frame/join_test.py 
b/tests/frame/join_test.py index 5bf5c91f0..f176aca67 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -20,7 +20,7 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: data = { "antananarivo": [1, 3, 2], "bob": [4, 4, 6], - "zorro": [7.0, 8, 9], + "zor ro": [7.0, 8, 9], "index": [0, 1, 2], } df = nw.from_native(constructor(data)) @@ -37,9 +37,9 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: expected = { "antananarivo": [1, 3, 2], "bob": [4, 4, 6], - "zorro": [7.0, 8, 9], + "zor ro": [7.0, 8, 9], "index": [0, 1, 2], - "zorro_right": [7.0, 8, 9], + "zor ro_right": [7.0, 8, 9], } assert_equal_data(result, expected) assert_equal_data(result_on, expected) @@ -49,7 +49,7 @@ def test_inner_join_single_key(constructor: Constructor) -> None: data = { "antananarivo": [1, 3, 2], "bob": [4, 4, 6], - "zorro": [7.0, 8, 9], + "zor ro": [7.0, 8, 9], "index": [0, 1, 2], } df = nw.from_native(constructor(data)) @@ -66,10 +66,10 @@ def test_inner_join_single_key(constructor: Constructor) -> None: expected = { "antananarivo": [1, 3, 2], "bob": [4, 4, 6], - "zorro": [7.0, 8, 9], + "zor ro": [7.0, 8, 9], "index": [0, 1, 2], "bob_right": [4, 4, 6], - "zorro_right": [7.0, 8, 9], + "zor ro_right": [7.0, 8, 9], } assert_equal_data(result, expected) assert_equal_data(result_on, expected) @@ -99,7 +99,7 @@ def test_suffix(constructor: Constructor, how: str, suffix: str) -> None: data = { "antananarivo": [1, 3, 2], "bob": [4, 4, 6], - "zorro": [7.0, 8, 9], + "zor ro": [7.0, 8, 9], } df = nw.from_native(constructor(data)) df_right = df @@ -111,7 +111,7 @@ def test_suffix(constructor: Constructor, how: str, suffix: str) -> None: suffix=suffix, ) result_cols = result.collect_schema().names() - assert result_cols == ["antananarivo", "bob", "zorro", f"zorro{suffix}"] + assert result_cols == ["antananarivo", "bob", "zor ro", f"zor ro{suffix}"] @pytest.mark.parametrize("suffix", ["_right", "_custom_suffix"]) @@ -151,13 +151,13 @@ def 
test_cross_join_non_pandas() -> None: ( ["antananarivo", "bob"], (nw.col("bob") < 5), - {"antananarivo": [2], "bob": [6], "zorro": [9]}, + {"antananarivo": [2], "bob": [6], "zor ro": [9]}, ), - (["bob"], (nw.col("bob") < 5), {"antananarivo": [2], "bob": [6], "zorro": [9]}), + (["bob"], (nw.col("bob") < 5), {"antananarivo": [2], "bob": [6], "zor ro": [9]}), ( ["bob"], (nw.col("bob") > 5), - {"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7.0, 8.0]}, + {"antananarivo": [1, 3], "bob": [4, 4], "zor ro": [7.0, 8.0]}, ), ], ) @@ -170,7 +170,7 @@ def test_anti_join( ) -> None: if "duckdb" in str(constructor): request.applymarker(pytest.mark.xfail) - data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) other = df.filter(filter_expr) result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type] @@ -183,22 +183,22 @@ def test_anti_join( ( "antananarivo", (nw.col("bob") > 5), - {"antananarivo": [2], "bob": [6], "zorro": [9]}, + {"antananarivo": [2], "bob": [6], "zor ro": [9]}, ), ( ["antananarivo"], (nw.col("bob") > 5), - {"antananarivo": [2], "bob": [6], "zorro": [9]}, + {"antananarivo": [2], "bob": [6], "zor ro": [9]}, ), ( ["bob"], (nw.col("bob") < 5), - {"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]}, + {"antananarivo": [1, 3], "bob": [4, 4], "zor ro": [7, 8]}, ), ( ["antananarivo", "bob"], (nw.col("bob") < 5), - {"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]}, + {"antananarivo": [1, 3], "bob": [4, 4], "zor ro": [7, 8]}, ), ], ) @@ -208,7 +208,7 @@ def test_semi_join( filter_expr: nw.Expr, expected: dict[str, list[Any]], ) -> None: - data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) other = df.filter(filter_expr) result = df.join(other, 
how="semi", left_on=join_key, right_on=join_key).sort( # type: ignore[arg-type] @@ -219,7 +219,7 @@ def test_semi_join( @pytest.mark.parametrize("how", ["right", "full"]) def test_join_not_implemented(constructor: Constructor, how: str) -> None: - data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( @@ -333,7 +333,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: @pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) def test_join_keys_exceptions(constructor: Constructor, how: str) -> None: - data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( @@ -538,7 +538,7 @@ def test_joinasof_by( def test_joinasof_not_implemented( constructor: Constructor, strategy: Literal["backward", "forward"] ) -> None: - data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( @@ -554,7 +554,7 @@ def test_joinasof_not_implemented( def test_joinasof_keys_exceptions(constructor: Constructor) -> None: - data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( @@ -595,7 +595,7 @@ def test_joinasof_keys_exceptions(constructor: Constructor) -> None: def test_joinasof_by_exceptions(constructor: Constructor) -> None: - data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( ValueError, 
diff --git a/tests/frame/sort_test.py b/tests/frame/sort_test.py index 5147c6f56..1ce3414c8 100644 --- a/tests/frame/sort_test.py +++ b/tests/frame/sort_test.py @@ -8,18 +8,18 @@ def test_sort(constructor: Constructor) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"an tan": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df = nw.from_native(constructor(data)) - result = df.sort("a", "b") + result = df.sort("an tan", "b") expected = { - "a": [1, 2, 3], + "an tan": [1, 2, 3], "b": [4, 6, 4], "z": [7.0, 9.0, 8.0], } assert_equal_data(result, expected) - result = df.sort("a", "b", descending=[True, False]) + result = df.sort("an tan", "b", descending=[True, False]) expected = { - "a": [3, 2, 1], + "an tan": [3, 2, 1], "b": [4, 6, 4], "z": [8.0, 9.0, 7.0], } @@ -29,14 +29,14 @@ def test_sort(constructor: Constructor) -> None: @pytest.mark.parametrize( ("nulls_last", "expected"), [ - (True, {"a": [0, 2, 0, -1], "b": [3, 2, 1, None]}), - (False, {"a": [-1, 0, 2, 0], "b": [None, 3, 2, 1]}), + (True, {"antan desc": [0, 2, 0, -1], "b": [3, 2, 1, None]}), + (False, {"antan desc": [-1, 0, 2, 0], "b": [None, 3, 2, 1]}), ], ) def test_sort_nulls( constructor: Constructor, *, nulls_last: bool, expected: dict[str, float] ) -> None: - data = {"a": [0, 0, 2, -1], "b": [1, 3, 2, None]} + data = {"antan desc": [0, 0, 2, -1], "b": [1, 3, 2, None]} df = nw.from_native(constructor(data)) result = df.sort("b", descending=True, nulls_last=nulls_last) assert_equal_data(result, expected)