Skip to content

Commit

Permalink
fix: duckdb join was failing if column names contained spaces (#1775)
Browse files Browse the repository at this point in the history
* fix: duckdb column names with spaces

* test

* sort was raising too too
  • Loading branch information
MarcoGorelli authored Jan 9, 2025
1 parent 0f38521 commit 9abe7d2
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 36 deletions.
12 changes: 6 additions & 6 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,24 +243,24 @@ def join(
assert right_on is not None # noqa: S101

conditions = [
f"lhs.{left} = rhs.{right}" for left, right in zip(left_on, right_on)
f'lhs."{left}" = rhs."{right}"' for left, right in zip(left_on, right_on)
]
condition = " and ".join(conditions)
rel = self._native_frame.set_alias("lhs").join(
other._native_frame.set_alias("rhs"), condition=condition, how=how
)

if how in ("inner", "left", "cross"):
select = [f"lhs.{x}" for x in self._native_frame.columns]
select = [f'lhs."{x}"' for x in self._native_frame.columns]
for col in other._native_frame.columns:
if col in self._native_frame.columns and (
right_on is None or col not in right_on
):
select.append(f"rhs.{col} as {col}{suffix}")
select.append(f'rhs."{col}" as "{col}{suffix}"')
elif right_on is None or col not in right_on:
select.append(col)
else: # semi
select = [f"lhs.{x}" for x in self._native_frame.columns]
select = ["lhs.*"]

res = rel.select(", ".join(select)).set_alias(original_alias)
return self._from_native_frame(res)
Expand Down Expand Up @@ -317,9 +317,9 @@ def sort(
result = self._native_frame.order(
",".join(
(
f"{col} {desc} nulls last"
f'"{col}" {desc} nulls last'
if nulls_last
else f"{col} {desc} nulls first"
else f'"{col}" {desc} nulls first'
for col, desc in zip(flat_by, descending_str)
)
)
Expand Down
44 changes: 22 additions & 22 deletions tests/frame/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_inner_join_two_keys(constructor: Constructor) -> None:
data = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
"index": [0, 1, 2],
}
df = nw.from_native(constructor(data))
Expand All @@ -37,9 +37,9 @@ def test_inner_join_two_keys(constructor: Constructor) -> None:
expected = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
"index": [0, 1, 2],
"zorro_right": [7.0, 8, 9],
"zor ro_right": [7.0, 8, 9],
}
assert_equal_data(result, expected)
assert_equal_data(result_on, expected)
Expand All @@ -49,7 +49,7 @@ def test_inner_join_single_key(constructor: Constructor) -> None:
data = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
"index": [0, 1, 2],
}
df = nw.from_native(constructor(data))
Expand All @@ -66,10 +66,10 @@ def test_inner_join_single_key(constructor: Constructor) -> None:
expected = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
"index": [0, 1, 2],
"bob_right": [4, 4, 6],
"zorro_right": [7.0, 8, 9],
"zor ro_right": [7.0, 8, 9],
}
assert_equal_data(result, expected)
assert_equal_data(result_on, expected)
Expand Down Expand Up @@ -99,7 +99,7 @@ def test_suffix(constructor: Constructor, how: str, suffix: str) -> None:
data = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
}
df = nw.from_native(constructor(data))
df_right = df
Expand All @@ -111,7 +111,7 @@ def test_suffix(constructor: Constructor, how: str, suffix: str) -> None:
suffix=suffix,
)
result_cols = result.collect_schema().names()
assert result_cols == ["antananarivo", "bob", "zorro", f"zorro{suffix}"]
assert result_cols == ["antananarivo", "bob", "zor ro", f"zor ro{suffix}"]


@pytest.mark.parametrize("suffix", ["_right", "_custom_suffix"])
Expand Down Expand Up @@ -151,13 +151,13 @@ def test_cross_join_non_pandas() -> None:
(
["antananarivo", "bob"],
(nw.col("bob") < 5),
{"antananarivo": [2], "bob": [6], "zorro": [9]},
{"antananarivo": [2], "bob": [6], "zor ro": [9]},
),
(["bob"], (nw.col("bob") < 5), {"antananarivo": [2], "bob": [6], "zorro": [9]}),
(["bob"], (nw.col("bob") < 5), {"antananarivo": [2], "bob": [6], "zor ro": [9]}),
(
["bob"],
(nw.col("bob") > 5),
{"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7.0, 8.0]},
{"antananarivo": [1, 3], "bob": [4, 4], "zor ro": [7.0, 8.0]},
),
],
)
Expand All @@ -170,7 +170,7 @@ def test_anti_join(
) -> None:
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
other = df.filter(filter_expr)
result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type]
Expand All @@ -183,22 +183,22 @@ def test_anti_join(
(
"antananarivo",
(nw.col("bob") > 5),
{"antananarivo": [2], "bob": [6], "zorro": [9]},
{"antananarivo": [2], "bob": [6], "zor ro": [9]},
),
(
["antananarivo"],
(nw.col("bob") > 5),
{"antananarivo": [2], "bob": [6], "zorro": [9]},
{"antananarivo": [2], "bob": [6], "zor ro": [9]},
),
(
["bob"],
(nw.col("bob") < 5),
{"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]},
{"antananarivo": [1, 3], "bob": [4, 4], "zor ro": [7, 8]},
),
(
["antananarivo", "bob"],
(nw.col("bob") < 5),
{"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]},
{"antananarivo": [1, 3], "bob": [4, 4], "zor ro": [7, 8]},
),
],
)
Expand All @@ -208,7 +208,7 @@ def test_semi_join(
filter_expr: nw.Expr,
expected: dict[str, list[Any]],
) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
other = df.filter(filter_expr)
result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( # type: ignore[arg-type]
Expand All @@ -219,7 +219,7 @@ def test_semi_join(

@pytest.mark.parametrize("how", ["right", "full"])
def test_join_not_implemented(constructor: Constructor, how: str) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))

with pytest.raises(
Expand Down Expand Up @@ -333,7 +333,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None:

@pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"])
def test_join_keys_exceptions(constructor: Constructor, how: str) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))

with pytest.raises(
Expand Down Expand Up @@ -538,7 +538,7 @@ def test_joinasof_by(
def test_joinasof_not_implemented(
constructor: Constructor, strategy: Literal["backward", "forward"]
) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))

with pytest.raises(
Expand All @@ -554,7 +554,7 @@ def test_joinasof_not_implemented(


def test_joinasof_keys_exceptions(constructor: Constructor) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))

with pytest.raises(
Expand Down Expand Up @@ -595,7 +595,7 @@ def test_joinasof_keys_exceptions(constructor: Constructor) -> None:


def test_joinasof_by_exceptions(constructor: Constructor) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
with pytest.raises(
ValueError,
Expand Down
16 changes: 8 additions & 8 deletions tests/frame/sort_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@


def test_sort(constructor: Constructor) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
data = {"an tan": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
result = df.sort("a", "b")
result = df.sort("an tan", "b")
expected = {
"a": [1, 2, 3],
"an tan": [1, 2, 3],
"b": [4, 6, 4],
"z": [7.0, 9.0, 8.0],
}
assert_equal_data(result, expected)
result = df.sort("a", "b", descending=[True, False])
result = df.sort("an tan", "b", descending=[True, False])
expected = {
"a": [3, 2, 1],
"an tan": [3, 2, 1],
"b": [4, 6, 4],
"z": [8.0, 9.0, 7.0],
}
Expand All @@ -29,14 +29,14 @@ def test_sort(constructor: Constructor) -> None:
@pytest.mark.parametrize(
("nulls_last", "expected"),
[
(True, {"a": [0, 2, 0, -1], "b": [3, 2, 1, None]}),
(False, {"a": [-1, 0, 2, 0], "b": [None, 3, 2, 1]}),
(True, {"antan desc": [0, 2, 0, -1], "b": [3, 2, 1, None]}),
(False, {"antan desc": [-1, 0, 2, 0], "b": [None, 3, 2, 1]}),
],
)
def test_sort_nulls(
constructor: Constructor, *, nulls_last: bool, expected: dict[str, float]
) -> None:
data = {"a": [0, 0, 2, -1], "b": [1, 3, 2, None]}
data = {"antan desc": [0, 0, 2, -1], "b": [1, 3, 2, None]}
df = nw.from_native(constructor(data))
result = df.sort("b", descending=True, nulls_last=nulls_last)
assert_equal_data(result, expected)

0 comments on commit 9abe7d2

Please sign in to comment.