From 9eb5f46aa58672763d3309537416eef1bdfb2273 Mon Sep 17 00:00:00 2001
From: Isaias Gutierrez-Cruz <64386035+IsaiasGutierrezCruz@users.noreply.github.com>
Date: Wed, 4 Sep 2024 09:47:58 -0600
Subject: [PATCH 01/30] ci: add tests for the queries of TPC-H (#899)

---
 .github/workflows/check_tpch_queries.yml | 30 ++++++++++++++++++++++++
 pyproject.toml                           |  1 +
 requirements-dev.txt                     |  1 +
 tpch/tests/__init__.py                   |  0
 tpch/tests/test_queries.py               | 29 +++++++++++++++++++++++
 5 files changed, 61 insertions(+)
 create mode 100644 .github/workflows/check_tpch_queries.yml
 create mode 100644 tpch/tests/__init__.py
 create mode 100644 tpch/tests/test_queries.py

diff --git a/.github/workflows/check_tpch_queries.yml b/.github/workflows/check_tpch_queries.yml
new file mode 100644
index 000000000..397163091
--- /dev/null
+++ b/.github/workflows/check_tpch_queries.yml
@@ -0,0 +1,30 @@
+name: Tests for TPCH Queries
+
+on:
+  pull_request:
+    types: [labeled]
+
+jobs:
+  validate-queries:
+    if: ${{ github.event.label.name == 'full-test' }}
+    strategy:
+      matrix:
+        python-version: ["3.12"]
+        os: [ubuntu-latest]
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install uv
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      - name: install-reqs
+        run: uv pip install --upgrade -r requirements-dev.txt --system
+      - name: local-install
+        run: uv pip install -e . --system
+      - name: generate-data
+        run: cd tpch && python generate_data.py
+      - name: tpch-tests
+        run: python -m unittest discover -s 'tpch/tests'
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index a279280bf..b3a2a0c28 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -76,6 +76,7 @@ lint.ignore = [
 [tool.ruff.lint.per-file-ignores]
 "tests/*" = ["S101"]
+"tpch/tests/*" = ["S101"]
 "utils/*" = ["S311", "PTH123"]
 "tpch/execute/*" = ["T201"]

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 213fcdcb8..23ff1757e 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,3 +1,4 @@
+tqdm
 covdefaults
 duckdb
 pandas
diff --git a/tpch/tests/__init__.py b/tpch/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tpch/tests/test_queries.py b/tpch/tests/test_queries.py
new file mode 100644
index 000000000..4b7cdd866
--- /dev/null
+++ b/tpch/tests/test_queries.py
@@ -0,0 +1,29 @@
+import os
+import subprocess
+import sys
+import unittest
+from pathlib import Path
+
+
+class TestQueries(unittest.TestCase):
+    def test_execute_scripts(self) -> None:
+        root = Path(__file__).resolve().parent.parent
+        # directory containing all the queries
+        execute_dir = root / "execute"
+
+        env = os.environ.copy()
+        env["PYTHONPATH"] = str(root)
+
+        for script_path in execute_dir.glob("q[1-9]*.py"):
+            result = subprocess.run(  # noqa: S603
+                [sys.executable, str(script_path)],
+                capture_output=True,
+                text=True,
+                env=env,
+                cwd=root,
+                check=False,
+                shell=False,
+            )
+            assert (
+                result.returncode == 0
+            ), f"Script {script_path} failed with error: {result.stderr}"
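The harness above treats each file under `tpch/execute/` as a standalone script, so a single query can also be checked locally before pushing. A minimal sketch of what `test_execute_scripts` does for one script (assumptions: the parquet files were already produced by `generate_data.py`, the working directory is the repository root, and `q1.py` is just an example target):

import os
import subprocess
import sys
from pathlib import Path

root = Path("tpch")  # assumption: run from the repository root
env = os.environ.copy()
env["PYTHONPATH"] = str(root)  # so the scripts can resolve `from queries import ...`

result = subprocess.run(
    [sys.executable, str(root / "execute" / "q1.py")],
    capture_output=True,
    text=True,
    env=env,
    cwd=root,
    check=False,
)
assert result.returncode == 0, result.stderr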
From 69da133bbf86906b7787c95db403fcd4a626e78a Mon Sep 17 00:00:00 2001
From: Zhengbo Wang
Date: Wed, 4 Sep 2024 23:50:39 +0800
Subject: [PATCH 02/30] feat: Add more queries of tpch (#898)

---
 tpch/execute/__init__.py | 30 +++++++++++++++++++++++
 tpch/execute/q1.py       | 23 ++----------------
 tpch/execute/q10.py      | 25 ++++----------------
 tpch/execute/q11.py      | 23 ++++--------------
 tpch/execute/q15.py      | 21 +++++++++++++++++
 tpch/execute/q17.py      | 21 +++++++++++++++++
 tpch/execute/q18.py      | 22 +++++++++++++++++
 tpch/execute/q19.py      | 17 ++++++++++++++
 tpch/execute/q2.py       | 32 +++++--------------------
 tpch/execute/q20.py      | 20 ++++++++++++++++
 tpch/execute/q21.py      | 19 +++++++++++++++
 tpch/execute/q3.py       | 25 ++++----------------
 tpch/execute/q4.py       | 21 +++-------------
 tpch/execute/q5.py       | 29 ++++++----------------
 tpch/execute/q6.py       | 15 ++----------
 tpch/execute/q7.py       | 27 +++++++++++++++++++++
 tpch/execute/q9.py       | 35 +++++++++++++++++++++++++++
 tpch/queries/q15.py      | 33 ++++++++++++++++++++++++++
 tpch/queries/q17.py      | 23 ++++++++++++++++++
 tpch/queries/q18.py      | 31 ++++++++++++++++++++++++
 tpch/queries/q19.py      | 39 ++++++++++++++++++++++++++++++
 tpch/queries/q20.py      | 43 +++++++++++++++++++++++++++++++++
 tpch/queries/q21.py      | 43 +++++++++++++++++++++++++++++++++
 tpch/queries/q6.py       |  4 ----
 tpch/queries/q7.py       | 51 ++++++++++++++++++++++++++++++++++++++++
 tpch/queries/q9.py       | 36 ++++++++++++++++++++++++++++
 26 files changed, 544 insertions(+), 164 deletions(-)
 create mode 100644 tpch/execute/q15.py
 create mode 100644 tpch/execute/q17.py
 create mode 100644 tpch/execute/q18.py
 create mode 100644 tpch/execute/q19.py
 create mode 100644 tpch/execute/q20.py
 create mode 100644 tpch/execute/q21.py
 create mode 100644 tpch/execute/q7.py
 create mode 100644 tpch/execute/q9.py
 create mode 100644 tpch/queries/q15.py
 create mode 100644 tpch/queries/q17.py
 create mode 100644 tpch/queries/q18.py
 create mode 100644 tpch/queries/q19.py
 create mode 100644 tpch/queries/q20.py
 create mode 100644 tpch/queries/q21.py
 create mode 100644 tpch/queries/q7.py
 create mode 100644 tpch/queries/q9.py

diff --git a/tpch/execute/__init__.py b/tpch/execute/__init__.py
index e69de29bb..e0c448649 100644
--- a/tpch/execute/__init__.py
+++ b/tpch/execute/__init__.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+
+import dask.dataframe as dd
+import pandas as pd
+import polars as pl
+import pyarrow.parquet as pq
+
+pd.options.mode.copy_on_write = True
+pd.options.future.infer_string = True
+
+lineitem = Path("data") / "lineitem.parquet"
+region = Path("data") / "region.parquet"
+nation = Path("data") / "nation.parquet"
+supplier = Path("data") / "supplier.parquet"
+part = Path("data") / "part.parquet"
+partsupp = Path("data") / "partsupp.parquet"
+orders = Path("data") / "orders.parquet"
+customer = Path("data") / "customer.parquet"
+line_item = Path("data") / "lineitem.parquet"
+
+IO_FUNCS = {
+    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
+    "pandas[pyarrow]": lambda x: pd.read_parquet(
+        x, engine="pyarrow", dtype_backend="pyarrow"
+    ),
+    "polars[eager]": lambda x: pl.read_parquet(x),
+    "polars[lazy]": lambda x: pl.scan_parquet(x),
+    "pyarrow": lambda x: pq.read_table(x),
+    "dask": lambda x: dd.read_parquet(x, engine="pyarrow", dtype_backend="pyarrow"),
+}
diff --git a/tpch/execute/q1.py b/tpch/execute/q1.py
index dd839b292..9889c3af0 100644
--- a/tpch/execute/q1.py
+++ b/tpch/execute/q1.py
@@ -1,26 +1,7 @@
-from pathlib import Path
-
-import dask.dataframe as dd
-import pandas as pd
-import polars as pl
-import pyarrow.parquet as pq
 from queries import q1
 
-pd.options.mode.copy_on_write = True
-pd.options.future.infer_string = True
-
-lineitem = Path("data") / "lineitem.parquet"
-
-IO_FUNCS = {
-    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
-    "pandas[pyarrow]": lambda x: pd.read_parquet(
-        x, engine="pyarrow", dtype_backend="pyarrow"
-    ),
-    "polars[eager]": lambda x: pl.read_parquet(x),
-    "polars[lazy]": lambda x: pl.scan_parquet(x),
-    "pyarrow": lambda x:
pq.read_table(x), - "dask": lambda x: dd.read_parquet(x, engine="pyarrow", dtype_backend="pyarrow"), -} +from . import IO_FUNCS +from . import lineitem print(q1.query(IO_FUNCS["pandas[pyarrow]"](lineitem))) print(q1.query(IO_FUNCS["polars[lazy]"](lineitem)).collect()) diff --git a/tpch/execute/q10.py b/tpch/execute/q10.py index 19e2e7ce0..9876f2aa9 100644 --- a/tpch/execute/q10.py +++ b/tpch/execute/q10.py @@ -1,25 +1,10 @@ -from pathlib import Path - -import pandas as pd -import polars as pl from queries import q10 -pd.options.mode.copy_on_write = True -pd.options.future.infer_string = True - -customer = Path("data") / "customer.parquet" -nation = Path("data") / "nation.parquet" -lineitem = Path("data") / "lineitem.parquet" -orders = Path("data") / "orders.parquet" - -IO_FUNCS = { - "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"), - "pandas[pyarrow]": lambda x: pd.read_parquet( - x, engine="pyarrow", dtype_backend="pyarrow" - ), - "polars[eager]": lambda x: pl.read_parquet(x), - "polars[lazy]": lambda x: pl.scan_parquet(x), -} +from . import IO_FUNCS +from . import customer +from . import lineitem +from . import nation +from . import orders tool = "pandas" fn = IO_FUNCS[tool] diff --git a/tpch/execute/q11.py b/tpch/execute/q11.py index 55161ae6b..82b1936aa 100644 --- a/tpch/execute/q11.py +++ b/tpch/execute/q11.py @@ -1,24 +1,9 @@ -from pathlib import Path - -import pandas as pd -import polars as pl from queries import q11 -pd.options.mode.copy_on_write = True -pd.options.future.infer_string = True - -nation = Path("data") / "nation.parquet" -partsupp = Path("data") / "partsupp.parquet" -supplier = Path("data") / "supplier.parquet" - -IO_FUNCS = { - "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"), - "pandas[pyarrow]": lambda x: pd.read_parquet( - x, engine="pyarrow", dtype_backend="pyarrow" - ), - "polars[eager]": lambda x: pl.read_parquet(x), - "polars[lazy]": lambda x: pl.scan_parquet(x), -} +from . import IO_FUNCS +from . import nation +from . import partsupp +from . import supplier tool = "pandas" fn = IO_FUNCS[tool] diff --git a/tpch/execute/q15.py b/tpch/execute/q15.py new file mode 100644 index 000000000..8fdaf2ab1 --- /dev/null +++ b/tpch/execute/q15.py @@ -0,0 +1,21 @@ +from queries import q15 + +from . import IO_FUNCS +from . import lineitem +from . import supplier + +tool = "pandas" +fn = IO_FUNCS[tool] +print(q15.query(fn(lineitem), fn(supplier))) + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q15.query(fn(lineitem), fn(supplier))) + +tool = "polars[eager]" +fn = IO_FUNCS[tool] +print(q15.query(fn(lineitem), fn(supplier))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print(q15.query(fn(lineitem), fn(supplier)).collect()) diff --git a/tpch/execute/q17.py b/tpch/execute/q17.py new file mode 100644 index 000000000..5f2228012 --- /dev/null +++ b/tpch/execute/q17.py @@ -0,0 +1,21 @@ +from queries import q17 + +from . import IO_FUNCS +from . import lineitem +from . import part + +tool = "pandas" +fn = IO_FUNCS[tool] +print(q17.query(fn(lineitem), fn(part))) + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q17.query(fn(lineitem), fn(part))) + +tool = "polars[eager]" +fn = IO_FUNCS[tool] +print(q17.query(fn(lineitem), fn(part))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print(q17.query(fn(lineitem), fn(part)).collect()) diff --git a/tpch/execute/q18.py b/tpch/execute/q18.py new file mode 100644 index 000000000..5a59f0e5e --- /dev/null +++ b/tpch/execute/q18.py @@ -0,0 +1,22 @@ +from queries import q18 + +from . import IO_FUNCS +from . 
import customer +from . import lineitem +from . import orders + +tool = "pandas" +fn = IO_FUNCS[tool] +print(q18.query(fn(customer), fn(lineitem), fn(orders))) + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q18.query(fn(customer), fn(lineitem), fn(orders))) + +tool = "polars[eager]" +fn = IO_FUNCS[tool] +print(q18.query(fn(customer), fn(lineitem), fn(orders))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print(q18.query(fn(customer), fn(lineitem), fn(orders)).collect()) diff --git a/tpch/execute/q19.py b/tpch/execute/q19.py new file mode 100644 index 000000000..87467064c --- /dev/null +++ b/tpch/execute/q19.py @@ -0,0 +1,17 @@ +from queries import q19 + +from . import IO_FUNCS +from . import lineitem +from . import part + +fn = IO_FUNCS["pandas"] +print(q19.query(fn(lineitem), fn(part))) + +fn = IO_FUNCS["pandas[pyarrow]"] +print(q19.query(fn(lineitem), fn(part))) + +fn = IO_FUNCS["polars[eager]"] +print(q19.query(fn(lineitem), fn(part))) + +fn = IO_FUNCS["polars[lazy]"] +print(q19.query(fn(lineitem), fn(part)).collect()) diff --git a/tpch/execute/q2.py b/tpch/execute/q2.py index 22a7f4317..cd82a9047 100644 --- a/tpch/execute/q2.py +++ b/tpch/execute/q2.py @@ -1,31 +1,11 @@ -from pathlib import Path - -import dask.dataframe as dd -import pandas as pd -import polars as pl -import pyarrow.parquet as pq from queries import q2 -pd.options.mode.copy_on_write = True -pd.options.future.infer_string = True - -region = Path("data") / "region.parquet" -nation = Path("data") / "nation.parquet" -supplier = Path("data") / "supplier.parquet" -part = Path("data") / "part.parquet" -partsupp = Path("data") / "partsupp.parquet" - -IO_FUNCS = { - "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"), - "pandas[pyarrow]": lambda x: pd.read_parquet( - x, engine="pyarrow", dtype_backend="pyarrow" - ), - "polars[eager]": lambda x: pl.read_parquet(x), - "polars[lazy]": lambda x: pl.scan_parquet(x), - "pyarrow": lambda x: pq.read_table(x), - "dask": lambda x: dd.read_parquet(x, engine="pyarrow", dtype_backend="pyarrow"), -} - +from . import IO_FUNCS +from . import nation +from . import part +from . import partsupp +from . import region +from . import supplier tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] diff --git a/tpch/execute/q20.py b/tpch/execute/q20.py new file mode 100644 index 000000000..68d18a6b5 --- /dev/null +++ b/tpch/execute/q20.py @@ -0,0 +1,20 @@ +from queries import q20 + +from . import IO_FUNCS +from . import lineitem +from . import nation +from . import part +from . import partsupp +from . import supplier + +fn = IO_FUNCS["pandas"] +print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))) + +fn = IO_FUNCS["pandas[pyarrow]"] +print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))) + +fn = IO_FUNCS["polars[eager]"] +print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))) + +fn = IO_FUNCS["polars[lazy]"] +print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier)).collect()) diff --git a/tpch/execute/q21.py b/tpch/execute/q21.py new file mode 100644 index 000000000..693953870 --- /dev/null +++ b/tpch/execute/q21.py @@ -0,0 +1,19 @@ +from queries import q21 + +from . import IO_FUNCS +from . import lineitem +from . import nation +from . import orders +from . 
import supplier + +fn = IO_FUNCS["pandas"] +print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier))) + +fn = IO_FUNCS["pandas[pyarrow]"] +print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier))) + +fn = IO_FUNCS["polars[eager]"] +print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier))) + +fn = IO_FUNCS["polars[lazy]"] +print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier)).collect()) diff --git a/tpch/execute/q3.py b/tpch/execute/q3.py index 30194b5da..8602bb3d0 100644 --- a/tpch/execute/q3.py +++ b/tpch/execute/q3.py @@ -1,26 +1,9 @@ -from pathlib import Path - -import pandas as pd -import polars as pl from queries import q3 -pd.options.mode.copy_on_write = True -pd.options.future.infer_string = True - - -customer = Path("data") / "customer.parquet" -lineitem = Path("data") / "lineitem.parquet" -orders = Path("data") / "orders.parquet" - -IO_FUNCS = { - "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"), - "pandas[pyarrow]": lambda x: pd.read_parquet( - x, engine="pyarrow", dtype_backend="pyarrow" - ), - "polars[eager]": lambda x: pl.read_parquet(x), - "polars[lazy]": lambda x: pl.scan_parquet(x), -} - +from . import IO_FUNCS +from . import customer +from . import lineitem +from . import orders tool = "pandas" fn = IO_FUNCS[tool] diff --git a/tpch/execute/q4.py b/tpch/execute/q4.py index 672a43e17..3e67a9c87 100644 --- a/tpch/execute/q4.py +++ b/tpch/execute/q4.py @@ -1,23 +1,8 @@ -from pathlib import Path - -import pandas as pd -import polars as pl from queries import q4 -pd.options.mode.copy_on_write = True -pd.options.future.infer_string = True - -line_item = Path("data") / "lineitem.parquet" -orders = Path("data") / "orders.parquet" - -IO_FUNCS = { - "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"), - "pandas[pyarrow]": lambda x: pd.read_parquet( - x, engine="pyarrow", dtype_backend="pyarrow" - ), - "polars[eager]": lambda x: pl.read_parquet(x), - "polars[lazy]": lambda x: pl.scan_parquet(x), -} +from . import IO_FUNCS +from . import line_item +from . import orders tool = "pandas" fn = IO_FUNCS[tool] diff --git a/tpch/execute/q5.py b/tpch/execute/q5.py index b77f740d8..317b15fc7 100644 --- a/tpch/execute/q5.py +++ b/tpch/execute/q5.py @@ -1,27 +1,12 @@ -from pathlib import Path - -import pandas as pd -import polars as pl from queries import q5 -pd.options.mode.copy_on_write = True -pd.options.future.infer_string = True - -region = Path("data") / "region.parquet" -nation = Path("data") / "nation.parquet" -customer = Path("data") / "customer.parquet" -line_item = Path("data") / "lineitem.parquet" -orders = Path("data") / "orders.parquet" -supplier = Path("data") / "supplier.parquet" - -IO_FUNCS = { - "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"), - "pandas[pyarrow]": lambda x: pd.read_parquet( - x, engine="pyarrow", dtype_backend="pyarrow" - ), - "polars[eager]": lambda x: pl.read_parquet(x), - "polars[lazy]": lambda x: pl.scan_parquet(x), -} +from . import IO_FUNCS +from . import customer +from . import line_item +from . import nation +from . import orders +from . import region +from . 
import supplier tool = "pandas" fn = IO_FUNCS[tool] diff --git a/tpch/execute/q6.py b/tpch/execute/q6.py index 85b3d9968..adca0e26d 100644 --- a/tpch/execute/q6.py +++ b/tpch/execute/q6.py @@ -1,18 +1,7 @@ -from pathlib import Path - -import pandas as pd -import polars as pl from queries import q6 -lineitem = Path("data") / "lineitem.parquet" -IO_FUNCS = { - "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"), - "pandas[pyarrow]": lambda x: pd.read_parquet( - x, engine="pyarrow", dtype_backend="pyarrow" - ), - "polars[eager]": lambda x: pl.read_parquet(x), - "polars[lazy]": lambda x: pl.scan_parquet(x), -} +from . import IO_FUNCS +from . import lineitem tool = "pandas" fn = IO_FUNCS[tool] diff --git a/tpch/execute/q7.py b/tpch/execute/q7.py new file mode 100644 index 000000000..43e110a72 --- /dev/null +++ b/tpch/execute/q7.py @@ -0,0 +1,27 @@ +from queries import q7 + +from . import IO_FUNCS +from . import customer +from . import lineitem +from . import nation +from . import orders +from . import supplier + +tool = "pandas" +fn = IO_FUNCS[tool] +print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))) + + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))) + +tool = "polars[eager]" +fn = IO_FUNCS[tool] +print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print( + q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier)).collect() +) diff --git a/tpch/execute/q9.py b/tpch/execute/q9.py new file mode 100644 index 000000000..9ccbe35b7 --- /dev/null +++ b/tpch/execute/q9.py @@ -0,0 +1,35 @@ +from queries import q9 + +from . import IO_FUNCS +from . import lineitem +from . import nation +from . import orders +from . import part +from . import partsupp +from . 
import supplier + +tool = "pandas" +fn = IO_FUNCS[tool] +print( + q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier)) +) + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print( + q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier)) +) + +tool = "polars[eager]" +fn = IO_FUNCS[tool] +print( + q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier)) +) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print( + q9.query( + fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier) + ).collect() +) diff --git a/tpch/queries/q15.py b/tpch/queries/q15.py new file mode 100644 index 000000000..1ebae57d6 --- /dev/null +++ b/tpch/queries/q15.py @@ -0,0 +1,33 @@ +from datetime import datetime + +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query( + lineitem_ds: FrameT, + supplier_ds: FrameT, +) -> FrameT: + var1 = datetime(1996, 1, 1) + var2 = datetime(1996, 4, 1) + + revenue = ( + lineitem_ds.filter(nw.col("l_shipdate").is_between(var1, var2, closed="left")) + .with_columns( + (nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).alias( + "total_revenue" + ) + ) + .group_by("l_suppkey") + .agg(nw.sum("total_revenue")) + .select(nw.col("l_suppkey").alias("supplier_no"), nw.col("total_revenue")) + ) + + return ( + supplier_ds.join(revenue, left_on="s_suppkey", right_on="supplier_no") + .filter(nw.col("total_revenue") == nw.col("total_revenue").max()) + .with_columns(nw.col("total_revenue").round(2)) + .select("s_suppkey", "s_name", "s_address", "s_phone", "total_revenue") + .sort("s_suppkey") + ) diff --git a/tpch/queries/q17.py b/tpch/queries/q17.py new file mode 100644 index 000000000..5d35929d1 --- /dev/null +++ b/tpch/queries/q17.py @@ -0,0 +1,23 @@ +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query(lineitem_ds: FrameT, part_ds: FrameT) -> FrameT: + var1 = "Brand#23" + var2 = "MED BOX" + + query1 = ( + part_ds.filter(nw.col("p_brand") == var1) + .filter(nw.col("p_container") == var2) + .join(lineitem_ds, how="left", left_on="p_partkey", right_on="l_partkey") + ) + + return ( + query1.group_by("p_partkey") + .agg((0.2 * nw.col("l_quantity").mean()).alias("avg_quantity")) + .select(nw.col("p_partkey").alias("key"), nw.col("avg_quantity")) + .join(query1, left_on="key", right_on="p_partkey") + .filter(nw.col("l_quantity") < nw.col("avg_quantity")) + .select((nw.col("l_extendedprice").sum() / 7.0).round(2).alias("avg_yearly")) + ) diff --git a/tpch/queries/q18.py b/tpch/queries/q18.py new file mode 100644 index 000000000..d3d183176 --- /dev/null +++ b/tpch/queries/q18.py @@ -0,0 +1,31 @@ +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query(customer_ds: FrameT, lineitem_ds: FrameT, orders_ds: FrameT) -> FrameT: + var1 = 300 + + query1 = ( + lineitem_ds.group_by("l_orderkey") + .agg(nw.col("l_quantity").sum().alias("sum_quantity")) + .filter(nw.col("sum_quantity") > var1) + ) + + return ( + orders_ds.join(query1, left_on="o_orderkey", right_on="l_orderkey", how="semi") + .join(lineitem_ds, left_on="o_orderkey", right_on="l_orderkey") + .join(customer_ds, left_on="o_custkey", right_on="c_custkey") + .group_by("c_name", "o_custkey", "o_orderkey", "o_orderdate", "o_totalprice") + .agg(nw.col("l_quantity").sum().alias("col6")) + .select( + nw.col("c_name"), + nw.col("o_custkey").alias("c_custkey"), + nw.col("o_orderkey"), + nw.col("o_orderdate").alias("o_orderdat"), + 
nw.col("o_totalprice"), + nw.col("col6"), + ) + .sort(by=["o_totalprice", "o_orderdat"], descending=[True, False]) + .head(100) + ) diff --git a/tpch/queries/q19.py b/tpch/queries/q19.py new file mode 100644 index 000000000..bcab36e9a --- /dev/null +++ b/tpch/queries/q19.py @@ -0,0 +1,39 @@ +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query(lineitem_ds: FrameT, part_ds: FrameT) -> FrameT: + return ( + part_ds.join(lineitem_ds, left_on="p_partkey", right_on="l_partkey") + .filter(nw.col("l_shipmode").is_in(["AIR", "AIR REG"])) + .filter(nw.col("l_shipinstruct") == "DELIVER IN PERSON") + .filter( + ( + (nw.col("p_brand") == "Brand#12") + & nw.col("p_container").is_in(["SM CASE", "SM BOX", "SM PACK", "SM PKG"]) + & (nw.col("l_quantity").is_between(1, 11)) + & (nw.col("p_size").is_between(1, 5)) + ) + | ( + (nw.col("p_brand") == "Brand#23") + & nw.col("p_container").is_in( + ["MED BAG", "MED BOX", "MED PKG", "MED PACK"] + ) + & (nw.col("l_quantity").is_between(10, 20)) + & (nw.col("p_size").is_between(1, 10)) + ) + | ( + (nw.col("p_brand") == "Brand#34") + & nw.col("p_container").is_in(["LG CASE", "LG BOX", "LG PACK", "LG PKG"]) + & (nw.col("l_quantity").is_between(20, 30)) + & (nw.col("p_size").is_between(1, 15)) + ) + ) + .select( + (nw.col("l_extendedprice") * (1 - nw.col("l_discount"))) + .sum() + .round(2) + .alias("revenue") + ) + ) diff --git a/tpch/queries/q20.py b/tpch/queries/q20.py new file mode 100644 index 000000000..d9014f7b8 --- /dev/null +++ b/tpch/queries/q20.py @@ -0,0 +1,43 @@ +from datetime import datetime + +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query( + part_ds: FrameT, + partsupp_ds: FrameT, + nation_ds: FrameT, + lineitem_ds: FrameT, + supplier_ds: FrameT, +) -> FrameT: + var1 = datetime(1994, 1, 1) + var2 = datetime(1995, 1, 1) + var3 = "CANADA" + var4 = "forest" + + query1 = ( + lineitem_ds.filter(nw.col("l_shipdate").is_between(var1, var2, closed="left")) + .group_by("l_partkey", "l_suppkey") + .agg((nw.col("l_quantity").sum()).alias("sum_quantity")) + .with_columns(sum_quantity=nw.col("sum_quantity") * 0.5) + ) + query2 = nation_ds.filter(nw.col("n_name") == var3) + query3 = supplier_ds.join(query2, left_on="s_nationkey", right_on="n_nationkey") + + return ( + part_ds.filter(nw.col("p_name").str.starts_with(var4)) + .select(nw.col("p_partkey").unique()) + .join(partsupp_ds, left_on="p_partkey", right_on="ps_partkey") + .join( + query1, + left_on=["ps_suppkey", "p_partkey"], + right_on=["l_suppkey", "l_partkey"], + ) + .filter(nw.col("ps_availqty") > nw.col("sum_quantity")) + .select(nw.col("ps_suppkey").unique()) + .join(query3, left_on="ps_suppkey", right_on="s_suppkey") + .select("s_name", "s_address") + .sort("s_name") + ) diff --git a/tpch/queries/q21.py b/tpch/queries/q21.py new file mode 100644 index 000000000..d10ff394f --- /dev/null +++ b/tpch/queries/q21.py @@ -0,0 +1,43 @@ +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query( + lineitem: FrameT, + nation: FrameT, + orders: FrameT, + supplier: FrameT, +) -> FrameT: + var1 = "SAUDI ARABIA" + + q1 = ( + lineitem.group_by("l_orderkey") + .agg(nw.len().alias("n_supp_by_order")) + .filter(nw.col("n_supp_by_order") > 1) + .join( + lineitem.filter(nw.col("l_receiptdate") > nw.col("l_commitdate")), + left_on="l_orderkey", + right_on="l_orderkey", + ) + ) + + return ( + q1.group_by("l_orderkey") + .agg(nw.len().alias("n_supp_by_order")) + .join( + q1, + left_on="l_orderkey", + 
right_on="l_orderkey", + ) + .join(supplier, left_on="l_suppkey", right_on="s_suppkey") + .join(nation, left_on="s_nationkey", right_on="n_nationkey") + .join(orders, left_on="l_orderkey", right_on="o_orderkey") + .filter(nw.col("n_supp_by_order") == 1) + .filter(nw.col("n_name") == var1) + .filter(nw.col("o_orderstatus") == "F") + .group_by("s_name") + .agg(nw.len().alias("numwait")) + .sort(by=["numwait", "s_name"], descending=[True, False]) + .head(100) + ) diff --git a/tpch/queries/q6.py b/tpch/queries/q6.py index 6a9b5c1d2..67f0ac785 100644 --- a/tpch/queries/q6.py +++ b/tpch/queries/q6.py @@ -1,12 +1,8 @@ from datetime import datetime -import pandas as pd - import narwhals as nw from narwhals.typing import FrameT -pd.options.mode.copy_on_write = True - @nw.narwhalify def query(line_item_ds: FrameT) -> FrameT: diff --git a/tpch/queries/q7.py b/tpch/queries/q7.py new file mode 100644 index 000000000..ec0946ac3 --- /dev/null +++ b/tpch/queries/q7.py @@ -0,0 +1,51 @@ +from datetime import datetime + +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query( + nation_ds: FrameT, + customer_ds: FrameT, + line_item_ds: FrameT, + orders_ds: FrameT, + supplier_ds: FrameT, +) -> FrameT: + n1 = nation_ds.filter(nw.col("n_name") == "FRANCE") + n2 = nation_ds.filter(nw.col("n_name") == "GERMANY") + + var_1 = datetime(1995, 1, 1) + var_2 = datetime(1996, 12, 31) + + df1 = ( + customer_ds.join(n1, left_on="c_nationkey", right_on="n_nationkey") + .join(orders_ds, left_on="c_custkey", right_on="o_custkey") + .rename({"n_name": "cust_nation"}) + .join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey") + .join(supplier_ds, left_on="l_suppkey", right_on="s_suppkey") + .join(n2, left_on="s_nationkey", right_on="n_nationkey") + .rename({"n_name": "supp_nation"}) + ) + + df2 = ( + customer_ds.join(n2, left_on="c_nationkey", right_on="n_nationkey") + .join(orders_ds, left_on="c_custkey", right_on="o_custkey") + .rename({"n_name": "cust_nation"}) + .join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey") + .join(supplier_ds, left_on="l_suppkey", right_on="s_suppkey") + .join(n1, left_on="s_nationkey", right_on="n_nationkey") + .rename({"n_name": "supp_nation"}) + ) + + return ( + nw.concat([df1, df2]) + .filter(nw.col("l_shipdate").is_between(var_1, var_2)) + .with_columns( + (nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).alias("volume") + ) + .with_columns(nw.col("l_shipdate").dt.year().alias("l_year")) + .group_by("supp_nation", "cust_nation", "l_year") + .agg(nw.sum("volume").alias("revenue")) + .sort(by=["supp_nation", "cust_nation", "l_year"]) + ) diff --git a/tpch/queries/q9.py b/tpch/queries/q9.py new file mode 100644 index 000000000..09dff4787 --- /dev/null +++ b/tpch/queries/q9.py @@ -0,0 +1,36 @@ +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query( + part_ds: FrameT, + partsupp_ds: FrameT, + nation_ds: FrameT, + lineitem_ds: FrameT, + orders_ds: FrameT, + supplier_ds: FrameT, +) -> FrameT: + return ( + part_ds.join(partsupp_ds, left_on="p_partkey", right_on="ps_partkey") + .join(supplier_ds, left_on="ps_suppkey", right_on="s_suppkey") + .join( + lineitem_ds, + left_on=["p_partkey", "ps_suppkey"], + right_on=["l_partkey", "l_suppkey"], + ) + .join(orders_ds, left_on="l_orderkey", right_on="o_orderkey") + .join(nation_ds, left_on="s_nationkey", right_on="n_nationkey") + .filter(nw.col("p_name").str.contains("green")) + .select( + nw.col("n_name").alias("nation"), + 
nw.col("o_orderdate").dt.year().alias("o_year"), + ( + nw.col("l_extendedprice") * (1 - nw.col("l_discount")) + - nw.col("ps_supplycost") * nw.col("l_quantity") + ).alias("amount"), + ) + .group_by("nation", "o_year") + .agg(nw.sum("amount").alias("sum_profit")) + .sort(by=["nation", "o_year"], descending=[False, True]) + ) From cb82d26b7d9d6a1aef882aa6fcbda79a612f1223 Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Wed, 4 Sep 2024 17:51:36 +0200 Subject: [PATCH 03/30] feat: dask lit with dtype (#909) --- narwhals/_dask/namespace.py | 15 ++++++++++++--- tests/frame/lit_test.py | 6 +----- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 89ca372ec..1668ee323 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -12,6 +12,7 @@ from narwhals._dask.dataframe import DaskLazyFrame from narwhals._dask.expr import DaskExpr from narwhals._dask.selectors import DaskSelectorNamespace +from narwhals._dask.utils import reverse_translate_dtype from narwhals._dask.utils import validate_comparand from narwhals._expression_parsing import parse_into_exprs @@ -19,6 +20,7 @@ import dask_expr from narwhals._dask.typing import IntoDaskExpr + from narwhals.dtypes import DType class DaskNamespace: @@ -70,10 +72,17 @@ def col(self, *column_names: str) -> DaskExpr: ) def lit(self, value: Any, dtype: dtypes.DType | None) -> DaskExpr: - # TODO @FBruzzesi: cast to dtype once `narwhals_to_native_dtype` is implemented. - # It should be enough to add `.astype(narwhals_to_native_dtype(dtype))` + def convert_if_dtype( + series: dask_expr.Series, dtype: DType | type[DType] + ) -> dask_expr.Series: + return series.astype(reverse_translate_dtype(dtype)) if dtype else series + return DaskExpr( - lambda df: [df._native_frame.assign(lit=value).loc[:, "lit"]], + lambda df: [ + df._native_frame.assign(lit=value) + .loc[:, "lit"] + .pipe(convert_if_dtype, dtype) + ], depth=0, function_name="lit", root_names=None, diff --git a/tests/frame/lit_test.py b/tests/frame/lit_test.py index 328e4d8e0..e5756e035 100644 --- a/tests/frame/lit_test.py +++ b/tests/frame/lit_test.py @@ -17,11 +17,7 @@ ("dtype", "expected_lit"), [(None, [2, 2, 2]), (nw.String, ["2", "2", "2"]), (nw.Float32, [2.0, 2.0, 2.0])], ) -def test_lit( - constructor: Any, dtype: DType | None, expected_lit: list[Any], request: Any -) -> None: - if "dask" in str(constructor) and dtype == nw.String: - request.applymarker(pytest.mark.xfail) +def test_lit(constructor: Any, dtype: DType | None, expected_lit: list[Any]) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df_raw = constructor(data) df = nw.from_native(df_raw).lazy() From d2d10cecab3dead7f5bd2909f6e0fff5ff89a12f Mon Sep 17 00:00:00 2001 From: Liam Connors Date: Fri, 6 Sep 2024 03:41:46 -0400 Subject: [PATCH 04/30] xfail dt.date tests for cuDF (#912) --- tests/expr_and_series/dt/datetime_attributes_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/expr_and_series/dt/datetime_attributes_test.py b/tests/expr_and_series/dt/datetime_attributes_test.py index 4d59567df..22e20590e 100644 --- a/tests/expr_and_series/dt/datetime_attributes_test.py +++ b/tests/expr_and_series/dt/datetime_attributes_test.py @@ -42,6 +42,8 @@ def test_datetime_attributes( and "pyarrow" not in str(constructor) ): request.applymarker(pytest.mark.xfail) + if attribute == "date" and "cudf" in str(constructor): + request.applymarker(pytest.mark.xfail) df = 
From d2d10cecab3dead7f5bd2909f6e0fff5ff89a12f Mon Sep 17 00:00:00 2001
From: Liam Connors
Date: Fri, 6 Sep 2024 03:41:46 -0400
Subject: [PATCH 04/30] xfail dt.date tests for cuDF (#912)

---
 tests/expr_and_series/dt/datetime_attributes_test.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/expr_and_series/dt/datetime_attributes_test.py b/tests/expr_and_series/dt/datetime_attributes_test.py
index 4d59567df..22e20590e 100644
--- a/tests/expr_and_series/dt/datetime_attributes_test.py
+++ b/tests/expr_and_series/dt/datetime_attributes_test.py
@@ -42,6 +42,8 @@ def test_datetime_attributes(
         and "pyarrow" not in str(constructor)
     ):
         request.applymarker(pytest.mark.xfail)
+    if attribute == "date" and "cudf" in str(constructor):
+        request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor(data))
     result = df.select(getattr(nw.col("a").dt, attribute)())
@@ -73,6 +75,8 @@ def test_datetime_attributes_series(
         and "pyarrow" not in str(constructor_eager)
     ):
         request.applymarker(pytest.mark.xfail)
+    if attribute == "date" and "cudf" in str(constructor_eager):
+        request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df.select(getattr(df["a"].dt, attribute)())
@@ -82,6 +86,8 @@ def test_datetime_chained_attributes(request: Any, constructor_eager: Any) -> None:
     if "pandas" in str(constructor_eager) and "pyarrow" not in str(constructor_eager):
         request.applymarker(pytest.mark.xfail)
+    if "cudf" in str(constructor_eager):
+        request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor_eager(data), eager_only=True)
     result = df.select(df["a"].dt.date().dt.year())

From fdc8f88be7c5f10bfc0d87040f8482b3ee56bf31 Mon Sep 17 00:00:00 2001
From: Marco Edward Gorelli
Date: Fri, 6 Sep 2024 08:46:23 +0100
Subject: [PATCH 05/30] Update extremes.yml (#913)

---
 .github/workflows/extremes.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml
index ae9c79009..7e1a5586e 100644
--- a/.github/workflows/extremes.yml
+++ b/.github/workflows/extremes.yml
@@ -104,7 +104,7 @@ jobs:
       - name: uninstall pandas
         run: uv pip uninstall pandas --system
       - name: install-pandas-nightly
-        run: uv pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas --system
+        run: uv pip install --prerelease=allow --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas --system
       - name: uninstall numpy
         run: uv pip uninstall numpy --system
       - name: install numpy nightly

From 029f590baf01869dd18b7b8ddfd518eaeba8af54 Mon Sep 17 00:00:00 2001
From: raisadz <34237447+raisadz@users.noreply.github.com>
Date: Fri, 6 Sep 2024 09:17:49 +0100
Subject: [PATCH 06/30] feat: Add join_asof support for pandas and dask (#911)

---
 docs/api-reference/dataframe.md    |   1 +
 docs/api-reference/lazyframe.md    |   1 +
 narwhals/_arrow/dataframe.py       |  11 ++
 narwhals/_dask/dataframe.py        |  20 +++
 narwhals/_pandas_like/dataframe.py |  20 +++
 narwhals/dataframe.py              | 202 +++++++++++++++++++++++++++++
 tests/frame/join_test.py           | 112 ++++++++++++++++
 7 files changed, 367 insertions(+)

diff --git a/docs/api-reference/dataframe.md b/docs/api-reference/dataframe.md
index c144b4af0..f78b4e3da 100644
--- a/docs/api-reference/dataframe.md
+++ b/docs/api-reference/dataframe.md
@@ -22,6 +22,7 @@
     - item
     - iter_rows
     - join
+    - join_asof
     - lazy
     - null_count
     - pipe
diff --git a/docs/api-reference/lazyframe.md b/docs/api-reference/lazyframe.md
index 9ca6a9745..5d472bab6 100644
--- a/docs/api-reference/lazyframe.md
+++ b/docs/api-reference/lazyframe.md
@@ -15,6 +15,7 @@
     - group_by
     - head
     - join
+    - join_asof
     - lazy
     - pipe
     - rename
diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
index f6cb47101..755a92416 100644
--- a/narwhals/_arrow/dataframe.py
+++ b/narwhals/_arrow/dataframe.py
@@ -315,6 +315,17 @@ def join(
             ),
         )
 
+    def join_asof(
+        self,
+        other: Self,
+        *,
+        left_on: str,
+        right_on: str,
+        strategy: Literal["backward", "forward", "nearest"] = "backward",
+    ) -> Self:
+        msg = "join_asof is not yet supported on PyArrow tables"
+        raise NotImplementedError(msg)
+
     def drop(self: Self, columns: list[str], strict: bool) -> Self:  # noqa: FBT001
         to_drop = parse_columns_to_drop(
compliant_frame=self, columns=columns, strict=strict diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index f11a88903..91a7e96a9 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -298,6 +298,26 @@ def join( ), ) + def join_asof( + self, + other: Self, + *, + left_on: str, + right_on: str, + strategy: Literal["backward", "forward", "nearest"] = "backward", + ) -> Self: + plx = self.__native_namespace__() + return self._from_native_frame( + plx.merge_asof( + self._native_frame, + other._native_frame, + left_on=left_on, + right_on=right_on, + direction=strategy, + suffixes=("", "_right"), + ), + ) + def group_by(self, *by: str) -> DaskLazyGroupBy: from narwhals._dask.group_by import DaskLazyGroupBy diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 193955cbd..0425e28e1 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -509,6 +509,26 @@ def join( ), ) + def join_asof( + self, + other: Self, + *, + left_on: str, + right_on: str, + strategy: Literal["backward", "forward", "nearest"] = "backward", + ) -> Self: + plx = self.__native_namespace__() + return self._from_native_frame( + plx.merge_asof( + self._native_frame, + other._native_frame, + left_on=left_on, + right_on=right_on, + direction=strategy, + suffixes=("", "_right"), + ), + ) + # --- partial reduction --- def head(self, n: int) -> Self: diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 9276fda10..da1ee1dc8 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -214,6 +214,29 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: self._compliant_frame.gather_every(n=n, offset=offset) ) + def join_asof( + self, + other: Self, + *, + left_on: str, + right_on: str, + strategy: Literal["backward", "forward", "nearest"] = "backward", + ) -> Self: + _supported_strategies = ("backward", "forward", "nearest") + + if strategy not in _supported_strategies: + msg = f"Only the following strategies are supported: {_supported_strategies}; found '{strategy}'." + raise NotImplementedError(msg) + + return self._from_compliant_dataframe( + self._compliant_frame.join_asof( + self._extract_compliant(other), + left_on=left_on, + right_on=right_on, + strategy=strategy, + ) + ) + class DataFrame(BaseFrame[FrameT]): """ @@ -1839,6 +1862,96 @@ def join( """ return super().join(other, how=how, left_on=left_on, right_on=right_on) + def join_asof( + self, + other: Self, + *, + left_on: str, + right_on: str, + strategy: Literal["backward", "forward", "nearest"] = "backward", + ) -> Self: + """ + Perform an asof join. + + This is similar to a left-join except that we match on nearest key rather than equal keys. + + Both DataFrames must be sorted by the asof_join key. + + Arguments: + other: DataFrame to join with. + + left_on: Name(s) of the left join column(s). + + right_on: Name(s) of the right join column(s). + + strategy: Join strategy. The default is "backward". + + * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key. + * *forward*: selects the first row in the right DataFrame whose "on" key is greater than or equal to the left's key. + * *nearest*: search selects the last row in the right DataFrame whose value is nearest to the left's key. + + Returns: + A new joined DataFrame + + Examples: + >>> from datetime import datetime + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data_gdp = { + ... 
"datetime": [ + ... datetime(2016, 1, 1), + ... datetime(2017, 1, 1), + ... datetime(2018, 1, 1), + ... datetime(2019, 1, 1), + ... datetime(2020, 1, 1), + ... ], + ... "gdp": [4164, 4411, 4566, 4696, 4827], + ... } + >>> data_population = { + ... "datetime": [ + ... datetime(2016, 3, 1), + ... datetime(2018, 8, 1), + ... datetime(2019, 1, 1), + ... ], + ... "population": [82.19, 82.66, 83.12], + ... } + >>> gdp_pd = pd.DataFrame(data_gdp) + >>> population_pd = pd.DataFrame(data_population) + + >>> gdp_pl = pl.DataFrame(data_gdp).sort("datetime") + >>> population_pl = pl.DataFrame(data_population).sort("datetime") + + Let's define a dataframe-agnostic function in which we join over "datetime" column: + + >>> @nw.narwhalify + ... def join_asof_date(df, other_any, strategy): + ... return df.join_asof( + ... other_any, left_on="datetime", right_on="datetime", strategy=strategy + ... ) + >>> # We can now pass either pandas or Polars to the function: + >>> join_asof_date(population_pd, gdp_pd, strategy="backward") + datetime population gdp + 0 2016-03-01 82.19 4164 + 1 2018-08-01 82.66 4566 + 2 2019-01-01 83.12 4696 + + >>> join_asof_date(population_pl, gdp_pl, strategy="backward") + shape: (3, 3) + ┌─────────────────────┬────────────┬──────┐ + │ datetime ┆ population ┆ gdp │ + │ --- ┆ --- ┆ --- │ + │ datetime[μs] ┆ f64 ┆ i64 │ + ╞═════════════════════╪════════════╪══════╡ + │ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │ + │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │ + │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ + └─────────────────────┴────────────┴──────┘ + """ + return super().join_asof( + other, left_on=left_on, right_on=right_on, strategy=strategy + ) + # --- descriptive --- def is_duplicated(self: Self) -> Series: r""" @@ -3378,6 +3491,95 @@ def join( """ return super().join(other, how=how, left_on=left_on, right_on=right_on) + def join_asof( + self, + other: Self, + *, + left_on: str, + right_on: str, + strategy: Literal["backward", "forward", "nearest"] = "backward", + ) -> Self: + """ + Perform an asof join. + + This is similar to a left-join except that we match on nearest key rather than equal keys. + + Both DataFrames must be sorted by the asof_join key. + + Arguments: + other: DataFrame to join with. + + left_on: Name(s) of the left join column(s). + + right_on: Name(s) of the right join column(s). + + strategy: Join strategy. The default is "backward". + + * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key. + * *forward*: selects the first row in the right DataFrame whose "on" key is greater than or equal to the left's key. + * *nearest*: search selects the last row in the right DataFrame whose value is nearest to the left's key. + + Returns: + A new joined DataFrame + + Examples: + >>> from datetime import datetime + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data_gdp = { + ... "datetime": [ + ... datetime(2016, 1, 1), + ... datetime(2017, 1, 1), + ... datetime(2018, 1, 1), + ... datetime(2019, 1, 1), + ... datetime(2020, 1, 1), + ... ], + ... "gdp": [4164, 4411, 4566, 4696, 4827], + ... } + >>> data_population = { + ... "datetime": [ + ... datetime(2016, 3, 1), + ... datetime(2018, 8, 1), + ... datetime(2019, 1, 1), + ... ], + ... "population": [82.19, 82.66, 83.12], + ... 
} + >>> gdp_pd = pd.DataFrame(data_gdp) + >>> population_pd = pd.DataFrame(data_population) + >>> gdp_pl = pl.LazyFrame(data_gdp).sort("datetime") + >>> population_pl = pl.LazyFrame(data_population).sort("datetime") + + Let's define a dataframe-agnostic function in which we join over "datetime" column: + + >>> @nw.narwhalify + ... def join_asof_date(df, other_any, strategy): + ... return df.join_asof( + ... other_any, left_on="datetime", right_on="datetime", strategy=strategy + ... ) + >>> # We can now pass either pandas or Polars to the function: + >>> join_asof_date(population_pd, gdp_pd, strategy="backward") + datetime population gdp + 0 2016-03-01 82.19 4164 + 1 2018-08-01 82.66 4566 + 2 2019-01-01 83.12 4696 + + >>> join_asof_date(population_pl, gdp_pl, strategy="backward").collect() + shape: (3, 3) + ┌─────────────────────┬────────────┬──────┐ + │ datetime ┆ population ┆ gdp │ + │ --- ┆ --- ┆ --- │ + │ datetime[μs] ┆ f64 ┆ i64 │ + ╞═════════════════════╪════════════╪══════╡ + │ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │ + │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │ + │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ + └─────────────────────┴────────────┴──────┘ + """ + return super().join_asof( + other, left_on=left_on, right_on=right_on, strategy=strategy + ) + def clone(self) -> Self: r""" Create a copy of this DataFrame. diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index e6dfad634..c9119e204 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -1,6 +1,7 @@ from __future__ import annotations import re +from datetime import datetime from typing import Any import pandas as pd @@ -8,6 +9,7 @@ import narwhals.stable.v1 as nw from narwhals.utils import Implementation +from narwhals.utils import parse_version from tests.utils import compare_dicts @@ -202,3 +204,113 @@ def test_left_join_overlapping_column(constructor: Any) -> None: "index": [0, 1, 2], } compare_dicts(result, expected) + + +def test_joinasof_numeric(constructor: Any, request: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + if parse_version(pd.__version__) < (2, 1) and ( + ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) + ): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor({"a": [1, 5, 10], "val": ["a", "b", "c"]})).sort("a") + df_right = nw.from_native( + constructor({"a": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) + ).sort("a") + result_backward = df.join_asof(df_right, left_on="a", right_on="a") # type: ignore[arg-type] + result_forward = df.join_asof(df_right, left_on="a", right_on="a", strategy="forward") # type: ignore[arg-type] + result_nearest = df.join_asof(df_right, left_on="a", right_on="a", strategy="nearest") # type: ignore[arg-type] + expected_backward = { + "a": [1, 5, 10], + "val": ["a", "b", "c"], + "val_right": [1, 3, 7], + } + expected_forward = { + "a": [1, 5, 10], + "val": ["a", "b", "c"], + "val_right": [1, 6, float("nan")], + } + expected_nearest = { + "a": [1, 5, 10], + "val": ["a", "b", "c"], + "val_right": [1, 6, 7], + } + compare_dicts(result_backward, expected_backward) + compare_dicts(result_forward, expected_forward) + compare_dicts(result_nearest, expected_nearest) + + +def test_joinasof_time(constructor: Any, request: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + if parse_version(pd.__version__) < (2, 1) and ("pandas_pyarrow" in str(constructor)): + request.applymarker(pytest.mark.xfail) + df = nw.from_native( + 
constructor( + { + "datetime": [ + datetime(2016, 3, 1), + datetime(2018, 8, 1), + datetime(2019, 1, 1), + ], + "population": [82.19, 82.66, 83.12], + } + ) + ).sort("datetime") + df_right = nw.from_native( + constructor( + { + "datetime": [ + datetime(2016, 1, 1), + datetime(2017, 1, 1), + datetime(2018, 1, 1), + datetime(2019, 1, 1), + datetime(2020, 1, 1), + ], + "gdp": [4164, 4411, 4566, 4696, 4827], + } + ) + ).sort("datetime") + result_backward = df.join_asof(df_right, left_on="datetime", right_on="datetime") # type: ignore[arg-type] + result_forward = df.join_asof( + df_right, # type: ignore[arg-type] + left_on="datetime", + right_on="datetime", + strategy="forward", + ) + result_nearest = df.join_asof( + df_right, # type: ignore[arg-type] + left_on="datetime", + right_on="datetime", + strategy="nearest", + ) + expected_backward = { + "datetime": [datetime(2016, 3, 1), datetime(2018, 8, 1), datetime(2019, 1, 1)], + "population": [82.19, 82.66, 83.12], + "gdp": [4164, 4566, 4696], + } + expected_forward = { + "datetime": [datetime(2016, 3, 1), datetime(2018, 8, 1), datetime(2019, 1, 1)], + "population": [82.19, 82.66, 83.12], + "gdp": [4411, 4696, 4696], + } + expected_nearest = { + "datetime": [datetime(2016, 3, 1), datetime(2018, 8, 1), datetime(2019, 1, 1)], + "population": [82.19, 82.66, 83.12], + "gdp": [4164, 4696, 4696], + } + compare_dicts(result_backward, expected_backward) + compare_dicts(result_forward, expected_forward) + compare_dicts(result_nearest, expected_nearest) + + +@pytest.mark.parametrize("strategy", ["back", "furthest"]) +def test_joinasof_not_implemented(constructor: Any, strategy: str) -> None: + data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + df = nw.from_native(constructor(data)) + + with pytest.raises( + NotImplementedError, + match=rf"Only the following strategies are supported: \('backward', 'forward', 'nearest'\); found '{strategy}'.", + ): + df.join_asof(df, left_on="a", right_on="a", strategy=strategy) # type: ignore[arg-type] From 4cf94ce268c92b4d26b7e4fed6e245d3eee1bdfc Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Fri, 6 Sep 2024 16:16:42 +0100 Subject: [PATCH 07/30] feat: enable `on` key in `join_asof` (#916) * enable `on` key in `join_asof` * remove repeated keys validation in LazyFrame --- narwhals/_arrow/dataframe.py | 5 ++- narwhals/_dask/dataframe.py | 6 ++- narwhals/_pandas_like/dataframe.py | 6 ++- narwhals/dataframe.py | 72 +++++++++++++++++++----------- tests/frame/join_test.py | 47 +++++++++++++++++++ 5 files changed, 103 insertions(+), 33 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 755a92416..f01ada158 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -319,8 +319,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: msg = "join_asof is not yet supported on PyArrow tables" diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 91a7e96a9..8f11ccaad 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -302,8 +302,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: plx = self.__native_namespace__() @@ 
-313,6 +314,7 @@ def join_asof( other._native_frame, left_on=left_on, right_on=right_on, + on=on, direction=strategy, suffixes=("", "_right"), ), diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 0425e28e1..9750cd9d4 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -513,8 +513,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: plx = self.__native_namespace__() @@ -524,6 +525,7 @@ def join_asof( other._native_frame, left_on=left_on, right_on=right_on, + on=on, direction=strategy, suffixes=("", "_right"), ), diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index da1ee1dc8..440856eb4 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -218,8 +218,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: _supported_strategies = ("backward", "forward", "nearest") @@ -228,14 +229,29 @@ def join_asof( msg = f"Only the following strategies are supported: {_supported_strategies}; found '{strategy}'." raise NotImplementedError(msg) - return self._from_compliant_dataframe( - self._compliant_frame.join_asof( - self._extract_compliant(other), - left_on=left_on, - right_on=right_on, - strategy=strategy, + if left_on is not None and right_on is not None and on is not None: + msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." + raise ValueError(msg) + if left_on is not None and right_on is not None: + return self._from_compliant_dataframe( + self._compliant_frame.join_asof( + self._extract_compliant(other), + left_on=left_on, + right_on=right_on, + strategy=strategy, + ) ) - ) + elif on is not None: + return self._from_compliant_dataframe( + self._compliant_frame.join_asof( + self._extract_compliant(other), + on=on, + strategy=strategy, + ) + ) + else: + msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." + raise ValueError(msg) class DataFrame(BaseFrame[FrameT]): @@ -1866,8 +1882,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: """ @@ -1884,6 +1901,8 @@ def join_asof( right_on: Name(s) of the right join column(s). + on: Join column of both DataFrames. If set, left_on and right_on should be None. + strategy: Join strategy. The default is "backward". * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key. @@ -1925,18 +1944,16 @@ def join_asof( Let's define a dataframe-agnostic function in which we join over "datetime" column: >>> @nw.narwhalify - ... def join_asof_date(df, other_any, strategy): - ... return df.join_asof( - ... other_any, left_on="datetime", right_on="datetime", strategy=strategy - ... ) + ... def join_asof_datetime(df, other_any, strategy): + ... 
return df.join_asof(other_any, on="datetime", strategy=strategy) >>> # We can now pass either pandas or Polars to the function: - >>> join_asof_date(population_pd, gdp_pd, strategy="backward") + >>> join_asof_datetime(population_pd, gdp_pd, strategy="backward") datetime population gdp 0 2016-03-01 82.19 4164 1 2018-08-01 82.66 4566 2 2019-01-01 83.12 4696 - >>> join_asof_date(population_pl, gdp_pl, strategy="backward") + >>> join_asof_datetime(population_pl, gdp_pl, strategy="backward") shape: (3, 3) ┌─────────────────────┬────────────┬──────┐ │ datetime ┆ population ┆ gdp │ @@ -1949,7 +1966,7 @@ def join_asof( └─────────────────────┴────────────┴──────┘ """ return super().join_asof( - other, left_on=left_on, right_on=right_on, strategy=strategy + other, left_on=left_on, right_on=right_on, on=on, strategy=strategy ) # --- descriptive --- @@ -3495,8 +3512,9 @@ def join_asof( self, other: Self, *, - left_on: str, - right_on: str, + left_on: str | None = None, + right_on: str | None = None, + on: str | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: """ @@ -3513,6 +3531,8 @@ def join_asof( right_on: Name(s) of the right join column(s). + on: Join column of both DataFrames. If set, left_on and right_on should be None. + strategy: Join strategy. The default is "backward". * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key. @@ -3553,18 +3573,16 @@ def join_asof( Let's define a dataframe-agnostic function in which we join over "datetime" column: >>> @nw.narwhalify - ... def join_asof_date(df, other_any, strategy): - ... return df.join_asof( - ... other_any, left_on="datetime", right_on="datetime", strategy=strategy - ... ) + ... def join_asof_datetime(df, other_any, strategy): + ... 
return df.join_asof(other_any, on="datetime", strategy=strategy) >>> # We can now pass either pandas or Polars to the function: - >>> join_asof_date(population_pd, gdp_pd, strategy="backward") + >>> join_asof_datetime(population_pd, gdp_pd, strategy="backward") datetime population gdp 0 2016-03-01 82.19 4164 1 2018-08-01 82.66 4566 2 2019-01-01 83.12 4696 - >>> join_asof_date(population_pl, gdp_pl, strategy="backward").collect() + >>> join_asof_datetime(population_pl, gdp_pl, strategy="backward").collect() shape: (3, 3) ┌─────────────────────┬────────────┬──────┐ │ datetime ┆ population ┆ gdp │ @@ -3577,7 +3595,7 @@ def join_asof( └─────────────────────┴────────────┴──────┘ """ return super().join_asof( - other, left_on=left_on, right_on=right_on, strategy=strategy + other, left_on=left_on, right_on=right_on, on=on, strategy=strategy ) def clone(self) -> Self: diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index c9119e204..72f1304df 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -220,6 +220,9 @@ def test_joinasof_numeric(constructor: Any, request: Any) -> None: result_backward = df.join_asof(df_right, left_on="a", right_on="a") # type: ignore[arg-type] result_forward = df.join_asof(df_right, left_on="a", right_on="a", strategy="forward") # type: ignore[arg-type] result_nearest = df.join_asof(df_right, left_on="a", right_on="a", strategy="nearest") # type: ignore[arg-type] + result_backward_on = df.join_asof(df_right, on="a") # type: ignore[arg-type] + result_forward_on = df.join_asof(df_right, on="a", strategy="forward") # type: ignore[arg-type] + result_nearest_on = df.join_asof(df_right, on="a", strategy="nearest") # type: ignore[arg-type] expected_backward = { "a": [1, 5, 10], "val": ["a", "b", "c"], @@ -238,6 +241,9 @@ def test_joinasof_numeric(constructor: Any, request: Any) -> None: compare_dicts(result_backward, expected_backward) compare_dicts(result_forward, expected_forward) compare_dicts(result_nearest, expected_nearest) + compare_dicts(result_backward_on, expected_backward) + compare_dicts(result_forward_on, expected_forward) + compare_dicts(result_nearest_on, expected_nearest) def test_joinasof_time(constructor: Any, request: Any) -> None: @@ -284,6 +290,17 @@ def test_joinasof_time(constructor: Any, request: Any) -> None: right_on="datetime", strategy="nearest", ) + result_backward_on = df.join_asof(df_right, on="datetime") # type: ignore[arg-type] + result_forward_on = df.join_asof( + df_right, # type: ignore[arg-type] + on="datetime", + strategy="forward", + ) + result_nearest_on = df.join_asof( + df_right, # type: ignore[arg-type] + on="datetime", + strategy="nearest", + ) expected_backward = { "datetime": [datetime(2016, 3, 1), datetime(2018, 8, 1), datetime(2019, 1, 1)], "population": [82.19, 82.66, 83.12], @@ -302,6 +319,9 @@ def test_joinasof_time(constructor: Any, request: Any) -> None: compare_dicts(result_backward, expected_backward) compare_dicts(result_forward, expected_forward) compare_dicts(result_nearest, expected_nearest) + compare_dicts(result_backward_on, expected_backward) + compare_dicts(result_forward_on, expected_forward) + compare_dicts(result_nearest_on, expected_nearest) @pytest.mark.parametrize("strategy", ["back", "furthest"]) @@ -314,3 +334,30 @@ def test_joinasof_not_implemented(constructor: Any, strategy: str) -> None: match=rf"Only the following strategies are supported: \('backward', 'forward', 'nearest'\); found '{strategy}'.", ): df.join_asof(df, left_on="a", right_on="a", strategy=strategy) # type: 
ignore[arg-type] + + +def test_joinasof_no_keys(constructor: Any) -> None: + data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + df = nw.from_native(constructor(data)) + + msg = r"Either \(`left_on` and `right_on`\) or `on` keys should be specified." + with pytest.raises( + ValueError, + match=msg, + ): + df.join_asof(df, left_on="a") # type: ignore[arg-type] + with pytest.raises( + ValueError, + match=msg, + ): + df.join_asof(df, right_on="a") # type: ignore[arg-type] + with pytest.raises( + ValueError, + match=msg, + ): + df.join_asof(df) # type: ignore[arg-type] + with pytest.raises( + ValueError, + match=msg, + ): + df.join_asof(df, left_on="a", right_on="a", on="a") # type: ignore[arg-type] From ad5616a2c6488c5cb1c5a6dcef71ac00a8c6d65a Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Fri, 6 Sep 2024 19:48:15 +0200 Subject: [PATCH 08/30] patch: group by `n_unique` (#917) --- narwhals/_arrow/group_by.py | 27 +++++++++--- narwhals/_dask/group_by.py | 35 +++++++++++++--- narwhals/_pandas_like/group_by.py | 69 ++++++++++++++++++++++++------- tests/test_group_by.py | 51 +++++++++++++++++++++++ 4 files changed, 156 insertions(+), 26 deletions(-) diff --git a/narwhals/_arrow/group_by.py b/narwhals/_arrow/group_by.py index 27c7ff368..78b241c9b 100644 --- a/narwhals/_arrow/group_by.py +++ b/narwhals/_arrow/group_by.py @@ -15,6 +15,12 @@ from narwhals._arrow.expr import ArrowExpr from narwhals._arrow.typing import IntoArrowExpr +POLARS_TO_ARROW_AGGREGATIONS = { + "n_unique": "count_distinct", + "std": "stddev", + "var": "variance", # currently unused, we don't have `var` yet +} + class ArrowGroupBy: def __init__(self, df: ArrowDataFrame, keys: list[str]) -> None: @@ -112,16 +118,27 @@ def agg_arrow( raise AssertionError(msg) function_name = remove_prefix(expr._function_name, "col->") + function_name = POLARS_TO_ARROW_AGGREGATIONS.get(function_name, function_name) for root_name, output_name in zip(expr._root_names, expr._output_names): - if function_name != "len": + if function_name == "len": simple_aggregations[output_name] = ( - (root_name, function_name), - f"{root_name}_{function_name}", + (root_name, "count", pc.CountOptions(mode="all")), + f"{root_name}_count", + ) + elif function_name == "count_distinct": + simple_aggregations[output_name] = ( + (root_name, "count_distinct", pc.CountOptions(mode="all")), + f"{root_name}_count_distinct", + ) + elif function_name == "stddev": + simple_aggregations[output_name] = ( + (root_name, "stddev", pc.VarianceOptions(ddof=1)), + f"{root_name}_stddev", ) else: simple_aggregations[output_name] = ( - (root_name, "count", pc.CountOptions(mode="all")), - f"{root_name}_count", + (root_name, function_name), + f"{root_name}_{function_name}", ) aggs: list[Any] = [] diff --git a/narwhals/_dask/group_by.py b/narwhals/_dask/group_by.py index 8538c62d2..463d6fc58 100644 --- a/narwhals/_dask/group_by.py +++ b/narwhals/_dask/group_by.py @@ -10,12 +10,33 @@ from narwhals.utils import remove_prefix if TYPE_CHECKING: + import dask.dataframe as dd + import pandas as pd + from narwhals._dask.dataframe import DaskLazyFrame from narwhals._dask.expr import DaskExpr from narwhals._dask.typing import IntoDaskExpr -POLARS_TO_PANDAS_AGGREGATIONS = { + +def n_unique() -> dd.Aggregation: + import dask.dataframe as dd # ignore-banned-import + + def chunk(s: pd.core.groupby.generic.SeriesGroupBy) -> int: + return s.nunique(dropna=False) # type: ignore[no-any-return] + + def agg(s0: pd.core.groupby.generic.SeriesGroupBy) -> 
int: + return s0.sum() # type: ignore[no-any-return] + + return dd.Aggregation( + name="nunique", + chunk=chunk, + agg=agg, + ) + + +POLARS_TO_DASK_AGGREGATIONS = { "len": "size", + "n_unique": n_unique, } @@ -85,7 +106,7 @@ def agg_dask( break if all_simple_aggs: - simple_aggregations: dict[str, tuple[str, str]] = {} + simple_aggregations: dict[str, tuple[str, str | dd.Aggregation]] = {} for expr in exprs: if expr._depth == 0: # e.g. agg(nw.len()) # noqa: ERA001 @@ -93,7 +114,7 @@ def agg_dask( msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues" raise AssertionError(msg) - function_name = POLARS_TO_PANDAS_AGGREGATIONS.get( + function_name = POLARS_TO_DASK_AGGREGATIONS.get( expr._function_name, expr._function_name ) for output_name in expr._output_names: @@ -108,9 +129,11 @@ def agg_dask( raise AssertionError(msg) function_name = remove_prefix(expr._function_name, "col->") - function_name = POLARS_TO_PANDAS_AGGREGATIONS.get( - function_name, function_name - ) + function_name = POLARS_TO_DASK_AGGREGATIONS.get(function_name, function_name) + + # deal with n_unique case in a "lazy" mode to not depend on dask globally + function_name = function_name() if callable(function_name) else function_name + for root_name, output_name in zip(expr._root_names, expr._output_names): simple_aggregations[output_name] = (root_name, function_name) try: diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index 11abc85c8..97a477dc4 100644 --- a/narwhals/_pandas_like/group_by.py +++ b/narwhals/_pandas_like/group_by.py @@ -21,6 +21,7 @@ POLARS_TO_PANDAS_AGGREGATIONS = { "len": "size", + "n_unique": "nunique", } @@ -103,7 +104,7 @@ def __iter__(self) -> Iterator[tuple[Any, PandasLikeDataFrame]]: yield from ((key, self._from_native_frame(sub_df)) for (key, sub_df) in iterator) -def agg_pandas( +def agg_pandas( # noqa: PLR0915 grouped: Any, exprs: list[PandasLikeExpr], keys: list[str], @@ -120,13 +121,18 @@ def agg_pandas( - https://github.com/rapidsai/cudf/issues/15118 - https://github.com/rapidsai/cudf/issues/15084 """ - all_simple_aggs = True + all_aggs_are_simple = True for expr in exprs: if not is_simple_aggregation(expr): - all_simple_aggs = False + all_aggs_are_simple = False break - if all_simple_aggs: + # dict of {output_name: root_name} that we count n_unique on + # We need to do this separately from the rest so that we + # can pass the `dropna` kwargs. + nunique_aggs: dict[str, str] = {} + + if all_aggs_are_simple: simple_aggregations: dict[str, tuple[str, str]] = {} for expr in exprs: if expr._depth == 0: @@ -154,21 +160,54 @@ def agg_pandas( function_name, function_name ) for root_name, output_name in zip(expr._root_names, expr._output_names): - simple_aggregations[output_name] = (root_name, function_name) + if function_name == "nunique": + nunique_aggs[output_name] = root_name + else: + simple_aggregations[output_name] = (root_name, function_name) - aggs = collections.defaultdict(list) + simple_aggs = collections.defaultdict(list) name_mapping = {} for output_name, named_agg in simple_aggregations.items(): - aggs[named_agg[0]].append(named_agg[1]) + simple_aggs[named_agg[0]].append(named_agg[1]) name_mapping[f"{named_agg[0]}_{named_agg[1]}"] = output_name - try: - result_simple = grouped.agg(aggs) - except AttributeError as exc: - msg = "Failed to aggregated - does your aggregation function return a scalar?" 
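# An illustrative sketch, not part of the patch: the user-facing behaviour the
# three backend mappings above unlock. It follows the pattern of the tests added
# later in this commit; the sample data is an assumption for demonstration only.
#
#     import pandas as pd
#     import narwhals as nw
#
#     data = {"a": [1, 1, 2], "b": [4, 5, 5]}
#     df = nw.from_native(pd.DataFrame(data), eager_only=True)
#     # `n_unique` now routes to pandas' `nunique`, pyarrow's `count_distinct`,
#     # and the custom `dd.Aggregation` above on dask, so one spelling works
#     # across backends:
#     result = df.group_by("a").agg(nw.col("b").n_unique()).sort("a")
#     print(nw.to_native(result))  # a: [1, 2], b: [2, 1]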
- raise RuntimeError(msg) from exc - result_simple.columns = [f"{a}_{b}" for a, b in result_simple.columns] - result_simple = result_simple.rename(columns=name_mapping).reset_index() - return from_dataframe(result_simple.loc[:, output_names]) + if simple_aggs: + try: + result_simple_aggs = grouped.agg(simple_aggs) + except AttributeError as exc: + msg = "Failed to aggregated - does your aggregation function return a scalar?" + raise RuntimeError(msg) from exc + result_simple_aggs.columns = [ + f"{a}_{b}" for a, b in result_simple_aggs.columns + ] + result_simple_aggs = result_simple_aggs.rename( + columns=name_mapping + ).reset_index() + if nunique_aggs: + result_nunique_aggs = grouped[list(nunique_aggs.values())].nunique( + dropna=False + ) + result_nunique_aggs.columns = list(nunique_aggs.keys()) + result_nunique_aggs = result_nunique_aggs.reset_index() + if simple_aggs and nunique_aggs: + if ( + set(result_simple_aggs.columns) + .difference(keys) + .intersection(result_nunique_aggs.columns) + ): + msg = ( + "Got two aggregations with the same output name. Please make sure " + "that aggregations have unique output names." + ) + raise ValueError(msg) + result_aggs = result_simple_aggs.merge(result_nunique_aggs, on=keys) + elif nunique_aggs and not simple_aggs: + result_aggs = result_nunique_aggs + elif simple_aggs and not nunique_aggs: + result_aggs = result_simple_aggs + else: # pragma: no cover + msg = "Congrats, you entered unreachable code. Please report a bug to https://github.com/narwhals-dev/narwhals/issues." + raise RuntimeError(msg) + return from_dataframe(result_aggs.loc[:, output_names]) if dataframe_is_empty: # Don't even attempt this, it's way too inconsistent across pandas versions. diff --git a/tests/test_group_by.py b/tests/test_group_by.py index 2bb8d435b..4bd3427a5 100644 --- a/tests/test_group_by.py +++ b/tests/test_group_by.py @@ -102,6 +102,57 @@ def test_group_by_len(constructor: Any) -> None: compare_dicts(result, expected) +def test_group_by_n_unique(constructor: Any) -> None: + result = ( + nw.from_native(constructor(data)) + .group_by("a") + .agg(nw.col("b").n_unique()) + .sort("a") + ) + expected = {"a": [1, 3], "b": [1, 1]} + compare_dicts(result, expected) + + +def test_group_by_std(constructor: Any) -> None: + data = {"a": [1, 1, 2, 2], "b": [5, 4, 3, 2]} + result = ( + nw.from_native(constructor(data)).group_by("a").agg(nw.col("b").std()).sort("a") + ) + expected = {"a": [1, 2], "b": [0.707107] * 2} + compare_dicts(result, expected) + + +def test_group_by_n_unique_w_missing(constructor: Any) -> None: + data = {"a": [1, 1, 2], "b": [4, None, 5], "c": [None, None, 7], "d": [1, 1, 3]} + result = ( + nw.from_native(constructor(data)) + .group_by("a") + .agg( + nw.col("b").n_unique(), + c_n_unique=nw.col("c").n_unique(), + c_n_min=nw.col("b").min(), + d_n_unique=nw.col("d").n_unique(), + ) + .sort("a") + ) + expected = { + "a": [1, 2], + "b": [2, 1], + "c_n_unique": [1, 1], + "c_n_min": [4, 5], + "d_n_unique": [1, 1], + } + compare_dicts(result, expected) + + +def test_group_by_same_name_twice() -> None: + import pandas as pd + + df = pd.DataFrame({"a": [1, 1, 2], "b": [4, 5, 6]}) + with pytest.raises(ValueError, match="two aggregations with the same"): + nw.from_native(df).group_by("a").agg(nw.col("b").sum(), nw.col("b").n_unique()) + + def test_group_by_empty_result_pandas() -> None: df_any = pd.DataFrame({"a": [1, 2, 3], "b": [4, 3, 2]}) df = nw.from_native(df_any, eager_only=True) From 0061d5b6befc8e4ef67eaf94243eaee005862d05 Mon Sep 17 00:00:00 2001 From: 
Marco Edward Gorelli Date: Fri, 6 Sep 2024 22:09:20 +0100 Subject: [PATCH 09/30] test: fixup tpch tests (#918) * test: fixup tpch tests * test: fixup tpch tests * fixup --- .github/workflows/check_tpch_queries.yml | 2 +- tpch/__init__.py | 0 tpch/execute/q10.py | 8 ----- tpch/execute/q11.py | 8 ----- tpch/execute/q15.py | 8 ----- tpch/execute/q17.py | 8 ----- tpch/execute/q18.py | 8 ----- tpch/execute/q3.py | 8 ----- tpch/execute/q4.py | 8 ----- tpch/execute/q5.py | 16 ---------- tpch/execute/q6.py | 8 ----- tpch/execute/q7.py | 9 ------ tpch/execute/q9.py | 12 -------- tpch/generate_data.py | 2 +- tpch/tests/test_queries.py | 38 ++++++++++-------------- 15 files changed, 17 insertions(+), 126 deletions(-) delete mode 100644 tpch/__init__.py diff --git a/.github/workflows/check_tpch_queries.yml b/.github/workflows/check_tpch_queries.yml index 397163091..82a2f4aa4 100644 --- a/.github/workflows/check_tpch_queries.yml +++ b/.github/workflows/check_tpch_queries.yml @@ -27,4 +27,4 @@ jobs: - name: generate-data run: cd tpch && python generate_data.py - name: tpch-tests - run: python -m unittest discover -s 'tpch/tests' \ No newline at end of file + run: cd tpch && pytest tests \ No newline at end of file diff --git a/tpch/__init__.py b/tpch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tpch/execute/q10.py b/tpch/execute/q10.py index 9876f2aa9..99d850f53 100644 --- a/tpch/execute/q10.py +++ b/tpch/execute/q10.py @@ -6,18 +6,10 @@ from . import nation from . import orders -tool = "pandas" -fn = IO_FUNCS[tool] -print(q10.query(fn(customer), fn(nation), fn(lineitem), fn(orders))) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print(q10.query(fn(customer), fn(nation), fn(lineitem), fn(orders))) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print(q10.query(fn(customer), fn(nation), fn(lineitem), fn(orders))) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q10.query(fn(customer), fn(nation), fn(lineitem), fn(orders)).collect()) diff --git a/tpch/execute/q11.py b/tpch/execute/q11.py index 82b1936aa..101710adb 100644 --- a/tpch/execute/q11.py +++ b/tpch/execute/q11.py @@ -5,18 +5,10 @@ from . import partsupp from . import supplier -tool = "pandas" -fn = IO_FUNCS[tool] -print(q11.query(fn(nation), fn(partsupp), fn(supplier))) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print(q11.query(fn(nation), fn(partsupp), fn(supplier))) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print(q11.query(fn(nation), fn(partsupp), fn(supplier))) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q11.query(fn(nation), fn(partsupp), fn(supplier)).collect()) diff --git a/tpch/execute/q15.py b/tpch/execute/q15.py index 8fdaf2ab1..0d9e9f374 100644 --- a/tpch/execute/q15.py +++ b/tpch/execute/q15.py @@ -4,18 +4,10 @@ from . import lineitem from . import supplier -tool = "pandas" -fn = IO_FUNCS[tool] -print(q15.query(fn(lineitem), fn(supplier))) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print(q15.query(fn(lineitem), fn(supplier))) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print(q15.query(fn(lineitem), fn(supplier))) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q15.query(fn(lineitem), fn(supplier)).collect()) diff --git a/tpch/execute/q17.py b/tpch/execute/q17.py index 5f2228012..2d9920c69 100644 --- a/tpch/execute/q17.py +++ b/tpch/execute/q17.py @@ -4,18 +4,10 @@ from . import lineitem from . 
import part -tool = "pandas" -fn = IO_FUNCS[tool] -print(q17.query(fn(lineitem), fn(part))) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print(q17.query(fn(lineitem), fn(part))) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print(q17.query(fn(lineitem), fn(part))) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q17.query(fn(lineitem), fn(part)).collect()) diff --git a/tpch/execute/q18.py b/tpch/execute/q18.py index 5a59f0e5e..4092fc0d6 100644 --- a/tpch/execute/q18.py +++ b/tpch/execute/q18.py @@ -5,18 +5,10 @@ from . import lineitem from . import orders -tool = "pandas" -fn = IO_FUNCS[tool] -print(q18.query(fn(customer), fn(lineitem), fn(orders))) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print(q18.query(fn(customer), fn(lineitem), fn(orders))) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print(q18.query(fn(customer), fn(lineitem), fn(orders))) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q18.query(fn(customer), fn(lineitem), fn(orders)).collect()) diff --git a/tpch/execute/q3.py b/tpch/execute/q3.py index 8602bb3d0..a1eea74d1 100644 --- a/tpch/execute/q3.py +++ b/tpch/execute/q3.py @@ -5,18 +5,10 @@ from . import lineitem from . import orders -tool = "pandas" -fn = IO_FUNCS[tool] -print(q3.query(fn(customer), fn(lineitem), fn(orders))) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print(q3.query(fn(customer), fn(lineitem), fn(orders))) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print(q3.query(fn(customer), fn(lineitem), fn(orders))) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q3.query(fn(customer), fn(lineitem), fn(orders)).collect()) diff --git a/tpch/execute/q4.py b/tpch/execute/q4.py index 3e67a9c87..79213f1ac 100644 --- a/tpch/execute/q4.py +++ b/tpch/execute/q4.py @@ -4,18 +4,10 @@ from . import line_item from . import orders -tool = "pandas" -fn = IO_FUNCS[tool] -print(q4.query(fn(line_item), fn(orders))) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print(q4.query(fn(line_item), fn(orders))) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print(q4.query(fn(line_item), fn(orders))) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q4.query(fn(line_item), fn(orders)).collect()) diff --git a/tpch/execute/q5.py b/tpch/execute/q5.py index 317b15fc7..7a04dec1b 100644 --- a/tpch/execute/q5.py +++ b/tpch/execute/q5.py @@ -8,14 +8,6 @@ from . import region from . import supplier -tool = "pandas" -fn = IO_FUNCS[tool] -print( - q5.query( - fn(region), fn(nation), fn(customer), fn(line_item), fn(orders), fn(supplier) - ) -) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print( @@ -24,14 +16,6 @@ ) ) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print( - q5.query( - fn(region), fn(nation), fn(customer), fn(line_item), fn(orders), fn(supplier) - ) -) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print( diff --git a/tpch/execute/q6.py b/tpch/execute/q6.py index adca0e26d..402e6d452 100644 --- a/tpch/execute/q6.py +++ b/tpch/execute/q6.py @@ -3,18 +3,10 @@ from . import IO_FUNCS from . import lineitem -tool = "pandas" -fn = IO_FUNCS[tool] -print(q6.query(fn(lineitem))) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print(q6.query(fn(lineitem))) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print(q6.query(fn(lineitem))) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q6.query(fn(lineitem)).collect()) diff --git a/tpch/execute/q7.py b/tpch/execute/q7.py index 43e110a72..9f6179d23 100644 --- a/tpch/execute/q7.py +++ b/tpch/execute/q7.py @@ -7,19 +7,10 @@ from . import orders from . 
import supplier -tool = "pandas" -fn = IO_FUNCS[tool] -print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))) - - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print( diff --git a/tpch/execute/q9.py b/tpch/execute/q9.py index 9ccbe35b7..e01dd0f2c 100644 --- a/tpch/execute/q9.py +++ b/tpch/execute/q9.py @@ -8,24 +8,12 @@ from . import partsupp from . import supplier -tool = "pandas" -fn = IO_FUNCS[tool] -print( - q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier)) -) - tool = "pandas[pyarrow]" fn = IO_FUNCS[tool] print( q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier)) ) -tool = "polars[eager]" -fn = IO_FUNCS[tool] -print( - q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier)) -) - tool = "polars[lazy]" fn = IO_FUNCS[tool] print( diff --git a/tpch/generate_data.py b/tpch/generate_data.py index 9ae7c3214..4d5695dcf 100644 --- a/tpch/generate_data.py +++ b/tpch/generate_data.py @@ -1,4 +1,4 @@ -from pathlib import Path +from pathlib import Path # noqa: INP001 import duckdb import pyarrow as pa diff --git a/tpch/tests/test_queries.py b/tpch/tests/test_queries.py index 4b7cdd866..35909b683 100644 --- a/tpch/tests/test_queries.py +++ b/tpch/tests/test_queries.py @@ -1,29 +1,21 @@ -import os import subprocess import sys -import unittest from pathlib import Path -class TestQueries(unittest.TestCase): - def test_execute_scripts(self) -> None: - root = Path(__file__).resolve().parent.parent - # directory containing all the queries - execute_dir = root / "execute" +def test_execute_scripts() -> None: + root = Path(__file__).resolve().parent.parent + # directory containing all the queries + execute_dir = root / "execute" - env = os.environ.copy() - env["PYTHONPATH"] = str(root) - - for script_path in execute_dir.glob("q[1-9]*.py"): - result = subprocess.run( # noqa: S603 - [sys.executable, str(script_path)], - capture_output=True, - text=True, - env=env, - cwd=root, - check=False, - shell=False, - ) - assert ( - result.returncode == 0 - ), f"Script {script_path} failed with error: {result.stderr}" + for script_path in execute_dir.glob("q[1-9]*.py"): + print(f"executing query {script_path.stem}") # noqa: T201 + result = subprocess.run( # noqa: S603 + [sys.executable, "-m", f"execute.{script_path.stem}"], + capture_output=True, + text=True, + check=False, + ) + assert ( + result.returncode == 0 + ), f"Script {script_path} failed with error: {result.stderr}" From 62c8adadd535ad8a10f5f45ef455678989d4c49c Mon Sep 17 00:00:00 2001 From: Zhengbo Wang Date: Sat, 7 Sep 2024 14:50:59 +0800 Subject: [PATCH 10/30] feat: Add q12, q13, q14, q16, q22 (#910) --- tpch/execute/q12.py | 13 +++++++++++++ tpch/execute/q13.py | 13 +++++++++++++ tpch/execute/q14.py | 13 +++++++++++++ tpch/execute/q16.py | 14 ++++++++++++++ tpch/execute/q22.py | 13 +++++++++++++ tpch/queries/q12.py | 33 +++++++++++++++++++++++++++++++++ tpch/queries/q13.py | 19 +++++++++++++++++++ tpch/queries/q14.py | 27 +++++++++++++++++++++++++++ tpch/queries/q16.py | 26 ++++++++++++++++++++++++++ tpch/queries/q22.py | 32 ++++++++++++++++++++++++++++++++ 10 files changed, 203 insertions(+) create mode 100644 tpch/execute/q12.py create mode 100644 tpch/execute/q13.py create mode 
100644 tpch/execute/q14.py create mode 100644 tpch/execute/q16.py create mode 100644 tpch/execute/q22.py create mode 100644 tpch/queries/q12.py create mode 100644 tpch/queries/q13.py create mode 100644 tpch/queries/q14.py create mode 100644 tpch/queries/q16.py create mode 100644 tpch/queries/q22.py diff --git a/tpch/execute/q12.py b/tpch/execute/q12.py new file mode 100644 index 000000000..b74742373 --- /dev/null +++ b/tpch/execute/q12.py @@ -0,0 +1,13 @@ +from queries import q12 + +from . import IO_FUNCS +from . import line_item +from . import orders + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q12.query(fn(line_item), fn(orders))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print(q12.query(fn(line_item), fn(orders)).collect()) diff --git a/tpch/execute/q13.py b/tpch/execute/q13.py new file mode 100644 index 000000000..084fcca9b --- /dev/null +++ b/tpch/execute/q13.py @@ -0,0 +1,13 @@ +from queries import q13 + +from . import IO_FUNCS +from . import customer +from . import orders + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q13.query(fn(customer), fn(orders))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print(q13.query(fn(customer), fn(orders)).collect()) diff --git a/tpch/execute/q14.py b/tpch/execute/q14.py new file mode 100644 index 000000000..57f83a595 --- /dev/null +++ b/tpch/execute/q14.py @@ -0,0 +1,13 @@ +from queries import q14 + +from . import IO_FUNCS +from . import line_item +from . import part + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q14.query(fn(line_item), fn(part))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print(q14.query(fn(line_item), fn(part)).collect()) diff --git a/tpch/execute/q16.py b/tpch/execute/q16.py new file mode 100644 index 000000000..5176a5cc6 --- /dev/null +++ b/tpch/execute/q16.py @@ -0,0 +1,14 @@ +from queries import q16 + +from . import IO_FUNCS +from . import part +from . import partsupp +from . import supplier + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q16.query(fn(part), fn(partsupp), fn(supplier))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print(q16.query(fn(part), fn(partsupp), fn(supplier)).collect()) diff --git a/tpch/execute/q22.py b/tpch/execute/q22.py new file mode 100644 index 000000000..91ed46d9d --- /dev/null +++ b/tpch/execute/q22.py @@ -0,0 +1,13 @@ +from queries import q22 + +from . import IO_FUNCS +from . import customer +from . 
import orders + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print(q22.query(fn(customer), fn(orders))) + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print(q22.query(fn(customer), fn(orders)).collect()) diff --git a/tpch/queries/q12.py b/tpch/queries/q12.py new file mode 100644 index 000000000..ced775830 --- /dev/null +++ b/tpch/queries/q12.py @@ -0,0 +1,33 @@ +from datetime import datetime + +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query(line_item_ds: FrameT, orders_ds: FrameT) -> FrameT: + var1 = "MAIL" + var2 = "SHIP" + var3 = datetime(1994, 1, 1) + var4 = datetime(1995, 1, 1) + + return ( + orders_ds.join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey") + .filter(nw.col("l_shipmode").is_in([var1, var2])) + .filter(nw.col("l_commitdate") < nw.col("l_receiptdate")) + .filter(nw.col("l_shipdate") < nw.col("l_commitdate")) + .filter(nw.col("l_receiptdate").is_between(var3, var4, closed="left")) + .with_columns( + nw.when(nw.col("o_orderpriority").is_in(["1-URGENT", "2-HIGH"])) + .then(1) + .otherwise(0) + .alias("high_line_count"), + nw.when(~nw.col("o_orderpriority").is_in(["1-URGENT", "2-HIGH"])) + .then(1) + .otherwise(0) + .alias("low_line_count"), + ) + .group_by("l_shipmode") + .agg(nw.col("high_line_count").sum(), nw.col("low_line_count").sum()) + .sort("l_shipmode") + ) diff --git a/tpch/queries/q13.py b/tpch/queries/q13.py new file mode 100644 index 000000000..adf57e5a2 --- /dev/null +++ b/tpch/queries/q13.py @@ -0,0 +1,19 @@ +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query(customer_ds: FrameT, orders_ds: FrameT) -> FrameT: + var1 = "special" + var2 = "requests" + + orders = orders_ds.filter(~nw.col("o_comment").str.contains(f"{var1}.*{var2}")) + return ( + customer_ds.join(orders, left_on="c_custkey", right_on="o_custkey", how="left") + .group_by("c_custkey") + .agg(nw.col("o_orderkey").count().alias("c_count")) + .group_by("c_count") + .agg(nw.len()) + .select(nw.col("c_count"), nw.col("len").alias("custdist")) + .sort(by=["custdist", "c_count"], descending=[True, True]) + ) diff --git a/tpch/queries/q14.py b/tpch/queries/q14.py new file mode 100644 index 000000000..f1ec6cbe3 --- /dev/null +++ b/tpch/queries/q14.py @@ -0,0 +1,27 @@ +from datetime import datetime + +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query(line_item_ds: FrameT, part_ds: FrameT) -> FrameT: + var1 = datetime(1995, 9, 1) + var2 = datetime(1995, 10, 1) + + return ( + line_item_ds.join(part_ds, left_on="l_partkey", right_on="p_partkey") + .filter(nw.col("l_shipdate").is_between(var1, var2, closed="left")) + .select( + ( + 100.00 + * nw.when(nw.col("p_type").str.contains("PROMO*")) + .then(nw.col("l_extendedprice") * (1 - nw.col("l_discount"))) + .otherwise(0) + .sum() + / (nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).sum() + ) + .round(2) + .alias("promo_revenue") + ) + ) diff --git a/tpch/queries/q16.py b/tpch/queries/q16.py new file mode 100644 index 000000000..d84b9aab5 --- /dev/null +++ b/tpch/queries/q16.py @@ -0,0 +1,26 @@ +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query(part_ds: FrameT, partsupp_ds: FrameT, supplier_ds: FrameT) -> FrameT: + var1 = "Brand#45" + + supplier = supplier_ds.filter( + nw.col("s_comment").str.contains(".*Customer.*Complaints.*") + ).select(nw.col("s_suppkey"), nw.col("s_suppkey").alias("ps_suppkey")) + + return ( + part_ds.join(partsupp_ds, left_on="p_partkey", right_on="ps_partkey") + 
.filter(nw.col("p_brand") != var1) + .filter(~nw.col("p_type").str.contains("MEDIUM POLISHED*")) + .filter(nw.col("p_size").is_in([49, 14, 23, 45, 19, 3, 36, 9])) + .join(supplier, left_on="ps_suppkey", right_on="s_suppkey", how="left") + .filter(nw.col("ps_suppkey_right").is_null()) + .group_by("p_brand", "p_type", "p_size") + .agg(nw.col("ps_suppkey").n_unique().alias("supplier_cnt")) + .sort( + by=["supplier_cnt", "p_brand", "p_type", "p_size"], + descending=[True, False, False, False], + ) + ) diff --git a/tpch/queries/q22.py b/tpch/queries/q22.py new file mode 100644 index 000000000..4738c6fd3 --- /dev/null +++ b/tpch/queries/q22.py @@ -0,0 +1,32 @@ +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query(customer_ds: FrameT, orders_ds: FrameT) -> FrameT: + q1 = ( + customer_ds.with_columns(nw.col("c_phone").str.slice(0, 2).alias("cntrycode")) + .filter(nw.col("cntrycode").str.contains("13|31|23|29|30|18|17")) + .select("c_acctbal", "c_custkey", "cntrycode") + ) + + q2 = q1.filter(nw.col("c_acctbal") > 0.0).select( + nw.col("c_acctbal").mean().alias("avg_acctbal") + ) + + q3 = orders_ds.select(nw.col("o_custkey").unique()).with_columns( + nw.col("o_custkey").alias("c_custkey") + ) + + return ( + q1.join(q3, left_on="c_custkey", right_on="c_custkey", how="left") + .filter(nw.col("o_custkey").is_null()) + .join(q2, how="cross") + .filter(nw.col("c_acctbal") > nw.col("avg_acctbal")) + .group_by("cntrycode") + .agg( + nw.col("c_acctbal").count().alias("numcust"), + nw.col("c_acctbal").sum().alias("totacctbal"), + ) + .sort("cntrycode") + ) From 2969d75c5d1074af96744c7ce97234d042993e46 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Sat, 7 Sep 2024 07:58:15 +0100 Subject: [PATCH 11/30] docs: Recommend `uv` in contributing guide (#873) --- CONTRIBUTING.md | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d36d21a55..aeed2538f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -47,22 +47,41 @@ git clone git@github.com:YOUR-USERNAME/narwhals.git ### 4. Setting up your environment -Here's how you can set up your local development environment to contribute: - -1. Make sure you have Python3.8+ installed (for example, Python 3.11) -2. Create a new virtual environment with `python3.11 -m venv .venv` (or whichever version of Python3.9+ you prefer) -3. Activate it: `. .venv/bin/activate` -4. Install Narwhals: `pip install -e .` -5. Install test requirements: `pip install -r requirements-dev.txt` -6. Install docs requirements: `pip install -r docs/requirements-docs.txt` +Here's how you can set up your local development environment to contribute. + +#### Option 1: Use UV (recommended) + +1. Make sure you have Python3.8+ installed (for example, Python 3.11), create a virtual environment, + and activate it. If you're new to this, here's one way that we recommend: + 1. Install uv: https://github.com/astral-sh/uv?tab=readme-ov-file#getting-started + 2. Install some version of Python greater than Python3.8. For example, to install + Python3.11: + ``` + uv python install 3.11 + ``` + 3. Create a virtual environment: + ``` + uv venv -p 3.11 --seed + ``` + 4. Activate it. On Linux, this is `. .venv/bin/activate`, on Windows `.\.venv\Scripts\activate`. +2. Install Narwhals: `uv pip install -e .` +3. Install test requirements: `uv pip install -r requirements-dev.txt` +4. 
Install docs requirements: `uv pip install -r docs/requirements-docs.txt` You should also install pre-commit: ``` -pip install pre-commit +uv pip install pre-commit pre-commit install ``` This will automatically format and lint your code before each commit, and it will block the commit if any issues are found. +#### Option 2: use python3-venv + +1. Make sure you have Python 3.8+ installed. If you don't, you can check [install Python](https://realpython.com/installing-python/) + to learn how. Then, [create and activate](https://realpython.com/python-virtual-environments-a-primer/) + a virtual environment. +2. Then, follow steps 2-4 from above but using `pip install` instead of `uv pip install`. + ### 5. Working on your issue Create a new git branch from the `main` branch in your local repository. From 5f91aa17c6cd9274851d219f4b50abe12c0fdfeb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 7 Sep 2024 08:43:20 +0100 Subject: [PATCH 12/30] [pre-commit.ci] pre-commit autoupdate (#818) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.7 → v0.6.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.7...v0.6.3) - [github.com/pre-commit/mirrors-mypy: v1.11.1 → v1.11.2](https://github.com/pre-commit/mirrors-mypy/compare/v1.11.1...v1.11.2) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * making ruff happy --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> --- .pre-commit-config.yaml | 4 +- pyproject.toml | 9 ++ tests/expr_and_series/arithmetic_test.py | 4 +- tests/expr_and_series/dt/ordinal_day_test.py | 2 +- .../expr_and_series/dt/total_minutes_test.py | 2 +- tests/hypothesis/test_basic_arithmetic.py | 2 +- tests/hypothesis/test_concat.py | 2 +- tests/hypothesis/test_join.py | 6 +- tpch/notebooks/q1/execute.ipynb | 53 +++++----- tpch/notebooks/q10/execute.ipynb | 41 ++++---- tpch/notebooks/q11/execute.ipynb | 44 ++++----- tpch/notebooks/q15/execute.ipynb | 43 ++++---- tpch/notebooks/q17/execute.ipynb | 42 ++++---- tpch/notebooks/q18/execute.ipynb | 41 ++++---- tpch/notebooks/q19/execute.ipynb | 45 ++++----- tpch/notebooks/q2/execute.ipynb | 51 +++++----- tpch/notebooks/q20/execute.ipynb | 47 ++++----- tpch/notebooks/q21/execute.ipynb | 99 +++++++++---------- tpch/notebooks/q3/execute.ipynb | 84 ++++++++-------- tpch/notebooks/q4/execute.ipynb | 79 +++++++-------- tpch/notebooks/q5/execute.ipynb | 73 +++++++------- tpch/notebooks/q6/execute.ipynb | 77 ++++++++------- tpch/notebooks/q7/execute.ipynb | 85 ++++++++-------- tpch/notebooks/q9/execute.ipynb | 42 ++++---- 24 files changed, 501 insertions(+), 476 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 57e766f59..f3a68e7a0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.5.7' + rev: 'v0.6.3' hooks: # Run the formatter. 
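  # Aside, not part of the patch: per the commit message, the `rev:` bumps in
  # this hunk are what `pre-commit autoupdate` writes. To reproduce them locally
  # with the standard pre-commit CLI:
  #     pre-commit autoupdate        # rewrites each hook's `rev:` pin in place
  #     pre-commit run --all-files   # re-lints the whole repo with the new pins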
- id: ruff-format @@ -9,7 +9,7 @@ repos: - id: ruff args: [--fix] - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.11.1' + rev: 'v1.11.2' hooks: - id: mypy additional_dependencies: ['polars==1.4.1', 'pytest==8.3.2'] diff --git a/pyproject.toml b/pyproject.toml index b3a2a0c28..c4a10603f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,6 +79,15 @@ lint.ignore = [ "tpch/tests/*" = ["S101"] "utils/*" = ["S311", "PTH123"] "tpch/execute/*" = ["T201"] +"tpch/notebooks/*" = [ + "ANN001", + "ANN201", + "EM101", + "EXE002", + "PTH123", + "T203", + "TRY003", +] [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/tests/expr_and_series/arithmetic_test.py b/tests/expr_and_series/arithmetic_test.py index 47d3e8ff0..7ff945c80 100644 --- a/tests/expr_and_series/arithmetic_test.py +++ b/tests/expr_and_series/arithmetic_test.py @@ -149,7 +149,7 @@ def test_truediv_same_dims(constructor_eager: Any, request: Any) -> None: compare_dicts({"a": result}, {"a": [2, 1, 1 / 3]}) -@pytest.mark.slow() +@pytest.mark.slow @given( # type: ignore[misc] left=st.integers(-100, 100), right=st.integers(-100, 100), @@ -189,7 +189,7 @@ def test_floordiv(left: int, right: int) -> None: compare_dicts(result, expected) -@pytest.mark.slow() +@pytest.mark.slow @given( # type: ignore[misc] left=st.integers(-100, 100), right=st.integers(-100, 100), diff --git a/tests/expr_and_series/dt/ordinal_day_test.py b/tests/expr_and_series/dt/ordinal_day_test.py index 1cb464259..2681188df 100644 --- a/tests/expr_and_series/dt/ordinal_day_test.py +++ b/tests/expr_and_series/dt/ordinal_day_test.py @@ -17,7 +17,7 @@ parse_version(pd.__version__) < parse_version("2.0.0"), reason="pyarrow dtype not available", ) -@pytest.mark.slow() +@pytest.mark.slow def test_ordinal_day(dates: datetime) -> None: result_pd = nw.from_native(pd.Series([dates]), series_only=True).dt.ordinal_day()[0] result_pdms = nw.from_native( diff --git a/tests/expr_and_series/dt/total_minutes_test.py b/tests/expr_and_series/dt/total_minutes_test.py index f2469e495..bcd664442 100644 --- a/tests/expr_and_series/dt/total_minutes_test.py +++ b/tests/expr_and_series/dt/total_minutes_test.py @@ -22,7 +22,7 @@ parse_version(pd.__version__) < parse_version("2.2.0"), reason="pyarrow dtype not available", ) -@pytest.mark.slow() +@pytest.mark.slow def test_total_minutes(timedeltas: timedelta) -> None: result_pd = nw.from_native( pd.Series([timedeltas]), series_only=True diff --git a/tests/hypothesis/test_basic_arithmetic.py b/tests/hypothesis/test_basic_arithmetic.py index 2ab7bad7b..00818271d 100644 --- a/tests/hypothesis/test_basic_arithmetic.py +++ b/tests/hypothesis/test_basic_arithmetic.py @@ -22,7 +22,7 @@ max_size=3, ), ) # type: ignore[misc] -@pytest.mark.slow() +@pytest.mark.slow def test_mean( integer: st.SearchStrategy[list[int]], floats: st.SearchStrategy[float], diff --git a/tests/hypothesis/test_concat.py b/tests/hypothesis/test_concat.py index 1b1248628..9ae54dbc4 100644 --- a/tests/hypothesis/test_concat.py +++ b/tests/hypothesis/test_concat.py @@ -31,7 +31,7 @@ ), how=st.sampled_from(["horizontal", "vertical"]), ) # type: ignore[misc] -@pytest.mark.slow() +@pytest.mark.slow @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") def test_concat( # pragma: no cover integers: list[int], diff --git a/tests/hypothesis/test_join.py b/tests/hypothesis/test_join.py index ebdb88757..bc1cd735c 100644 --- a/tests/hypothesis/test_join.py +++ b/tests/hypothesis/test_join.py @@ -42,7 +42,7 @@ ) # type: ignore[misc] 
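# Note on the mark rewrites in this file: a bare pytest mark and a zero-argument
# call to it apply the identical mark, so `@pytest.mark.slow()` and
# `@pytest.mark.slow` are interchangeable; the change is purely stylistic and is
# the spelling the newly pinned ruff release prefers by default.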
@pytest.mark.skipif(pl_version < parse_version("0.20.13"), reason="0.0 == -0.0") @pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow") -@pytest.mark.slow() +@pytest.mark.slow def test_join( # pragma: no cover integers: st.SearchStrategy[list[int]], other_integers: st.SearchStrategy[list[int]], @@ -88,7 +88,7 @@ def test_join( # pragma: no cover max_size=3, ), ) # type: ignore[misc] -@pytest.mark.slow() +@pytest.mark.slow @pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow") def test_cross_join( # pragma: no cover integers: st.SearchStrategy[list[int]], @@ -135,7 +135,7 @@ def test_cross_join( # pragma: no cover st.sampled_from(["a", "b", "d"]), min_size=1, max_size=3, unique=True ), ) -@pytest.mark.slow() +@pytest.mark.slow @pytest.mark.filterwarnings("ignore:the default coalesce behavior") def test_left_join( # pragma: no cover a_left_data: list[int], diff --git a/tpch/notebooks/q1/execute.ipynb b/tpch/notebooks/q1/execute.ipynb index cc6dd4559..de9c52baa 100755 --- a/tpch/notebooks/q1/execute.ipynb +++ b/tpch/notebooks/q1/execute.ipynb @@ -58,10 +58,12 @@ }, "outputs": [], "source": [ - "from typing import Any\n", "from datetime import datetime\n", + "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "@nw.narwhalify\n", "def q1(lineitem_ds: Any) -> Any:\n", " var_1 = datetime(1998, 9, 2)\n", @@ -107,14 +109,14 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "region = dir_ + 'region.parquet'\n", - "nation = dir_ + 'nation.parquet'\n", - "customer = dir_ + 'customer.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "part = dir_ + 'part.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "region = dir_ + \"region.parquet\"\n", + "nation = dir_ + \"nation.parquet\"\n", + "customer = dir_ + \"customer.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "part = dir_ + \"part.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -133,16 +135,18 @@ }, "outputs": [], "source": [ - "import pyarrow.parquet as pq\n", "import dask.dataframe as dd\n", + "import pyarrow.parquet as pq\n", "\n", "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", - " 'pyarrow': lambda x: pq.read_table(x),\n", - " 'dask': lambda x: dd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", + " \"pyarrow\": lambda x: pq.read_table(x),\n", + " \"dask\": lambda x: dd.read_parquet(x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"),\n", "}" ] }, @@ -171,7 +175,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pyarrow'\n", + "tool = \"pyarrow\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q1(fn(lineitem))\n", "results[tool] = timings.all_runs" @@ -210,7 +214,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", 
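    "# Illustrative aside, not from the patch: `IO_FUNCS` maps a backend label\n",
    "# to a parquet loader, so `fn = IO_FUNCS[tool]` selects the reader and the\n",
    "# `%timeit` line below times the same narwhals query on that backend.\n",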
"fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q1(lineitem_ds=fn(lineitem))\n", "results[tool] = timings.all_runs" @@ -249,7 +253,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q1(fn(lineitem))\n", "results[tool] = timings.all_runs" @@ -288,7 +292,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q1(fn(lineitem))\n", "results[tool] = timings.all_runs" @@ -327,7 +331,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q1(fn(lineitem)).collect()\n", "results[tool] = timings.all_runs" @@ -348,7 +352,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'dask'\n", + "tool = \"dask\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q1(fn(lineitem)).collect()\n", "results[tool] = timings.all_runs" @@ -370,8 +374,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q10/execute.ipynb b/tpch/notebooks/q10/execute.ipynb index 85ec0f14b..9ff211773 100644 --- a/tpch/notebooks/q10/execute.ipynb +++ b/tpch/notebooks/q10/execute.ipynb @@ -55,22 +55,23 @@ }, "outputs": [], "source": [ - "from typing import Any\n", "from datetime import datetime\n", + "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "def q10(\n", " customer_ds_raw: Any,\n", " nation_ds_raw: Any,\n", " lineitem_ds_raw: Any,\n", " orders_ds_raw: Any,\n", ") -> Any:\n", - "\n", " nation_ds = nw.from_native(nation_ds_raw)\n", " line_item_ds = nw.from_native(lineitem_ds_raw)\n", " orders_ds = nw.from_native(orders_ds_raw)\n", " customer_ds = nw.from_native(customer_ds_raw)\n", - " \n", + "\n", " var1 = datetime(1993, 10, 1)\n", " var2 = datetime(1994, 1, 1)\n", "\n", @@ -81,8 +82,7 @@ " .filter(nw.col(\"o_orderdate\").is_between(var1, var2, closed=\"left\"))\n", " .filter(nw.col(\"l_returnflag\") == \"R\")\n", " .with_columns(\n", - " (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\")))\n", - " .alias(\"revenue\")\n", + " (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\"))).alias(\"revenue\")\n", " )\n", " .group_by(\n", " \"c_custkey\",\n", @@ -127,10 +127,10 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "nation = dir_ + 'nation.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "customer = dir_ + 'customer.parquet'" + "nation = dir_ + \"nation.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "customer = dir_ + \"customer.parquet\"" ] }, { @@ -149,10 +149,12 @@ "outputs": [], "source": [ "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: 
pl.scan_parquet(x),\n", "}" ] }, @@ -196,7 +198,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -233,7 +235,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -270,7 +272,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -307,7 +309,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders)).collect()\n", "results[tool] = timings.all_runs" @@ -327,8 +329,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q11/execute.ipynb b/tpch/notebooks/q11/execute.ipynb index 33951d922..f5bbc0f9c 100644 --- a/tpch/notebooks/q11/execute.ipynb +++ b/tpch/notebooks/q11/execute.ipynb @@ -15,7 +15,7 @@ }, "outputs": [], "source": [ - "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals " + "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals" ] }, { @@ -56,19 +56,19 @@ "outputs": [], "source": [ "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "def q11(\n", " partsupp_ds_raw: Any,\n", " nation_ds_raw: Any,\n", " supplier_ds_raw: Any,\n", ") -> Any:\n", - "\n", " nation_ds = nw.from_native(nation_ds_raw)\n", " partsupp_ds = nw.from_native(partsupp_ds_raw)\n", " supplier_ds = nw.from_native(supplier_ds_raw)\n", "\n", - " \n", " var1 = \"GERMANY\"\n", " var2 = 0.0001\n", "\n", @@ -83,14 +83,9 @@ " )\n", "\n", " q_final = (\n", - " q1.with_columns(\n", - " (nw.col(\"ps_supplycost\") * nw.col(\"ps_availqty\"))\n", - " .alias(\"value\")\n", - " )\n", + " q1.with_columns((nw.col(\"ps_supplycost\") * nw.col(\"ps_availqty\")).alias(\"value\"))\n", " .group_by(\"ps_partkey\")\n", - " .agg(\n", - " nw.sum(\"value\")\n", - " )\n", + " .agg(nw.sum(\"value\"))\n", " .join(q2, how=\"cross\")\n", " .filter(nw.col(\"value\") > nw.col(\"tmp\"))\n", " .select(\"ps_partkey\", \"value\")\n", @@ -116,9 +111,9 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "nation = dir_ + 'nation.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "nation = dir_ + \"nation.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -137,10 +132,12 @@ "outputs": [], "source": [ "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", 
dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", "}" ] }, @@ -184,7 +181,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q11(fn(partsupp), fn(nation), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -221,7 +218,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q11(fn(partsupp), fn(nation), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -258,7 +255,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q11(fn(partsupp), fn(nation), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -295,7 +292,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q11(fn(partsupp), fn(nation), fn(supplier)).collect()\n", "results[tool] = timings.all_runs" @@ -315,8 +312,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] }, { diff --git a/tpch/notebooks/q15/execute.ipynb b/tpch/notebooks/q15/execute.ipynb index 0baf11956..d108a7196 100644 --- a/tpch/notebooks/q15/execute.ipynb +++ b/tpch/notebooks/q15/execute.ipynb @@ -15,7 +15,7 @@ }, "outputs": [], "source": [ - "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals " + "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals" ] }, { @@ -55,32 +55,34 @@ }, "outputs": [], "source": [ + "from datetime import datetime\n", "from typing import Any\n", + "\n", "import narwhals as nw\n", - "from datetime import datetime\n", + "\n", "\n", "def q15(\n", " lineitem_ds_raw: Any,\n", " supplier_ds_raw: Any,\n", ") -> Any:\n", - "\n", " lineitem_ds = nw.from_native(lineitem_ds_raw)\n", " supplier_ds = nw.from_native(supplier_ds_raw)\n", - " \n", + "\n", " var1 = datetime(1996, 1, 1)\n", " var2 = datetime(1996, 4, 1)\n", "\n", " revenue = (\n", " lineitem_ds.filter(nw.col(\"l_shipdate\").is_between(var1, var2, closed=\"left\"))\n", " .with_columns(\n", - " (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\")))\n", - " .alias(\"total_revenue\")\n", + " (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\"))).alias(\n", + " \"total_revenue\"\n", + " )\n", " )\n", " .group_by(\"l_suppkey\")\n", " .agg(nw.sum(\"total_revenue\"))\n", " .select(nw.col(\"l_suppkey\").alias(\"supplier_no\"), nw.col(\"total_revenue\"))\n", " )\n", - " \n", + "\n", " result = (\n", " supplier_ds.join(revenue, left_on=\"s_suppkey\", right_on=\"supplier_no\")\n", " .filter(nw.col(\"total_revenue\") == nw.col(\"total_revenue\").max())\n", @@ -108,8 +110,8 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "supplier = dir_ + 'supplier.parquet'" + "lineitem = dir_ + \"lineitem.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"" ] }, { @@ -128,10 +130,12 @@ "outputs": [], "source": [ "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - 
" 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", "}" ] }, @@ -175,7 +179,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q15(fn(lineitem), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -212,7 +216,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q15(fn(lineitem), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -249,7 +253,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q15(fn(lineitem), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -286,7 +290,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q15(fn(lineitem), fn(supplier)).collect()\n", "results[tool] = timings.all_runs" @@ -306,8 +310,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q17/execute.ipynb b/tpch/notebooks/q17/execute.ipynb index b13445d28..4d012f088 100644 --- a/tpch/notebooks/q17/execute.ipynb +++ b/tpch/notebooks/q17/execute.ipynb @@ -15,7 +15,7 @@ }, "outputs": [], "source": [ - "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals " + "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals" ] }, { @@ -56,25 +56,23 @@ "outputs": [], "source": [ "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", - "def q17(\n", - " lineitem_ds_raw: Any,\n", - " part_ds_raw: Any\n", - ") -> Any:\n", "\n", + "def q17(lineitem_ds_raw: Any, part_ds_raw: Any) -> Any:\n", " lineitem_ds = nw.from_native(lineitem_ds_raw)\n", " part_ds = nw.from_native(part_ds_raw)\n", - " \n", + "\n", " var1 = \"Brand#23\"\n", " var2 = \"MED BOX\"\n", - " \n", + "\n", " query1 = (\n", " part_ds.filter(nw.col(\"p_brand\") == var1)\n", " .filter(nw.col(\"p_container\") == var2)\n", " .join(lineitem_ds, how=\"left\", left_on=\"p_partkey\", right_on=\"l_partkey\")\n", " )\n", - " \n", + "\n", " final_query = (\n", " query1.group_by(\"p_partkey\")\n", " .agg((0.2 * nw.col(\"l_quantity\").mean()).alias(\"avg_quantity\"))\n", @@ -84,7 +82,6 @@ " .select((nw.col(\"l_extendedprice\").sum() / 7.0).round(2).alias(\"avg_yearly\"))\n", " )\n", "\n", - "\n", " return nw.to_native(final_query)" ] }, @@ -104,8 +101,8 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "part = dir_ + 'part.parquet'" + "lineitem = dir_ + \"lineitem.parquet\"\n", + "part = dir_ + \"part.parquet\"" ] }, { @@ -124,10 +121,12 @@ "outputs": [], "source": [ "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + " \"pandas\": 
lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", "}" ] }, @@ -171,7 +170,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q17(fn(lineitem), fn(part))\n", "results[tool] = timings.all_runs" @@ -208,7 +207,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q17(fn(lineitem), fn(part))\n", "results[tool] = timings.all_runs" @@ -245,7 +244,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q17(fn(lineitem), fn(part))\n", "results[tool] = timings.all_runs" @@ -282,7 +281,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q17(fn(lineitem), fn(part)).collect()\n", "results[tool] = timings.all_runs" @@ -302,8 +301,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q18/execute.ipynb b/tpch/notebooks/q18/execute.ipynb index c90629e0f..edf635d9e 100644 --- a/tpch/notebooks/q18/execute.ipynb +++ b/tpch/notebooks/q18/execute.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals " + "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals" ] }, { @@ -29,18 +29,15 @@ "outputs": [], "source": [ "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", - "def q18(\n", - " customer_ds_raw: Any,\n", - " lineitem_ds_raw: Any,\n", - " orders_ds_raw: Any\n", - ") -> Any:\n", "\n", + "def q18(customer_ds_raw: Any, lineitem_ds_raw: Any, orders_ds_raw: Any) -> Any:\n", " customer_ds = nw.from_native(customer_ds_raw)\n", " lineitem_ds = nw.from_native(lineitem_ds_raw)\n", " orders_ds = nw.from_native(orders_ds_raw)\n", - " \n", + "\n", " var1 = 300\n", "\n", " query1 = (\n", @@ -67,7 +64,6 @@ " .head(100)\n", " )\n", "\n", - "\n", " return nw.to_native(q_final)" ] }, @@ -78,9 +74,9 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "customer = dir_ + 'customer.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'" + "customer = dir_ + \"customer.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"" ] }, { @@ -90,10 +86,12 @@ "outputs": [], "source": [ "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", "}" ] }, @@ -119,7 +117,7 @@ "metadata": 
{}, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q19(fn(lineitem), fn(part))\n", "results[tool] = timings.all_runs" @@ -138,7 +136,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q18(fn(customer), fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -157,7 +155,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q18(fn(customer), fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -176,7 +174,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q18(fn(customer), fn(lineitem), fn(orders)).collect()\n", "results[tool] = timings.all_runs" @@ -196,8 +194,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q19/execute.ipynb b/tpch/notebooks/q19/execute.ipynb index 8483e06d5..8860cc773 100644 --- a/tpch/notebooks/q19/execute.ipynb +++ b/tpch/notebooks/q19/execute.ipynb @@ -15,7 +15,7 @@ }, "outputs": [], "source": [ - "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals " + "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals" ] }, { @@ -56,14 +56,11 @@ "outputs": [], "source": [ "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", - "def q19(\n", - " lineitem_ds_raw: Any,\n", - " part_ds_raw: Any\n", - " \n", - ") -> Any:\n", "\n", + "def q19(lineitem_ds_raw: Any, part_ds_raw: Any) -> Any:\n", " lineitem_ds = nw.from_native(lineitem_ds_raw)\n", " part_ds = nw.from_native(part_ds_raw)\n", "\n", @@ -74,9 +71,7 @@ " .filter(\n", " (\n", " (nw.col(\"p_brand\") == \"Brand#12\")\n", - " & nw.col(\"p_container\").is_in(\n", - " [\"SM CASE\", \"SM BOX\", \"SM PACK\", \"SM PKG\"]\n", - " )\n", + " & nw.col(\"p_container\").is_in([\"SM CASE\", \"SM BOX\", \"SM PACK\", \"SM PKG\"])\n", " & (nw.col(\"l_quantity\").is_between(1, 11))\n", " & (nw.col(\"p_size\").is_between(1, 5))\n", " )\n", @@ -90,9 +85,7 @@ " )\n", " | (\n", " (nw.col(\"p_brand\") == \"Brand#34\")\n", - " & nw.col(\"p_container\").is_in(\n", - " [\"LG CASE\", \"LG BOX\", \"LG PACK\", \"LG PKG\"]\n", - " )\n", + " & nw.col(\"p_container\").is_in([\"LG CASE\", \"LG BOX\", \"LG PACK\", \"LG PKG\"])\n", " & (nw.col(\"l_quantity\").is_between(20, 30))\n", " & (nw.col(\"p_size\").is_between(1, 15))\n", " )\n", @@ -105,7 +98,6 @@ " )\n", " )\n", "\n", - "\n", " return nw.to_native(result)" ] }, @@ -125,8 +117,8 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "part = dir_ + 'part.parquet'" + "lineitem = dir_ + \"lineitem.parquet\"\n", + "part = dir_ + \"part.parquet\"" ] }, { @@ -145,10 +137,12 @@ "outputs": [], "source": [ "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, 
engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", "}" ] }, @@ -192,7 +186,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q19(fn(lineitem), fn(part))\n", "results[tool] = timings.all_runs" @@ -229,7 +223,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q19(fn(lineitem), fn(part))\n", "results[tool] = timings.all_runs" @@ -266,7 +260,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q19(fn(lineitem), fn(part))\n", "results[tool] = timings.all_runs" @@ -303,7 +297,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q19(fn(lineitem), fn(part)).collect()\n", "results[tool] = timings.all_runs" @@ -323,8 +317,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q2/execute.ipynb b/tpch/notebooks/q2/execute.ipynb index c05345336..74ba50f2a 100755 --- a/tpch/notebooks/q2/execute.ipynb +++ b/tpch/notebooks/q2/execute.ipynb @@ -69,8 +69,10 @@ "outputs": [], "source": [ "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "@nw.narwhalify\n", "def q2(\n", " region_ds: Any,\n", @@ -140,14 +142,14 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "region = dir_ + 'region.parquet'\n", - "nation = dir_ + 'nation.parquet'\n", - "customer = dir_ + 'customer.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "part = dir_ + 'part.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "region = dir_ + \"region.parquet\"\n", + "nation = dir_ + \"nation.parquet\"\n", + "customer = dir_ + \"customer.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "part = dir_ + \"part.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -166,16 +168,18 @@ }, "outputs": [], "source": [ - "import pyarrow.parquet as pq\n", "import dask.dataframe as dd\n", + "import pyarrow.parquet as pq\n", "\n", "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", - " 'pyarrow': lambda x: pq.read_table(x),\n", - " 'dask': lambda x: dd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", + " \"pyarrow\": lambda x: pq.read_table(x),\n", + " \"dask\": lambda x: 
dd.read_parquet(x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"),\n", "}" ] }, @@ -222,7 +226,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q2(fn(region), fn(nation), fn(supplier), fn(part), fn(partsupp))\n", "results[tool] = timings.all_runs" @@ -261,7 +265,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q2(fn(region), fn(nation), fn(supplier), fn(part), fn(partsupp))\n", "results[tool] = timings.all_runs" @@ -300,7 +304,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q2(fn(region), fn(nation), fn(supplier), fn(part), fn(partsupp))\n", "results[tool] = timings.all_runs" @@ -339,7 +343,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q2(fn(region), fn(nation), fn(supplier), fn(part), fn(partsupp)).collect()\n", "results[tool] = timings.all_runs" @@ -360,7 +364,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pyarrow'\n", + "tool = \"pyarrow\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q2(fn(region), fn(nation), fn(supplier), fn(part), fn(partsupp))\n", "results[tool] = timings.all_runs" @@ -381,7 +385,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'dask'\n", + "tool = \"dask\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q2(fn(region), fn(nation), fn(supplier), fn(part), fn(partsupp)).compute()\n", "results[tool] = timings.all_runs" @@ -403,8 +407,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q20/execute.ipynb b/tpch/notebooks/q20/execute.ipynb index aecb3a473..a9698c1ad 100644 --- a/tpch/notebooks/q20/execute.ipynb +++ b/tpch/notebooks/q20/execute.ipynb @@ -15,7 +15,7 @@ }, "outputs": [], "source": [ - "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals " + "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals" ] }, { @@ -55,24 +55,25 @@ }, "outputs": [], "source": [ + "from datetime import datetime\n", "from typing import Any\n", + "\n", "import narwhals as nw\n", - "from datetime import datetime\n", + "\n", "\n", "def q20(\n", " part_ds_raw: Any,\n", " partsupp_ds_raw: Any,\n", " nation_ds_raw: Any,\n", " lineitem_ds_raw: Any,\n", - " supplier_ds_raw: Any\n", + " supplier_ds_raw: Any,\n", ") -> Any:\n", - "\n", " part_ds = nw.from_native(part_ds_raw)\n", " nation_ds = nw.from_native(nation_ds_raw)\n", " partsupp_ds = nw.from_native(partsupp_ds_raw)\n", " lineitem_ds = nw.from_native(lineitem_ds_raw)\n", " supplier_ds = nw.from_native(supplier_ds_raw)\n", - " \n", + "\n", " var1 = datetime(1994, 1, 1)\n", " var2 = datetime(1995, 1, 1)\n", " var3 = \"CANADA\"\n", @@ -82,7 +83,7 @@ " lineitem_ds.filter(nw.col(\"l_shipdate\").is_between(var1, var2, closed=\"left\"))\n", " .group_by(\"l_partkey\", \"l_suppkey\")\n", " .agg((nw.col(\"l_quantity\").sum()).alias(\"sum_quantity\"))\n", - " .with_columns(sum_quantity = nw.col(\"sum_quantity\") * 0.5)\n", + " .with_columns(sum_quantity=nw.col(\"sum_quantity\") * 0.5)\n", " )\n", " query2 = nation_ds.filter(nw.col(\"n_name\") == var3)\n", " query3 = supplier_ds.join(query2, 
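q2 is the one notebook in this series that also benchmarks the pyarrow and dask backends, which is why the timed expression differs per tool: polars[lazy] results need `.collect()` and dask results need `.compute()` before any work actually happens. A small helper capturing that split (a sketch, not code from the notebooks):

def materialize(result, tool):
    """Force execution for lazy backends so every tool is timed end to end."""
    if tool == "polars[lazy]":
        return result.collect()
    if tool == "dask":
        return result.compute()
    # pandas, polars[eager], and pyarrow results are already materialized
    return result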
left_on=\"s_nationkey\", right_on=\"n_nationkey\")\n", @@ -103,7 +104,6 @@ " .sort(\"s_name\")\n", " )\n", "\n", - "\n", " return nw.to_native(result)" ] }, @@ -123,11 +123,11 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "nation = dir_ + 'nation.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "part = dir_ + 'part.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "nation = dir_ + \"nation.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "part = dir_ + \"part.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -146,10 +146,12 @@ "outputs": [], "source": [ "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", "}" ] }, @@ -193,7 +195,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q20(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -230,7 +232,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q20(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -267,7 +269,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q20(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -304,7 +306,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q20(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier)).collect()\n", "results[tool] = timings.all_runs" @@ -324,8 +326,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q21/execute.ipynb b/tpch/notebooks/q21/execute.ipynb index b51b15dce..af12a424c 100755 --- a/tpch/notebooks/q21/execute.ipynb +++ b/tpch/notebooks/q21/execute.ipynb @@ -36,13 +36,12 @@ "outputs": [], "source": [ "from typing import Any\n", - "from datetime import date\n", - "\n", - "import narwhals as nw\n", "\n", "import pandas as pd\n", "import polars as pl\n", "\n", + "import narwhals as nw\n", + "\n", "pd.options.mode.copy_on_write = True\n", "pd.options.future.infer_string = True" ] @@ -66,10 +65,12 @@ "Q_NUM = 21\n", "\n", "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, 
engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", "}" ] }, @@ -95,34 +96,28 @@ " orders_raw: Any,\n", " supplier_raw: Any,\n", ") -> Any:\n", - " \n", " lineitem = nw.from_native(lineitem_raw)\n", " nation = nw.from_native(nation_raw)\n", " orders = nw.from_native(orders_raw)\n", " supplier = nw.from_native(supplier_raw)\n", - " \n", + "\n", " var1 = \"SAUDI ARABIA\"\n", - " \n", - " \n", + "\n", " q1 = (\n", " lineitem.group_by(\"l_orderkey\")\n", - "# .agg(nw.col(\"l_suppkey\").len().alias(\"n_supp_by_order\"))\n", " .agg(nw.len().alias(\"n_supp_by_order\"))\n", " .filter(nw.col(\"n_supp_by_order\") > 1)\n", " .join(\n", " lineitem.filter(nw.col(\"l_receiptdate\") > nw.col(\"l_commitdate\")),\n", - "# on=\"l_orderkey\",\n", - " left_on=\"l_orderkey\", right_on=\"l_orderkey\",\n", + " left_on=\"l_orderkey\",\n", + " right_on=\"l_orderkey\",\n", " )\n", " )\n", "\n", " q_final = (\n", " q1.group_by(\"l_orderkey\")\n", - "# .agg(nw.col(\"l_suppkey\").len().alias(\"n_supp_by_order\"))\n", " .agg(nw.len().alias(\"n_supp_by_order\"))\n", - " .join(q1, left_on=\"l_orderkey\", right_on=\"l_orderkey\"\n", - " #on=\"l_orderkey\"\n", - " )\n", + " .join(q1, left_on=\"l_orderkey\", right_on=\"l_orderkey\")\n", " .join(supplier, left_on=\"l_suppkey\", right_on=\"s_suppkey\")\n", " .join(nation, left_on=\"s_nationkey\", right_on=\"n_nationkey\")\n", " .join(orders, left_on=\"l_orderkey\", right_on=\"o_orderkey\")\n", @@ -155,10 +150,10 @@ "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", "\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "nation = dir_ + 'nation.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "supplier = dir_ + 'supplier.parquet'" + "lineitem = dir_ + \"lineitem.parquet\"\n", + "nation = dir_ + \"nation.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"" ] }, { @@ -213,10 +208,15 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "\n", - "lineitem_raw, nation_raw, orders_raw, supplier_raw = fn(lineitem), fn(nation), fn(orders), fn(supplier)\n", + "lineitem_raw, nation_raw, orders_raw, supplier_raw = (\n", + " fn(lineitem),\n", + " fn(nation),\n", + " fn(orders),\n", + " fn(supplier),\n", + ")\n", "\n", "timings = %timeit -o -q q21(lineitem_raw, nation_raw, orders_raw, supplier_raw)\n", "results[tool] = timings.all_runs" @@ -255,9 +255,14 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", - "lineitem_raw, nation_raw, orders_raw, supplier_raw = fn(lineitem), fn(nation), fn(orders), fn(supplier)\n", + "lineitem_raw, nation_raw, orders_raw, supplier_raw = (\n", + " fn(lineitem),\n", + " fn(nation),\n", + " fn(orders),\n", + " fn(supplier),\n", + ")\n", "\n", "timings = %timeit -o -q q21(lineitem_raw, nation_raw, orders_raw, supplier_raw)\n", "results[tool] = timings.all_runs" @@ -296,10 +301,15 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "\n", - "lineitem_raw, nation_raw, orders_raw, supplier_raw = fn(lineitem), fn(nation), fn(orders), fn(supplier)\n", + "lineitem_raw, nation_raw, orders_raw, supplier_raw = (\n", + " fn(lineitem),\n", + " fn(nation),\n", + " fn(orders),\n", + " fn(supplier),\n", + ")\n", 
"timings = %timeit -o -q q21(lineitem_raw, nation_raw, orders_raw, supplier_raw)\n", "results[tool] = timings.all_runs" ] @@ -337,10 +347,15 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "\n", - "lineitem_raw, nation_raw, orders_raw, supplier_raw = fn(lineitem), fn(nation), fn(orders), fn(supplier)\n", + "lineitem_raw, nation_raw, orders_raw, supplier_raw = (\n", + " fn(lineitem),\n", + " fn(nation),\n", + " fn(orders),\n", + " fn(supplier),\n", + ")\n", "timings = %timeit -o -q q21(lineitem_raw, nation_raw, orders_raw, supplier_raw).collect()\n", "results[tool] = timings.all_runs" ] @@ -379,29 +394,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16", - "metadata": { - "papermill": { - "duration": 0.02616, - "end_time": "2024-06-20T09:46:18.666732", - "exception": false, - "start_time": "2024-06-20T09:46:18.640572", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from pprint import pprint\n", "\n", - "pprint(results)" + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q3/execute.ipynb b/tpch/notebooks/q3/execute.ipynb index 80178cae1..b81135fc3 100755 --- a/tpch/notebooks/q3/execute.ipynb +++ b/tpch/notebooks/q3/execute.ipynb @@ -49,14 +49,15 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import Any\n", "from datetime import date\n", + "from typing import Any\n", + "\n", "\n", "def q3_pandas_native(\n", " customer_ds: Any,\n", " line_item_ds: Any,\n", " orders_ds: Any,\n", - "):\n", + ") -> Any:\n", " var1 = \"BUILDING\"\n", " var2 = date(1995, 3, 15)\n", "\n", @@ -69,18 +70,15 @@ " jn2 = jn2[jn2[\"l_shipdate\"] > var2]\n", " jn2[\"revenue\"] = jn2.l_extendedprice * (1 - jn2.l_discount)\n", "\n", - " gb = jn2.groupby(\n", - " [\"o_orderkey\", \"o_orderdate\", \"o_shippriority\"], as_index=False\n", - " )\n", + " gb = jn2.groupby([\"o_orderkey\", \"o_orderdate\", \"o_shippriority\"], as_index=False)\n", " agg = gb[\"revenue\"].sum()\n", "\n", " sel = agg.loc[:, [\"o_orderkey\", \"revenue\", \"o_orderdate\", \"o_shippriority\"]]\n", " sel = sel.rename({\"o_orderkey\": \"l_orderkey\"}, axis=\"columns\")\n", "\n", " sorted = sel.sort_values(by=[\"revenue\", \"o_orderdate\"], ascending=[False, True])\n", - " result_df = sorted.head(10)\n", "\n", - " return result_df # type: ignore[no-any-return]" + " return sorted.head(10) # type: ignore[no-any-return]" ] }, { @@ -99,10 +97,12 @@ }, "outputs": [], "source": [ - "from typing import Any\n", "from datetime import datetime\n", + "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "def q3(\n", " customer_ds_raw: Any,\n", " line_item_ds_raw: Any,\n", @@ -122,7 +122,8 @@ " .filter(\n", " nw.col(\"o_orderdate\") < var_2,\n", " nw.col(\"l_shipdate\") > var_1,\n", - " ).with_columns(\n", + " )\n", + " .with_columns(\n", " (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\"))).alias(\"revenue\")\n", " )\n", " .group_by([\"o_orderkey\", \"o_orderdate\", \"o_shippriority\"])\n", @@ -150,16 +151,16 @@ "outputs": [], "source": [ "from typing import Any\n", - "from datetime import datetime\n", - "import narwhals as nw\n", + "\n", "import ibis\n", "\n", + "\n", "def q3_ibis(\n", " customer: Any,\n", " lineitem: Any,\n", " orders: Any,\n", " *,\n", - " tool,\n", + " tool: str,\n", ") -> Any:\n", " var1 = 
\"BUILDING\"\n", " var2 = date(1995, 3, 15)\n", @@ -186,9 +187,9 @@ " .order_by(ibis.desc(\"revenue\"), \"o_orderdate\")\n", " .limit(10)\n", " )\n", - " if tool == 'pandas':\n", + " if tool == \"pandas\":\n", " return q_final.to_pandas()\n", - " if tool == 'polars':\n", + " if tool == \"polars\":\n", " return q_final.to_polars()\n", " raise ValueError(\"expected pandas or polars\")" ] @@ -210,14 +211,14 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "region = dir_ + 'region.parquet'\n", - "nation = dir_ + 'nation.parquet'\n", - "customer = dir_ + 'customer.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "part = dir_ + 'part.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "region = dir_ + \"region.parquet\"\n", + "nation = dir_ + \"nation.parquet\"\n", + "customer = dir_ + \"customer.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "part = dir_ + \"part.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -236,18 +237,20 @@ }, "outputs": [], "source": [ - "import ibis\n", - "\n", "con_pd = ibis.pandas.connect()\n", "con_pl = ibis.polars.connect()\n", "\n", "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'pandas[pyarrow][ibis]': lambda x: con_pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", - " 'polars[lazy][ibis]': lambda x: con_pl.read_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"pandas[pyarrow][ibis]\": lambda x: con_pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", + " \"polars[lazy][ibis]\": lambda x: con_pl.read_parquet(x),\n", "}" ] }, @@ -276,7 +279,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pandas[pyarrow][ibis]'\n", + "tool = \"pandas[pyarrow][ibis]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q3_ibis(fn(customer), fn(lineitem), fn(orders), tool='pandas')\n", "results[tool] = timings.all_runs" @@ -297,7 +300,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'polars[lazy][ibis]'\n", + "tool = \"polars[lazy][ibis]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q3_ibis(fn(customer), fn(lineitem), fn(orders), tool='polars')\n", "results[tool] = timings.all_runs" @@ -318,10 +321,10 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q3_pandas_native(fn(customer), fn(lineitem), fn(orders))\n", - "results[tool+'[native]'] = timings.all_runs" + "results[tool + \"[native]\"] = timings.all_runs" ] }, { @@ -357,7 +360,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q3(fn(customer), fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -396,7 +399,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", 
+ "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q3(fn(customer), fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -435,7 +438,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q3(fn(customer), fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -474,7 +477,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q3(fn(customer), fn(lineitem), fn(orders)).collect()\n", "results[tool] = timings.all_runs" @@ -496,8 +499,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q4/execute.ipynb b/tpch/notebooks/q4/execute.ipynb index df07c9c5f..b0a55e345 100755 --- a/tpch/notebooks/q4/execute.ipynb +++ b/tpch/notebooks/q4/execute.ipynb @@ -52,6 +52,7 @@ "from datetime import date\n", "from typing import Any\n", "\n", + "\n", "def q4_pandas_native(\n", " line_item_ds: Any,\n", " orders_ds: Any,\n", @@ -72,9 +73,7 @@ " gb = jn.groupby(\"o_orderpriority\", as_index=False)\n", " agg = gb.agg(order_count=pd.NamedAgg(column=\"o_orderkey\", aggfunc=\"count\"))\n", "\n", - " result_df = agg.sort_values([\"o_orderpriority\"])\n", - "\n", - " return result_df # type: ignore[no-any-return]" + " return agg.sort_values([\"o_orderpriority\"]) # type: ignore[no-any-return]" ] }, { @@ -93,10 +92,12 @@ }, "outputs": [], "source": [ - "from typing import Any\n", "from datetime import datetime\n", + "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "def q4(\n", " lineitem_ds_raw: Any,\n", " orders_ds_raw: Any,\n", @@ -112,7 +113,8 @@ " .filter(\n", " nw.col(\"o_orderdate\").is_between(var_1, var_2, closed=\"left\"),\n", " nw.col(\"l_commitdate\") < nw.col(\"l_receiptdate\"),\n", - " ).unique(subset=[\"o_orderpriority\", \"l_orderkey\"])\n", + " )\n", + " .unique(subset=[\"o_orderpriority\", \"l_orderkey\"])\n", " .group_by(\"o_orderpriority\")\n", " .agg(nw.len().alias(\"order_count\"))\n", " .sort(by=\"o_orderpriority\")\n", @@ -130,15 +132,11 @@ "outputs": [], "source": [ "from typing import Any\n", - "from datetime import datetime\n", + "\n", "import ibis\n", "\n", - "def q4_ibis(\n", - " lineitem: Any,\n", - " orders: Any,\n", - " *,\n", - " tool: str\n", - ") -> Any:\n", + "\n", + "def q4_ibis(lineitem: Any, orders: Any, *, tool: str) -> Any:\n", " var1 = datetime(1993, 7, 1)\n", " var2 = datetime(1993, 10, 1)\n", "\n", @@ -151,9 +149,9 @@ " .agg(order_count=ibis._.count())\n", " .order_by(\"o_orderpriority\")\n", " )\n", - " if tool == 'pandas':\n", + " if tool == \"pandas\":\n", " return q_final.to_pandas()\n", - " if tool == 'polars':\n", + " if tool == \"polars\":\n", " return q_final.to_polars()\n", " raise ValueError(\"expected pandas or polars\")" ] @@ -175,14 +173,14 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "region = dir_ + 'region.parquet'\n", - "nation = dir_ + 'nation.parquet'\n", - "customer = dir_ + 'customer.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "part = dir_ + 'part.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "region = dir_ + \"region.parquet\"\n", + "nation = dir_ + 
\"nation.parquet\"\n", + "customer = dir_ + \"customer.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "part = dir_ + \"part.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -201,18 +199,20 @@ }, "outputs": [], "source": [ - "import ibis\n", - "\n", "con_pd = ibis.pandas.connect()\n", "con_pl = ibis.polars.connect()\n", "\n", "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'pandas[pyarrow][ibis]': lambda x: con_pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", - " 'polars[lazy][ibis]': lambda x: con_pl.read_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"pandas[pyarrow][ibis]\": lambda x: con_pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", + " \"polars[lazy][ibis]\": lambda x: con_pl.read_parquet(x),\n", "}" ] }, @@ -241,7 +241,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'polars[lazy][ibis]'\n", + "tool = \"polars[lazy][ibis]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q4_ibis(fn(lineitem), fn(orders), tool='polars')\n", "results[tool] = timings.all_runs" @@ -262,10 +262,10 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q4_pandas_native(fn(lineitem), fn(orders))\n", - "results[tool+'[native]'] = timings.all_runs" + "results[tool + \"[native]\"] = timings.all_runs" ] }, { @@ -301,7 +301,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q4(fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -340,7 +340,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q4(fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -379,7 +379,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q4(fn(lineitem), fn(orders))\n", "results[tool] = timings.all_runs" @@ -418,7 +418,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q4(fn(lineitem), fn(orders)).collect()\n", "results[tool] = timings.all_runs" @@ -440,8 +440,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q5/execute.ipynb b/tpch/notebooks/q5/execute.ipynb index 5f6df9bbc..da0cae78b 100755 --- a/tpch/notebooks/q5/execute.ipynb +++ b/tpch/notebooks/q5/execute.ipynb @@ -49,8 +49,9 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import Any\n", "from datetime import date\n", + "from typing import Any\n", + "\n", "\n", "def 
q5_pandas_native(\n", " region_ds: Any,\n", @@ -79,9 +80,8 @@ " jn5[\"revenue\"] = jn5.l_extendedprice * (1.0 - jn5.l_discount)\n", "\n", " gb = jn5.groupby(\"n_name\", as_index=False)[\"revenue\"].sum()\n", - " result_df = gb.sort_values(\"revenue\", ascending=False)\n", "\n", - " return result_df # type: ignore[no-any-return]" + " return gb.sort_values(\"revenue\", ascending=False) # type: ignore[no-any-return]" ] }, { @@ -91,10 +91,12 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import Any\n", "from datetime import datetime\n", + "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "def q5(\n", " region_ds_raw: Any,\n", " nation_ds_raw: Any,\n", @@ -126,7 +128,7 @@ " )\n", " .filter(\n", " nw.col(\"r_name\") == var_1,\n", - " nw.col(\"o_orderdate\").is_between(var_2, var_3, closed=\"left\")\n", + " nw.col(\"o_orderdate\").is_between(var_2, var_3, closed=\"left\"),\n", " )\n", " .with_columns(\n", " (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\"))).alias(\"revenue\")\n", @@ -147,10 +149,10 @@ "outputs": [], "source": [ "from typing import Any\n", - "from datetime import datetime\n", - "import narwhals as nw\n", + "\n", "import ibis\n", "\n", + "\n", "def q5_ibis(\n", " region: Any,\n", " nation: Any,\n", @@ -183,9 +185,9 @@ " .order_by(ibis.desc(\"revenue\"))\n", " )\n", "\n", - " if tool == 'pandas':\n", + " if tool == \"pandas\":\n", " return q_final.to_pandas()\n", - " if tool == 'polars':\n", + " if tool == \"polars\":\n", " return q_final.to_polars()\n", " raise ValueError(\"expected pandas or polars\")" ] @@ -207,14 +209,14 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "region = dir_ + 'region.parquet'\n", - "nation = dir_ + 'nation.parquet'\n", - "customer = dir_ + 'customer.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "part = dir_ + 'part.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "region = dir_ + \"region.parquet\"\n", + "nation = dir_ + \"nation.parquet\"\n", + "customer = dir_ + \"customer.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "part = dir_ + \"part.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -233,18 +235,20 @@ }, "outputs": [], "source": [ - "import ibis\n", - "\n", "con_pd = ibis.pandas.connect()\n", "con_pl = ibis.polars.connect()\n", "\n", "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'pandas[pyarrow][ibis]': lambda x: con_pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", - " 'polars[lazy][ibis]': lambda x: con_pl.read_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"pandas[pyarrow][ibis]\": lambda x: con_pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", + " \"polars[lazy][ibis]\": lambda x: con_pl.read_parquet(x),\n", "}" ] }, @@ -273,7 +277,7 @@ "metadata": {}, 
"outputs": [], "source": [ - "tool = 'polars[lazy][ibis]'\n", + "tool = \"polars[lazy][ibis]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q5_ibis(fn(region), fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier), tool='polars')\n", "results[tool] = timings.all_runs" @@ -294,10 +298,10 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q5_pandas_native(fn(region), fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))\n", - "results[tool+'[native]'] = timings.all_runs" + "results[tool + \"[native]\"] = timings.all_runs" ] }, { @@ -333,7 +337,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q5(fn(region), fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -372,7 +376,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q5(fn(region), fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -411,7 +415,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q5(fn(region), fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -450,7 +454,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q5(fn(region), fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier)).collect()\n", "results[tool] = timings.all_runs" @@ -472,8 +476,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q6/execute.ipynb b/tpch/notebooks/q6/execute.ipynb index b101aa98d..5abcb65f0 100755 --- a/tpch/notebooks/q6/execute.ipynb +++ b/tpch/notebooks/q6/execute.ipynb @@ -50,6 +50,7 @@ "source": [ "from datetime import date\n", "\n", + "\n", "def q6_pandas_native(line_item_ds):\n", " var1 = date(1994, 1, 1)\n", " var2 = date(1995, 1, 1)\n", @@ -66,9 +67,8 @@ " ]\n", "\n", " result_value = (flineitem[\"l_extendedprice\"] * flineitem[\"l_discount\"]).sum()\n", - " result_df = pd.DataFrame({\"revenue\": [result_value]})\n", "\n", - " return result_df" + " return pd.DataFrame({\"revenue\": [result_value]})" ] }, { @@ -87,10 +87,11 @@ }, "outputs": [], "source": [ - "from typing import Any\n", "from datetime import datetime\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "def q6(line_item_raw) -> None:\n", " var_1 = datetime(1994, 1, 1)\n", " var_2 = datetime(1995, 1, 1)\n", @@ -103,12 +104,11 @@ " nw.col(\"l_shipdate\").is_between(var_1, var_2, closed=\"left\"),\n", " nw.col(\"l_discount\").is_between(0.05, 0.07),\n", " nw.col(\"l_quantity\") < var_3,\n", - " ).with_columns(\n", - " (nw.col(\"l_extendedprice\") * nw.col(\"l_discount\")).alias(\"revenue\")\n", " )\n", + " .with_columns((nw.col(\"l_extendedprice\") * nw.col(\"l_discount\")).alias(\"revenue\"))\n", " .select(nw.sum(\"revenue\"))\n", " )\n", - " return nw.to_native(result)\n" + " return nw.to_native(result)" ] }, { @@ -118,10 +118,6 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import Any\n", - 
"from datetime import datetime\n", - "import narwhals as nw\n", - "\n", "def q6_ibis(lineitem, *, tool: str) -> None:\n", " var1 = datetime(1994, 1, 1)\n", " var2 = datetime(1995, 1, 1)\n", @@ -138,12 +134,12 @@ " .mutate(revenue=ibis._[\"l_extendedprice\"] * (ibis._[\"l_discount\"]))\n", " .agg(revenue=ibis._[\"revenue\"].sum())\n", " )\n", - " \n", - " if tool == 'pandas':\n", + "\n", + " if tool == \"pandas\":\n", " return q_final.to_pandas()\n", - " if tool == 'polars':\n", + " if tool == \"polars\":\n", " return q_final.to_polars()\n", - " raise ValueError(\"expected pandas or polars\")\n" + " raise ValueError(\"expected pandas or polars\")" ] }, { @@ -163,14 +159,14 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "region = dir_ + 'region.parquet'\n", - "nation = dir_ + 'nation.parquet'\n", - "customer = dir_ + 'customer.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "part = dir_ + 'part.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "region = dir_ + \"region.parquet\"\n", + "nation = dir_ + \"nation.parquet\"\n", + "customer = dir_ + \"customer.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "part = dir_ + \"part.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -195,12 +191,16 @@ "con_pl = ibis.polars.connect()\n", "\n", "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'pandas[pyarrow][ibis]': lambda x: con_pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", - " 'polars[lazy][ibis]': lambda x: con_pl.read_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"pandas[pyarrow][ibis]\": lambda x: con_pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", + " \"polars[lazy][ibis]\": lambda x: con_pl.read_parquet(x),\n", "}" ] }, @@ -229,7 +229,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pandas[pyarrow][ibis]'\n", + "tool = \"pandas[pyarrow][ibis]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q6_ibis(fn(lineitem), tool='pandas')\n", "results[tool] = timings.all_runs" @@ -250,7 +250,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'polars[lazy][ibis]'\n", + "tool = \"polars[lazy][ibis]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q6_ibis(fn(lineitem), tool='polars')\n", "results[tool] = timings.all_runs" @@ -271,10 +271,10 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q6_pandas_native(fn(lineitem))\n", - "results[tool+'[native]'] = timings.all_runs" + "results[tool + \"[native]\"] = timings.all_runs" ] }, { @@ -310,7 +310,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q6(fn(lineitem))\n", "results[tool] = timings.all_runs" @@ 
-349,7 +349,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q6(fn(lineitem))\n", "results[tool] = timings.all_runs" @@ -388,7 +388,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q6(fn(lineitem))\n", "results[tool] = timings.all_runs" @@ -427,7 +427,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q6(fn(lineitem)).collect()\n", "results[tool] = timings.all_runs" @@ -449,8 +449,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q7/execute.ipynb b/tpch/notebooks/q7/execute.ipynb index 1213043b0..8711d7505 100755 --- a/tpch/notebooks/q7/execute.ipynb +++ b/tpch/notebooks/q7/execute.ipynb @@ -49,10 +49,13 @@ "metadata": {}, "outputs": [], "source": [ + "from datetime import date\n", + "from datetime import datetime\n", "from typing import Any\n", - "from datetime import datetime, date\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "def q7_pandas_native(\n", " nation_ds,\n", " customer_ds,\n", @@ -96,9 +99,7 @@ " gb = total.groupby([\"supp_nation\", \"cust_nation\", \"l_year\"], as_index=False)\n", " agg = gb.agg(revenue=pd.NamedAgg(column=\"volume\", aggfunc=\"sum\"))\n", "\n", - " result_df = agg.sort_values(by=[\"supp_nation\", \"cust_nation\", \"l_year\"])\n", - "\n", - " return result_df # type: ignore[no-any-return]" + " return agg.sort_values(by=[\"supp_nation\", \"cust_nation\", \"l_year\"]) # type: ignore[no-any-return]" ] }, { @@ -117,10 +118,6 @@ }, "outputs": [], "source": [ - "from typing import Any\n", - "from datetime import datetime\n", - "import narwhals as nw\n", - "\n", "def q7(\n", " nation_ds,\n", " customer_ds,\n", @@ -171,7 +168,7 @@ " .agg(nw.sum(\"volume\").alias(\"revenue\"))\n", " .sort(by=[\"supp_nation\", \"cust_nation\", \"l_year\"])\n", " )\n", - " return nw.to_native(result)\n" + " return nw.to_native(result)" ] }, { @@ -181,18 +178,11 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import Any\n", - "from datetime import datetime\n", "import ibis\n", "\n", + "\n", "def q7_ibis(\n", - " nation: Any,\n", - " customer: Any,\n", - " lineitem: Any,\n", - " orders: Any,\n", - " supplier: Any,\n", - " *,\n", - " tool: str\n", + " nation: Any, customer: Any, lineitem: Any, orders: Any, supplier: Any, *, tool: str\n", ") -> None:\n", " var1 = \"FRANCE\"\n", " var2 = \"GERMANY\"\n", @@ -234,9 +224,9 @@ " .order_by(\"supp_nation\", \"cust_nation\", \"l_year\")\n", " )\n", "\n", - " if tool == 'pandas':\n", + " if tool == \"pandas\":\n", " return q_final.to_pandas()\n", - " if tool == 'polars':\n", + " if tool == \"polars\":\n", " return q_final.to_polars()\n", " raise ValueError(\"expected pandas or polars\")" ] @@ -258,14 +248,14 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "region = dir_ + 'region.parquet'\n", - "nation = dir_ + 'nation.parquet'\n", - "customer = dir_ + 'customer.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "part = dir_ + 'part.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "region = dir_ + 
\"region.parquet\"\n", + "nation = dir_ + \"nation.parquet\"\n", + "customer = dir_ + \"customer.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "part = dir_ + \"part.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -284,18 +274,20 @@ }, "outputs": [], "source": [ - "import ibis\n", - "\n", "con_pd = ibis.pandas.connect()\n", "con_pl = ibis.polars.connect()\n", "\n", "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'pandas[pyarrow][ibis]': lambda x: con_pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", - " 'polars[lazy][ibis]': lambda x: con_pl.read_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"pandas[pyarrow][ibis]\": lambda x: con_pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", + " \"polars[lazy][ibis]\": lambda x: con_pl.read_parquet(x),\n", "}" ] }, @@ -324,7 +316,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pandas[pyarrow][ibis]'\n", + "tool = \"pandas[pyarrow][ibis]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q7_ibis(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier), tool='pandas')\n", "results[tool] = timings.all_runs" @@ -345,7 +337,7 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'polars[lazy][ibis]'\n", + "tool = \"polars[lazy][ibis]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q7_ibis(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier), tool='polars')\n", "results[tool] = timings.all_runs" @@ -366,10 +358,10 @@ "metadata": {}, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q7_pandas_native(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))\n", - "results[tool+'[native]'] = timings.all_runs" + "results[tool + \"[native]\"] = timings.all_runs" ] }, { @@ -405,7 +397,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q7(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -444,7 +436,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q7(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -483,7 +475,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q7(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -522,7 +514,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q7(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier)).collect()\n", "results[tool] = 
timings.all_runs" @@ -544,8 +536,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], diff --git a/tpch/notebooks/q9/execute.ipynb b/tpch/notebooks/q9/execute.ipynb index 86417e180..802799a01 100644 --- a/tpch/notebooks/q9/execute.ipynb +++ b/tpch/notebooks/q9/execute.ipynb @@ -15,7 +15,7 @@ }, "outputs": [], "source": [ - "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals " + "!pip uninstall apache-beam -y && pip install -U pandas polars pyarrow narwhals" ] }, { @@ -56,8 +56,10 @@ "outputs": [], "source": [ "from typing import Any\n", + "\n", "import narwhals as nw\n", "\n", + "\n", "def q9(\n", " part_ds_raw: Any,\n", " partsupp_ds_raw: Any,\n", @@ -66,7 +68,6 @@ " orders_ds_raw: Any,\n", " supplier_ds_raw: Any,\n", ") -> Any:\n", - "\n", " part_ds = nw.from_native(part_ds_raw)\n", " nation_ds = nw.from_native(nation_ds_raw)\n", " partsupp_ds = nw.from_native(partsupp_ds_raw)\n", @@ -91,7 +92,7 @@ " (\n", " nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\"))\n", " - nw.col(\"ps_supplycost\") * nw.col(\"l_quantity\")\n", - " ).alias(\"amount\")\n", + " ).alias(\"amount\"),\n", " )\n", " .group_by(\"nation\", \"o_year\")\n", " .agg(nw.sum(\"amount\").alias(\"sum_profit\"))\n", @@ -117,12 +118,12 @@ "outputs": [], "source": [ "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n", - "nation = dir_ + 'nation.parquet'\n", - "lineitem = dir_ + 'lineitem.parquet'\n", - "orders = dir_ + 'orders.parquet'\n", - "supplier = dir_ + 'supplier.parquet'\n", - "part = dir_ + 'part.parquet'\n", - "partsupp = dir_ + 'partsupp.parquet'" + "nation = dir_ + \"nation.parquet\"\n", + "lineitem = dir_ + \"lineitem.parquet\"\n", + "orders = dir_ + \"orders.parquet\"\n", + "supplier = dir_ + \"supplier.parquet\"\n", + "part = dir_ + \"part.parquet\"\n", + "partsupp = dir_ + \"partsupp.parquet\"" ] }, { @@ -141,10 +142,12 @@ "outputs": [], "source": [ "IO_FUNCS = {\n", - " 'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n", - " 'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n", - " 'polars[eager]': lambda x: pl.read_parquet(x),\n", - " 'polars[lazy]': lambda x: pl.scan_parquet(x),\n", + " \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n", + " \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n", + " x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n", + " ),\n", + " \"polars[eager]\": lambda x: pl.read_parquet(x),\n", + " \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n", "}" ] }, @@ -188,7 +191,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas'\n", + "tool = \"pandas\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q9(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -225,7 +228,7 @@ }, "outputs": [], "source": [ - "tool = 'pandas[pyarrow]'\n", + "tool = \"pandas[pyarrow]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q9(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -262,7 +265,7 @@ }, "outputs": [], "source": [ - "tool = 'polars[eager]'\n", + "tool = \"polars[eager]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q9(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier))\n", "results[tool] = timings.all_runs" @@ -299,7 +302,7 @@ }, "outputs": [], "source": [ - "tool = 
'polars[lazy]'\n", + "tool = \"polars[lazy]\"\n", "fn = IO_FUNCS[tool]\n", "timings = %timeit -o -q q9(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier)).collect()\n", "results[tool] = timings.all_runs" @@ -319,8 +322,9 @@ "outputs": [], "source": [ "import json\n", - "with open('results.json', 'w') as fd:\n", - " json.dump(results, fd)\n" + "\n", + "with open(\"results.json\", \"w\") as fd:\n", + " json.dump(results, fd)" ] } ], From 956274c9fc6eb901d95cc2af788658c66e6c3c87 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sat, 7 Sep 2024 13:39:59 +0100 Subject: [PATCH 13/30] add `by` argument to join_asof (#921) --- narwhals/_arrow/dataframe.py | 3 + narwhals/_dask/dataframe.py | 6 + narwhals/_pandas_like/dataframe.py | 6 + narwhals/dataframe.py | 222 ++++++++++++++++++++++++++++- tests/frame/join_test.py | 59 ++++++++ 5 files changed, 292 insertions(+), 4 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index f01ada158..960d833a5 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -322,6 +322,9 @@ def join_asof( left_on: str | None = None, right_on: str | None = None, on: str | None = None, + by_left: str | list[str] | None = None, + by_right: str | list[str] | None = None, + by: str | list[str] | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: msg = "join_asof is not yet supported on PyArrow tables" diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 8f11ccaad..5ef8c5a9d 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -305,6 +305,9 @@ def join_asof( left_on: str | None = None, right_on: str | None = None, on: str | None = None, + by_left: str | list[str] | None = None, + by_right: str | list[str] | None = None, + by: str | list[str] | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: plx = self.__native_namespace__() @@ -315,6 +318,9 @@ def join_asof( left_on=left_on, right_on=right_on, on=on, + left_by=by_left, + right_by=by_right, + by=by, direction=strategy, suffixes=("", "_right"), ), diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 9750cd9d4..3040adda0 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -516,6 +516,9 @@ def join_asof( left_on: str | None = None, right_on: str | None = None, on: str | None = None, + by_left: str | list[str] | None = None, + by_right: str | list[str] | None = None, + by: str | list[str] | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: plx = self.__native_namespace__() @@ -526,6 +529,9 @@ def join_asof( left_on=left_on, right_on=right_on, on=on, + left_by=by_left, + right_by=by_right, + by=by, direction=strategy, suffixes=("", "_right"), ), diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 440856eb4..165b65981 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -221,6 +221,9 @@ def join_asof( left_on: str | None = None, right_on: str | None = None, on: str | None = None, + by_left: str | list[str] | None = None, + by_right: str | list[str] | None = None, + by: str | list[str] | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: _supported_strategies = ("backward", "forward", "nearest") @@ -232,12 +235,30 @@ def join_asof( if left_on is not None and right_on is not None and on is not None: msg = 
"Either (`left_on` and `right_on`) or `on` keys should be specified." raise ValueError(msg) + if by_left is not None and by_right is not None and by is not None: + msg = "Can not specify `by_left`, `by_right`, and `by` keys at the same time." + raise ValueError(msg) + if by_left is not None and by_right is None and by is None: + msg = "`by_right` can not be None if `by_left` is specified." + raise ValueError(msg) + if by_left is None and by_right is not None and by is None: + msg = "`by_left` can not be None if `by_right` is specified." + raise ValueError(msg) + if ( + (by_left is None and by_right is not None) + or (by_left is not None and by_right is None) + ) and by is not None: + msg = "Either (`by_left` and `by_right_`) or `by` keys should be specified." + raise ValueError(msg) if left_on is not None and right_on is not None: return self._from_compliant_dataframe( self._compliant_frame.join_asof( self._extract_compliant(other), left_on=left_on, right_on=right_on, + by_left=by_left, + by_right=by_right, + by=by, strategy=strategy, ) ) @@ -246,6 +267,9 @@ def join_asof( self._compliant_frame.join_asof( self._extract_compliant(other), on=on, + by_left=by_left, + by_right=by_right, + by=by, strategy=strategy, ) ) @@ -1885,6 +1909,9 @@ def join_asof( left_on: str | None = None, right_on: str | None = None, on: str | None = None, + by_left: str | list[str] | None = None, + by_right: str | list[str] | None = None, + by: str | list[str] | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: """ @@ -1903,6 +1930,12 @@ def join_asof( on: Join column of both DataFrames. If set, left_on and right_on should be None. + by_left: join on these columns before doing asof join + + by_right: join on these columns before doing asof join + + by: join on these columns before doing asof join + strategy: Join strategy. The default is "backward". * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key. @@ -1946,7 +1979,9 @@ def join_asof( >>> @nw.narwhalify ... def join_asof_datetime(df, other_any, strategy): ... return df.join_asof(other_any, on="datetime", strategy=strategy) - >>> # We can now pass either pandas or Polars to the function: + + We can now pass either pandas or Polars to the function: + >>> join_asof_datetime(population_pd, gdp_pd, strategy="backward") datetime population gdp 0 2016-03-01 82.19 4164 @@ -1964,9 +1999,93 @@ def join_asof( │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │ │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ └─────────────────────┴────────────┴──────┘ + + Here is a real-world times-series example that uses `by` argument. + + >>> from datetime import datetime + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data_quotes = { + ... "datetime": [ + ... datetime(2016, 5, 25, 13, 30, 0, 23), + ... datetime(2016, 5, 25, 13, 30, 0, 23), + ... datetime(2016, 5, 25, 13, 30, 0, 30), + ... datetime(2016, 5, 25, 13, 30, 0, 41), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... datetime(2016, 5, 25, 13, 30, 0, 49), + ... datetime(2016, 5, 25, 13, 30, 0, 72), + ... datetime(2016, 5, 25, 13, 30, 0, 75), + ... ], + ... "ticker": [ + ... "GOOG", + ... "MSFT", + ... "MSFT", + ... "MSFT", + ... "GOOG", + ... "AAPL", + ... "GOOG", + ... "MSFT", + ... ], + ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], + ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], + ... } + >>> data_trades = { + ... "datetime": [ + ... 
datetime(2016, 5, 25, 13, 30, 0, 23), + ... datetime(2016, 5, 25, 13, 30, 0, 38), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... ], + ... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], + ... "quantity": [75, 155, 100, 100, 100], + ... } + >>> quotes_pd = pd.DataFrame(data_quotes) + >>> trades_pd = pd.DataFrame(data_trades) + >>> quotes_pl = pl.DataFrame(data_quotes).sort("datetime") + >>> trades_pl = pl.DataFrame(data_trades).sort("datetime") + + Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns: + + >>> @nw.narwhalify + ... def join_asof_datetime_by_ticker(df, other_any): + ... return df.join_asof(other_any, on="datetime", by="ticker") + + We can now pass either pandas or Polars to the function: + + >>> join_asof_datetime_by_ticker(trades_pd, quotes_pd) + datetime ticker price quantity bid ask + 0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN + + >>> join_asof_datetime_by_ticker(trades_pl, quotes_pl) + shape: (5, 6) + ┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐ + │ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │ + ╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡ + │ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │ + │ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │ + │ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │ + │ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │ + │ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │ + └────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘ """ return super().join_asof( - other, left_on=left_on, right_on=right_on, on=on, strategy=strategy + other, + left_on=left_on, + right_on=right_on, + on=on, + by_left=by_left, + by_right=by_right, + by=by, + strategy=strategy, ) # --- descriptive --- @@ -3515,6 +3634,9 @@ def join_asof( left_on: str | None = None, right_on: str | None = None, on: str | None = None, + by_left: str | list[str] | None = None, + by_right: str | list[str] | None = None, + by: str | list[str] | None = None, strategy: Literal["backward", "forward", "nearest"] = "backward", ) -> Self: """ @@ -3533,6 +3655,12 @@ def join_asof( on: Join column of both DataFrames. If set, left_on and right_on should be None. + by_left: join on these columns before doing asof join + + by_right: join on these columns before doing asof join + + by: join on these columns before doing asof join + strategy: Join strategy. The default is "backward". * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key. @@ -3575,7 +3703,9 @@ def join_asof( >>> @nw.narwhalify ... def join_asof_datetime(df, other_any, strategy): ... 
return df.join_asof(other_any, on="datetime", strategy=strategy) - >>> # We can now pass either pandas or Polars to the function: + + We can now pass either pandas or Polars to the function: + >>> join_asof_datetime(population_pd, gdp_pd, strategy="backward") datetime population gdp 0 2016-03-01 82.19 4164 @@ -3593,9 +3723,93 @@ def join_asof( │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │ │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ └─────────────────────┴────────────┴──────┘ + + Here is a real-world times-series example that uses `by` argument. + + >>> from datetime import datetime + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data_quotes = { + ... "datetime": [ + ... datetime(2016, 5, 25, 13, 30, 0, 23), + ... datetime(2016, 5, 25, 13, 30, 0, 23), + ... datetime(2016, 5, 25, 13, 30, 0, 30), + ... datetime(2016, 5, 25, 13, 30, 0, 41), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... datetime(2016, 5, 25, 13, 30, 0, 49), + ... datetime(2016, 5, 25, 13, 30, 0, 72), + ... datetime(2016, 5, 25, 13, 30, 0, 75), + ... ], + ... "ticker": [ + ... "GOOG", + ... "MSFT", + ... "MSFT", + ... "MSFT", + ... "GOOG", + ... "AAPL", + ... "GOOG", + ... "MSFT", + ... ], + ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], + ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], + ... } + >>> data_trades = { + ... "datetime": [ + ... datetime(2016, 5, 25, 13, 30, 0, 23), + ... datetime(2016, 5, 25, 13, 30, 0, 38), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... datetime(2016, 5, 25, 13, 30, 0, 48), + ... ], + ... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], + ... "quantity": [75, 155, 100, 100, 100], + ... } + >>> quotes_pd = pd.DataFrame(data_quotes) + >>> trades_pd = pd.DataFrame(data_trades) + >>> quotes_pl = pl.LazyFrame(data_quotes).sort("datetime") + >>> trades_pl = pl.LazyFrame(data_trades).sort("datetime") + + Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns: + + >>> @nw.narwhalify + ... def join_asof_datetime_by_ticker(df, other_any): + ... 
return df.join_asof(other_any, on="datetime", by="ticker") + + We can now pass either pandas or Polars to the function: + + >>> join_asof_datetime_by_ticker(trades_pd, quotes_pd) + datetime ticker price quantity bid ask + 0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN + + >>> join_asof_datetime_by_ticker(trades_pl, quotes_pl).collect() + shape: (5, 6) + ┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐ + │ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │ + ╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡ + │ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │ + │ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │ + │ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │ + │ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │ + │ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │ + └────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘ """ return super().join_asof( - other, left_on=left_on, right_on=right_on, on=on, strategy=strategy + other, + left_on=left_on, + right_on=right_on, + on=on, + by_left=by_left, + by_right=by_right, + by=by, + strategy=strategy, ) def clone(self) -> Self: diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 72f1304df..34a5961ef 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -324,6 +324,31 @@ def test_joinasof_time(constructor: Any, request: Any) -> None: compare_dicts(result_nearest_on, expected_nearest) +def test_joinasof_by(constructor: Any, request: Any) -> None: + if "pyarrow_table" in str(constructor): + request.applymarker(pytest.mark.xfail) + if parse_version(pd.__version__) < (2, 1) and ( + ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) + ): + request.applymarker(pytest.mark.xfail) + df = nw.from_native( + constructor({"a": [1, 5, 7, 10], "b": ["D", "D", "C", "A"], "c": [9, 2, 1, 1]}) + ).sort("a") + df_right = nw.from_native( + constructor({"a": [1, 4, 5, 8], "b": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]}) + ).sort("a") + result = df.join_asof(df_right, on="a", by_left="b", by_right="b") # type: ignore[arg-type] + result_by = df.join_asof(df_right, on="a", by="b") # type: ignore[arg-type] + expected = { + "a": [1, 5, 7, 10], + "b": ["D", "D", "C", "A"], + "c": [9, 2, 1, 1], + "d": [1, 3, float("nan"), 4], + } + compare_dicts(result, expected) + compare_dicts(result_by, expected) + + @pytest.mark.parametrize("strategy", ["back", "furthest"]) def test_joinasof_not_implemented(constructor: Any, strategy: str) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} @@ -361,3 +386,37 @@ def test_joinasof_no_keys(constructor: Any) -> None: match=msg, ): df.join_asof(df, left_on="a", right_on="a", on="a") # type: ignore[arg-type] + + +def test_joinasof_by_exceptions(constructor: Any) -> None: + data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + df = nw.from_native(constructor(data)) + with pytest.raises( + ValueError, + match=r"Can not specify `by_left`, `by_right`, and `by` keys at the same time.", + ): + df.join_asof(df, on="a", by_left="b", by_right="b", by="b") # type: ignore[arg-type] + 
+ with pytest.raises( + ValueError, + match=r"`by_right` can not be None if `by_left` is specified.", + ): + df.join_asof(df, on="a", by_left="b") # type: ignore[arg-type] + + with pytest.raises( + ValueError, + match=r"`by_left` can not be None if `by_right` is specified.", + ): + df.join_asof(df, on="a", by_right="b") # type: ignore[arg-type] + + with pytest.raises( + ValueError, + match=r"Either \(`by_left` and `by_right_`\) or `by` keys should be specified.", + ): + df.join_asof(df, on="a", by_left="b", by="b") # type: ignore[arg-type] + + with pytest.raises( + ValueError, + match=r"Either \(`by_left` and `by_right_`\) or `by` keys should be specified.", + ): + df.join_asof(df, on="a", by_right="b", by="b") # type: ignore[arg-type] From b32fd4ac089c4d848eb6a92cbb2e23080f74824c Mon Sep 17 00:00:00 2001 From: Isaias Gutierrez-Cruz <64386035+IsaiasGutierrezCruz@users.noreply.github.com> Date: Sat, 7 Sep 2024 14:45:47 -0600 Subject: [PATCH 14/30] feat: add first implementation of query 8 (#922) --- tpch/execute/q8.py | 39 +++++++++++++++++++++++++++++++++++ tpch/queries/q8.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 tpch/execute/q8.py create mode 100644 tpch/queries/q8.py diff --git a/tpch/execute/q8.py b/tpch/execute/q8.py new file mode 100644 index 000000000..f1a8677ff --- /dev/null +++ b/tpch/execute/q8.py @@ -0,0 +1,39 @@ +from queries import q8 + +from . import IO_FUNCS +from . import customer +from . import lineitem +from . import nation +from . import orders +from . import part +from . import region +from . import supplier + +tool = "pandas[pyarrow]" +fn = IO_FUNCS[tool] +print( + q8.query( + fn(part), + fn(supplier), + fn(lineitem), + fn(orders), + fn(customer), + fn(nation), + fn(region), + ) +) + + +tool = "polars[lazy]" +fn = IO_FUNCS[tool] +print( + q8.query( + fn(part), + fn(supplier), + fn(lineitem), + fn(orders), + fn(customer), + fn(nation), + fn(region), + ).collect() +) diff --git a/tpch/queries/q8.py b/tpch/queries/q8.py new file mode 100644 index 000000000..3fba96313 --- /dev/null +++ b/tpch/queries/q8.py @@ -0,0 +1,51 @@ +from datetime import date + +import narwhals as nw +from narwhals.typing import FrameT + + +@nw.narwhalify +def query( + part_ds: FrameT, + supplier_ds: FrameT, + line_item_ds: FrameT, + orders_ds: FrameT, + customer_ds: FrameT, + nation_ds: FrameT, + region_ds: FrameT, +) -> FrameT: + nation = "BRAZIL" + region = "AMERICA" + type = "ECONOMY ANODIZED STEEL" + date1 = date(1995, 1, 1) + date2 = date(1996, 12, 31) + + n1 = nation_ds.select("n_nationkey", "n_regionkey") + n2 = nation_ds.select("n_nationkey", "n_name") + + return ( + part_ds.join(line_item_ds, left_on="p_partkey", right_on="l_partkey") + .join(supplier_ds, left_on="l_suppkey", right_on="s_suppkey") + .join(orders_ds, left_on="l_orderkey", right_on="o_orderkey") + .join(customer_ds, left_on="o_custkey", right_on="c_custkey") + .join(n1, left_on="c_nationkey", right_on="n_nationkey") + .join(region_ds, left_on="n_regionkey", right_on="r_regionkey") + .filter(nw.col("r_name") == region) + .join(n2, left_on="s_nationkey", right_on="n_nationkey") + .filter(nw.col("o_orderdate").is_between(date1, date2)) + .filter(nw.col("p_type") == type) + .select( + nw.col("o_orderdate").dt.year().alias("o_year"), + (nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).alias("volume"), + nw.col("n_name").alias("nation"), + ) + .with_columns( + nw.when(nw.col("nation") == nation) + .then(nw.col("volume")) + .otherwise(0) + .alias("_tmp") + ) + 
.group_by("o_year") + .agg((nw.sum("_tmp") / nw.sum("volume")).round(2).alias("mkt_share")) + .sort("o_year") + ) From 525d92df9b644c30ae1f7371cca523bd0586a095 Mon Sep 17 00:00:00 2001 From: Alessandro Miola <37796412+AlessandroMiola@users.noreply.github.com> Date: Sun, 8 Sep 2024 12:06:13 +0200 Subject: [PATCH 15/30] docs: fix a typo (#925) --- docs/why.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/why.md b/docs/why.md index adf8f39b4..4ec605d16 100644 --- a/docs/why.md +++ b/docs/why.md @@ -27,7 +27,7 @@ pl_df_right = pl.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]}) pl_left_merge = pl_df_left.join(pl_df_right, left_on="b", right_on="c", how="left") print(pd_left_merge.columns) -print(pl_df_right.columns) +print(pl_left_merge.columns) ``` There are several such subtle difference between the libraries. Writing dataframe-agnostic code is hard! From ee8c62a4bbc8201360d0e43a7056b2f163c31918 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Sun, 8 Sep 2024 06:49:35 -0400 Subject: [PATCH 16/30] fix: Fixes arrow support for df[:, list[int|str]] (#923) --- narwhals/_arrow/dataframe.py | 9 ++++++--- narwhals/dataframe.py | 10 +++++++++- narwhals/stable/v1.py | 4 ++++ tests/frame/slice_test.py | 6 ++++++ 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 960d833a5..064903d74 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -136,9 +136,12 @@ def __getitem__( and len(item) == 2 and isinstance(item[1], (list, tuple)) ): - return self._from_native_frame( - self._native_frame.take(item[0]).select(item[1]) - ) + if item[0] == slice(None): + selected_rows = self._native_frame + else: + selected_rows = self._native_frame.take(item[0]) + + return self._from_native_frame(selected_rows.select(item[1])) elif isinstance(item, tuple) and len(item) == 2: if isinstance(item[1], slice): diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 165b65981..f6fccb274 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -585,10 +585,14 @@ def __getitem__(self, item: tuple[Sequence[int], slice]) -> Self: ... @overload def __getitem__(self, item: tuple[Sequence[int], Sequence[int]]) -> Self: ... @overload + def __getitem__(self, item: tuple[slice, Sequence[int]]) -> Self: ... + @overload def __getitem__(self, item: tuple[Sequence[int], str]) -> Series: ... # type: ignore[overload-overlap] @overload def __getitem__(self, item: tuple[Sequence[int], Sequence[str]]) -> Self: ... @overload + def __getitem__(self, item: tuple[slice, Sequence[str]]) -> Self: ... + @overload def __getitem__(self, item: tuple[Sequence[int], int]) -> Series: ... # type: ignore[overload-overlap] @overload @@ -606,7 +610,7 @@ def __getitem__( | slice | Sequence[int] | tuple[Sequence[int], str | int] - | tuple[Sequence[int], Sequence[int] | Sequence[str] | slice], + | tuple[slice | Sequence[int], Sequence[int] | Sequence[str] | slice], ) -> Series | Self: """ Extract column or slice of DataFrame. @@ -623,6 +627,10 @@ def __getitem__( a `Series`. - `df[[0, 1], [0, 1, 2]]` extracts the first two rows and the first three columns and returns a `DataFrame` + - `df[:, [0, 1, 2]]` extracts all rows from the first three columns and returns a + `DataFrame`. + - `df[:, ['a', 'c']]` extracts all rows and columns `'a'` and `'c'` and returns a + `DataFrame`. 
- `df[0: 2, ['a', 'c']]` extracts the first two rows and columns `'a'` and `'c'` and returns a `DataFrame` - `df[:, 0: 2]` extracts all rows from the first two columns and returns a `DataFrame` diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 98ee12a7b..1af7a26f3 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -76,11 +76,15 @@ class DataFrame(NwDataFrame[IntoDataFrameT]): def __getitem__(self, item: tuple[Sequence[int], slice]) -> Self: ... @overload def __getitem__(self, item: tuple[Sequence[int], Sequence[int]]) -> Self: ... + @overload + def __getitem__(self, item: tuple[slice, Sequence[int]]) -> Self: ... @overload def __getitem__(self, item: tuple[Sequence[int], str]) -> Series: ... # type: ignore[overload-overlap] @overload def __getitem__(self, item: tuple[Sequence[int], Sequence[str]]) -> Self: ... + @overload + def __getitem__(self, item: tuple[slice, Sequence[str]]) -> Self: ... @overload def __getitem__(self, item: tuple[Sequence[int], int]) -> Series: ... # type: ignore[overload-overlap] diff --git a/tests/frame/slice_test.py b/tests/frame/slice_test.py index eea94d440..18b05bf3b 100644 --- a/tests/frame/slice_test.py +++ b/tests/frame/slice_test.py @@ -141,6 +141,12 @@ def test_slice_slice_columns(constructor_eager: Any) -> None: result = df[[0, 1], 1:] expected = {"b": [4, 5], "c": [7, 8], "d": [1, 4]} compare_dicts(result, expected) + result = df[:, ["b", "d"]] + expected = {"b": [4, 5, 6], "d": [1, 4, 2]} + compare_dicts(result, expected) + result = df[:, [0, 2]] + expected = {"a": [1, 2, 3], "c": [7, 8, 9]} + compare_dicts(result, expected) def test_slice_invalid(constructor_eager: Any) -> None: From 64b58a7daa77e83a18d3b2487c58f8e77fd7a8a3 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Sun, 8 Sep 2024 12:02:12 +0100 Subject: [PATCH 17/30] release: Bump version to 1.6.3 (#926) --- docs/installation.md | 2 +- narwhals/__init__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index 796cd8708..b89aa4b69 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -11,6 +11,6 @@ Then, if you start the Python REPL and see the following: ```python >>> import narwhals >>> narwhals.__version__ -'1.6.2' +'1.6.3' ``` then installation worked correctly! 
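
Reviewer note on the `__getitem__` change in the patch above: with the fix, `df[:, cols]` on the PyArrow backend skips the row `take` entirely when the row selector is `slice(None)`. A minimal sketch of the newly covered forms, mirroring the cases in `tests/frame/slice_test.py` — this assumes narwhals >= 1.6.3 with this change installed, plus a local pyarrow; `eager_only=True` is the standard `nw.from_native` flag, not something introduced by this patch:

```python
# Sketch: slicing forms exercised by the new tests, on the PyArrow backend.
import pyarrow as pa

import narwhals as nw

tbl = pa.table({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
df = nw.from_native(tbl, eager_only=True)

# All rows, columns selected by name -> DataFrame
print(df[:, ["a", "c"]].to_native())

# All rows, columns selected by position -> DataFrame
print(df[:, [0, 2]].to_native())

# Row positions and column positions together still work as before
print(df[[0, 1], [0, 2]].to_native())
```
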
diff --git a/narwhals/__init__.py b/narwhals/__init__.py index 56c638e84..b26cf9490 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -53,7 +53,7 @@ from narwhals.utils import maybe_get_index from narwhals.utils import maybe_set_index -__version__ = "1.6.2" +__version__ = "1.6.3" __all__ = [ "dependencies", diff --git a/pyproject.toml b/pyproject.toml index c4a10603f..a928ae0df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "narwhals" -version = "1.6.2" +version = "1.6.3" authors = [ { name="Marco Gorelli", email="33491632+MarcoGorelli@users.noreply.github.com" }, ] From 693d53a0014c407e57c19854ece235d0ccb39227 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Sun, 8 Sep 2024 13:54:43 +0100 Subject: [PATCH 18/30] feat: add `on` key to `join` (#927) --- narwhals/_arrow/dataframe.py | 5 -- narwhals/_dask/dataframe.py | 5 -- narwhals/_pandas_like/dataframe.py | 5 -- narwhals/dataframe.py | 81 ++++++++++++++++++------------ tests/frame/join_test.py | 71 ++++++++++++++++++++------ 5 files changed, 106 insertions(+), 61 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 064903d74..2750f8c09 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -278,11 +278,6 @@ def join( left_on: str | list[str] | None, right_on: str | list[str] | None, ) -> Self: - if isinstance(left_on, str): - left_on = [left_on] - if isinstance(right_on, str): - right_on = [right_on] - how_to_join_map = { "anti": "left anti", "semi": "left semi", diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 5ef8c5a9d..1a40d7a6c 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -209,11 +209,6 @@ def join( left_on: str | list[str] | None, right_on: str | list[str] | None, ) -> Self: - if isinstance(left_on, str): - left_on = [left_on] - if isinstance(right_on, str): - right_on = [right_on] - if how == "cross": key_token = generate_unique_token( n_bytes=8, columns=[*self.columns, *other.columns] diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 3040adda0..880e2d140 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -404,11 +404,6 @@ def join( left_on: str | list[str] | None, right_on: str | list[str] | None, ) -> Self: - if isinstance(left_on, str): - left_on = [left_on] - if isinstance(right_on, str): - right_on = [right_on] - if how == "cross": if ( self._implementation is Implementation.MODIN diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index f6fccb274..ffd7ce36d 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -186,6 +186,7 @@ def join( how: Literal["inner", "left", "cross", "semi", "anti"] = "inner", left_on: str | list[str] | None = None, right_on: str | list[str] | None = None, + on: str | list[str] | None = None, ) -> Self: _supported_joins = ("inner", "left", "cross", "anti", "semi") @@ -193,10 +194,25 @@ def join( msg = f"Only the following join strategies are supported: {_supported_joins}; found '{how}'." 
raise NotImplementedError(msg) - if how == "cross" and (left_on or right_on): - msg = "Can not pass left_on, right_on for cross join" + if how == "cross" and ( + left_on is not None or right_on is not None or on is not None + ): + msg = "Can not pass `left_on`, `right_on` or `on` keys for cross join" + raise ValueError(msg) + + if how != "cross" and (on is None and (left_on is None or right_on is None)): + msg = f"Either (`left_on` and `right_on`) or `on` keys should be specified for {how}." raise ValueError(msg) + if how != "cross" and ( + on is not None and (left_on is not None or right_on is not None) + ): + msg = f"If `on` is specified, `left_on` and `right_on` should be None for {how}." + raise ValueError(msg) + + if on is not None: + left_on = right_on = on + return self._from_compliant_dataframe( self._compliant_frame.join( self._extract_compliant(other), @@ -232,37 +248,24 @@ def join_asof( msg = f"Only the following strategies are supported: {_supported_strategies}; found '{strategy}'." raise NotImplementedError(msg) - if left_on is not None and right_on is not None and on is not None: + if (on is None) and (left_on is None or right_on is None): msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." raise ValueError(msg) - if by_left is not None and by_right is not None and by is not None: - msg = "Can not specify `by_left`, `by_right`, and `by` keys at the same time." - raise ValueError(msg) - if by_left is not None and by_right is None and by is None: - msg = "`by_right` can not be None if `by_left` is specified." - raise ValueError(msg) - if by_left is None and by_right is not None and by is None: - msg = "`by_left` can not be None if `by_right` is specified." + if (on is not None) and (left_on is not None or right_on is not None): + msg = "If `on` is specified, `left_on` and `right_on` should be None." raise ValueError(msg) - if ( + if (by is None) and ( (by_left is None and by_right is not None) or (by_left is not None and by_right is None) - ) and by is not None: - msg = "Either (`by_left` and `by_right_`) or `by` keys should be specified." - raise ValueError(msg) - if left_on is not None and right_on is not None: - return self._from_compliant_dataframe( - self._compliant_frame.join_asof( - self._extract_compliant(other), - left_on=left_on, - right_on=right_on, - by_left=by_left, - by_right=by_right, - by=by, - strategy=strategy, - ) + ): + msg = ( + "Can not specify only `by_left` or `by_right`, you need to specify both." ) - elif on is not None: + raise ValueError(msg) + if (by is not None) and (by_left is not None or by_right is not None): + msg = "If `by` is specified, `by_left` and `by_right` should be None." + raise ValueError(msg) + if on is not None: return self._from_compliant_dataframe( self._compliant_frame.join_asof( self._extract_compliant(other), @@ -273,9 +276,17 @@ def join_asof( strategy=strategy, ) ) - else: - msg = "Either (`left_on` and `right_on`) or `on` keys should be specified." - raise ValueError(msg) + return self._from_compliant_dataframe( + self._compliant_frame.join_asof( + self._extract_compliant(other), + left_on=left_on, + right_on=right_on, + by_left=by_left, + by_right=by_right, + by=by, + strategy=strategy, + ) + ) class DataFrame(BaseFrame[FrameT]): @@ -1843,6 +1854,7 @@ def join( how: Literal["inner", "left", "cross", "semi", "anti"] = "inner", left_on: str | list[str] | None = None, right_on: str | list[str] | None = None, + on: str | list[str] | None = None, ) -> Self: r""" Join in SQL-like fashion. 
@@ -1861,6 +1873,8 @@ def join( right_on: Name(s) of the right join column(s). + on: Join column of both DataFrames. If set, left_on and right_on should be None. + Returns: A new joined DataFrame @@ -1908,7 +1922,7 @@ def join( │ 2 ┆ 7.0 ┆ b ┆ y │ └─────┴─────┴─────┴───────┘ """ - return super().join(other, how=how, left_on=left_on, right_on=right_on) + return super().join(other, how=how, left_on=left_on, right_on=right_on, on=on) def join_asof( self, @@ -3568,6 +3582,7 @@ def join( how: Literal["inner", "left", "cross", "semi", "anti"] = "inner", left_on: str | list[str] | None = None, right_on: str | list[str] | None = None, + on: str | list[str] | None = None, ) -> Self: r""" Add a join operation to the Logical Plan. @@ -3586,6 +3601,8 @@ def join( right_on: Join column of the right DataFrame. + on: Join column of both DataFrames. If set, left_on and right_on should be None. + Returns: A new joined LazyFrame @@ -3633,7 +3650,7 @@ def join( │ 2 ┆ 7.0 ┆ b ┆ y │ └─────┴─────┴─────┴───────┘ """ - return super().join(other, how=how, left_on=left_on, right_on=right_on) + return super().join(other, how=how, left_on=left_on, right_on=right_on, on=on) def join_asof( self, diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 34a5961ef..6615d5031 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -18,8 +18,9 @@ def test_inner_join_two_keys(constructor: Any) -> None: df = nw.from_native(constructor(data)) df_right = df result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner") # type: ignore[arg-type] - result = result.sort("index") - result = result.drop("index_right") + result_on = df.join(df_right, on=["a", "b"], how="inner") # type: ignore[arg-type] + result = result.sort("index").drop("index_right") + result_on = result_on.sort("index").drop("index_right") expected = { "a": [1, 3, 2], "b": [4, 4, 6], @@ -28,6 +29,7 @@ def test_inner_join_two_keys(constructor: Any) -> None: "index": [0, 1, 2], } compare_dicts(result, expected) + compare_dicts(result_on, expected) def test_inner_join_single_key(constructor: Any) -> None: @@ -35,7 +37,9 @@ def test_inner_join_single_key(constructor: Any) -> None: df = nw.from_native(constructor(data)) df_right = df result = df.join(df_right, left_on="a", right_on="a", how="inner").sort("index") # type: ignore[arg-type] + result_on = df.join(df_right, on="a", how="inner").sort("index") # type: ignore[arg-type] result = result.drop("index_right") + result_on = result_on.drop("index_right") expected = { "a": [1, 3, 2], "b": [4, 4, 6], @@ -45,6 +49,7 @@ def test_inner_join_single_key(constructor: Any) -> None: "index": [0, 1, 2], } compare_dicts(result, expected) + compare_dicts(result_on, expected) def test_cross_join(constructor: Any) -> None: @@ -57,7 +62,9 @@ def test_cross_join(constructor: Any) -> None: } compare_dicts(result, expected) - with pytest.raises(ValueError, match="Can not pass left_on, right_on for cross join"): + with pytest.raises( + ValueError, match="Can not pass `left_on`, `right_on` or `on` keys for cross join" + ): df.join(df, how="cross", left_on="a") # type: ignore[arg-type] @@ -206,6 +213,33 @@ def test_left_join_overlapping_column(constructor: Any) -> None: compare_dicts(result, expected) +@pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) +def test_join_keys_exceptions(constructor: Any, how: str) -> None: + data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + df = nw.from_native(constructor(data)) + + with pytest.raises( + ValueError, + match=rf"Either 
\(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", + ): + df.join(df, how=how) # type: ignore[arg-type] + with pytest.raises( + ValueError, + match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", + ): + df.join(df, how=how, left_on="a") # type: ignore[arg-type] + with pytest.raises( + ValueError, + match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", + ): + df.join(df, how=how, right_on="a") # type: ignore[arg-type] + with pytest.raises( + ValueError, + match=f"If `on` is specified, `left_on` and `right_on` should be None for {how}.", + ): + df.join(df, how=how, on="a", right_on="a") # type: ignore[arg-type] + + def test_joinasof_numeric(constructor: Any, request: Any) -> None: if "pyarrow_table" in str(constructor): request.applymarker(pytest.mark.xfail) @@ -361,31 +395,40 @@ def test_joinasof_not_implemented(constructor: Any, strategy: str) -> None: df.join_asof(df, left_on="a", right_on="a", strategy=strategy) # type: ignore[arg-type] -def test_joinasof_no_keys(constructor: Any) -> None: +def test_joinasof_keys_exceptions(constructor: Any) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df = nw.from_native(constructor(data)) - msg = r"Either \(`left_on` and `right_on`\) or `on` keys should be specified." with pytest.raises( ValueError, - match=msg, + match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): df.join_asof(df, left_on="a") # type: ignore[arg-type] with pytest.raises( ValueError, - match=msg, + match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): df.join_asof(df, right_on="a") # type: ignore[arg-type] with pytest.raises( ValueError, - match=msg, + match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): df.join_asof(df) # type: ignore[arg-type] with pytest.raises( ValueError, - match=msg, + match="If `on` is specified, `left_on` and `right_on` should be None.", ): df.join_asof(df, left_on="a", right_on="a", on="a") # type: ignore[arg-type] + with pytest.raises( + ValueError, + match="If `on` is specified, `left_on` and `right_on` should be None.", + ): + df.join_asof(df, left_on="a", on="a") # type: ignore[arg-type] + with pytest.raises( + ValueError, + match="If `on` is specified, `left_on` and `right_on` should be None.", + ): + df.join_asof(df, right_on="a", on="a") # type: ignore[arg-type] def test_joinasof_by_exceptions(constructor: Any) -> None: @@ -393,30 +436,30 @@ def test_joinasof_by_exceptions(constructor: Any) -> None: df = nw.from_native(constructor(data)) with pytest.raises( ValueError, - match=r"Can not specify `by_left`, `by_right`, and `by` keys at the same time.", + match="If `by` is specified, `by_left` and `by_right` should be None.", ): df.join_asof(df, on="a", by_left="b", by_right="b", by="b") # type: ignore[arg-type] with pytest.raises( ValueError, - match=r"`by_right` can not be None if `by_left` is specified.", + match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): df.join_asof(df, on="a", by_left="b") # type: ignore[arg-type] with pytest.raises( ValueError, - match=r"`by_left` can not be None if `by_right` is specified.", + match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): df.join_asof(df, on="a", by_right="b") # type: ignore[arg-type] with pytest.raises( ValueError, - match=r"Either \(`by_left` and `by_right_`\) or `by` keys should be specified.", + match="If `by` is specified, `by_left` 
and `by_right` should be None.", ): df.join_asof(df, on="a", by_left="b", by="b") # type: ignore[arg-type] with pytest.raises( ValueError, - match=r"Either \(`by_left` and `by_right_`\) or `by` keys should be specified.", + match="If `by` is specified, `by_left` and `by_right` should be None.", ): df.join_asof(df, on="a", by_right="b", by="b") # type: ignore[arg-type] From 7bfb7764d7ade3cf0753ee759379f090d47e6f41 Mon Sep 17 00:00:00 2001 From: Liam Connors Date: Mon, 9 Sep 2024 03:04:53 -0400 Subject: [PATCH 19/30] feat: implement `to_arrow` for cuDF (#924) * xfail to_arrow tests for cuDF * Revert "xfail to_arrow tests for cuDF" This reverts commit d695efd210fb30b2e2e5beea84a67d25836f152b. * implement to_arrow for cuDF --- narwhals/_pandas_like/dataframe.py | 3 +-- narwhals/_pandas_like/series.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 880e2d140..ce0cae8ac 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -672,8 +672,7 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: def to_arrow(self: Self) -> Any: if self._implementation is Implementation.CUDF: # pragma: no cover - msg = "`to_arrow` is not implemented for CuDF backend." - raise NotImplementedError(msg) + return self._native_frame.to_arrow(preserve_index=False) import pyarrow as pa # ignore-banned-import() diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index e94c95a8c..b28a04088 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -635,8 +635,7 @@ def clip( def to_arrow(self: Self) -> Any: if self._implementation is Implementation.CUDF: # pragma: no cover - msg = "`to_arrow` is not implemented for CuDF backend." - raise NotImplementedError(msg) + return self._native_series.to_arrow() import pyarrow as pa # ignore-banned-import() From e7b3b83e8e6808ff7d6c43f4edf2a03ba5aceac5 Mon Sep 17 00:00:00 2001 From: Liam Connors Date: Mon, 9 Sep 2024 03:18:19 -0400 Subject: [PATCH 20/30] update drop_nulls docstring (#928) --- narwhals/series.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/narwhals/series.py b/narwhals/series.py index d80564d22..0b1645ea7 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -783,12 +783,9 @@ def drop_nulls(self) -> Self: """ Drop all null values. - See Also: - drop_nans - Notes: - A null value is not the same as a NaN value. - To drop NaN values, use :func:`drop_nans`. + pandas and Polars handle null values differently. Polars distinguishes + between NaN and Null, whereas pandas doesn't. 
Examples: >>> import pandas as pd From 767fbfb138c1efc665723e4c08ecab741d56140c Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 9 Sep 2024 14:54:32 +0100 Subject: [PATCH 21/30] fix: (#930) fix join when using string literals --- narwhals/_pandas_like/dataframe.py | 4 + tests/frame/join_test.py | 283 +++++++++++++++++++---------- 2 files changed, 191 insertions(+), 96 deletions(-) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index ce0cae8ac..4ec42ef59 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -404,6 +404,10 @@ def join( left_on: str | list[str] | None, right_on: str | list[str] | None, ) -> Self: + if isinstance(left_on, str): + left_on = [left_on] + if isinstance(right_on, str): + right_on = [right_on] if how == "cross": if ( self._implementation is Implementation.MODIN diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 6615d5031..6a1985f41 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -3,6 +3,7 @@ import re from datetime import datetime from typing import Any +from typing import Literal import pandas as pd import pytest @@ -14,18 +15,28 @@ def test_inner_join_two_keys(constructor: Any) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "index": [0, 1, 2]} + data = { + "antananarivo": [1, 3, 2], + "bob": [4, 4, 6], + "zorro": [7.0, 8, 9], + "index": [0, 1, 2], + } df = nw.from_native(constructor(data)) df_right = df - result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner") # type: ignore[arg-type] - result_on = df.join(df_right, on=["a", "b"], how="inner") # type: ignore[arg-type] + result = df.join( + df_right, # type: ignore[arg-type] + left_on=["antananarivo", "bob"], + right_on=["antananarivo", "bob"], + how="inner", + ) + result_on = df.join(df_right, on=["antananarivo", "bob"], how="inner") # type: ignore[arg-type] result = result.sort("index").drop("index_right") result_on = result_on.sort("index").drop("index_right") expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8, 9], - "z_right": [7.0, 8, 9], + "antananarivo": [1, 3, 2], + "bob": [4, 4, 6], + "zorro": [7.0, 8, 9], + "zorro_right": [7.0, 8, 9], "index": [0, 1, 2], } compare_dicts(result, expected) @@ -33,19 +44,29 @@ def test_inner_join_two_keys(constructor: Any) -> None: def test_inner_join_single_key(constructor: Any) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "index": [0, 1, 2]} + data = { + "antananarivo": [1, 3, 2], + "bob": [4, 4, 6], + "zorro": [7.0, 8, 9], + "index": [0, 1, 2], + } df = nw.from_native(constructor(data)) df_right = df - result = df.join(df_right, left_on="a", right_on="a", how="inner").sort("index") # type: ignore[arg-type] - result_on = df.join(df_right, on="a", how="inner").sort("index") # type: ignore[arg-type] + result = df.join( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + how="inner", + ).sort("index") + result_on = df.join(df_right, on="antananarivo", how="inner").sort("index") # type: ignore[arg-type] result = result.drop("index_right") result_on = result_on.drop("index_right") expected = { - "a": [1, 3, 2], - "b": [4, 4, 6], - "b_right": [4, 4, 6], - "z": [7.0, 8, 9], - "z_right": [7.0, 8, 9], + "antananarivo": [1, 3, 2], + "bob": [4, 4, 6], + "bob_right": [4, 4, 6], + "zorro": [7.0, 8, 9], + "zorro_right": [7.0, 8, 9], "index": [0, 1, 2], } compare_dicts(result, expected) @@ -53,30 +74,30 @@ def 
test_inner_join_single_key(constructor: Any) -> None: def test_cross_join(constructor: Any) -> None: - data = {"a": [1, 3, 2]} + data = {"antananarivo": [1, 3, 2]} df = nw.from_native(constructor(data)) - result = df.join(df, how="cross").sort("a", "a_right") # type: ignore[arg-type] + result = df.join(df, how="cross").sort("antananarivo", "antananarivo_right") # type: ignore[arg-type] expected = { - "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], - "a_right": [1, 2, 3, 1, 2, 3, 1, 2, 3], + "antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "antananarivo_right": [1, 2, 3, 1, 2, 3, 1, 2, 3], } compare_dicts(result, expected) with pytest.raises( ValueError, match="Can not pass `left_on`, `right_on` or `on` keys for cross join" ): - df.join(df, how="cross", left_on="a") # type: ignore[arg-type] + df.join(df, how="cross", left_on="antananarivo") # type: ignore[arg-type] def test_cross_join_non_pandas() -> None: - data = {"a": [1, 3, 2]} + data = {"antananarivo": [1, 3, 2]} df = nw.from_native(pd.DataFrame(data)) # HACK to force testing for a non-pandas codepath df._compliant_frame._implementation = Implementation.MODIN result = df.join(df, how="cross") # type: ignore[arg-type] expected = { - "a": [1, 1, 1, 3, 3, 3, 2, 2, 2], - "a_right": [1, 3, 2, 1, 3, 2, 1, 3, 2], + "antananarivo": [1, 1, 1, 3, 3, 3, 2, 2, 2], + "antananarivo_right": [1, 3, 2, 1, 3, 2, 1, 3, 2], } compare_dicts(result, expected) @@ -84,9 +105,17 @@ def test_cross_join_non_pandas() -> None: @pytest.mark.parametrize( ("join_key", "filter_expr", "expected"), [ - (["a", "b"], (nw.col("b") < 5), {"a": [2], "b": [6], "z": [9]}), - (["b"], (nw.col("b") < 5), {"a": [2], "b": [6], "z": [9]}), - (["b"], (nw.col("b") > 5), {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]}), + ( + ["antananarivo", "bob"], + (nw.col("bob") < 5), + {"antananarivo": [2], "bob": [6], "zorro": [9]}, + ), + (["bob"], (nw.col("bob") < 5), {"antananarivo": [2], "bob": [6], "zorro": [9]}), + ( + ["bob"], + (nw.col("bob") > 5), + {"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7.0, 8.0]}, + ), ], ) def test_anti_join( @@ -95,7 +124,7 @@ def test_anti_join( filter_expr: nw.Expr, expected: dict[str, list[Any]], ) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) other = df.filter(filter_expr) result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type] @@ -105,9 +134,21 @@ def test_anti_join( @pytest.mark.parametrize( ("join_key", "filter_expr", "expected"), [ - (["a"], (nw.col("b") > 5), {"a": [2], "b": [6], "z": [9]}), - (["b"], (nw.col("b") < 5), {"a": [1, 3], "b": [4, 4], "z": [7, 8]}), - (["a", "b"], (nw.col("b") < 5), {"a": [1, 3], "b": [4, 4], "z": [7, 8]}), + ( + ["antananarivo"], + (nw.col("bob") > 5), + {"antananarivo": [2], "bob": [6], "zorro": [9]}, + ), + ( + ["bob"], + (nw.col("bob") < 5), + {"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]}, + ), + ( + ["antananarivo", "bob"], + (nw.col("bob") < 5), + {"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]}, + ), ], ) def test_semi_join( @@ -116,16 +157,18 @@ def test_semi_join( filter_expr: nw.Expr, expected: dict[str, list[Any]], ) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) other = df.filter(filter_expr) - result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort("a") # type: 
ignore[arg-type] + result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( # type: ignore[arg-type] + "antananarivo" + ) compare_dicts(result, expected) @pytest.mark.parametrize("how", ["right", "full"]) def test_join_not_implemented(constructor: Any, how: str) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( @@ -134,24 +177,28 @@ def test_join_not_implemented(constructor: Any, how: str) -> None: f"Only the following join strategies are supported: ('inner', 'left', 'cross', 'anti', 'semi'); found '{how}'." ), ): - df.join(df, left_on="a", right_on="a", how=how) # type: ignore[arg-type] + df.join(df, left_on="antananarivo", right_on="antananarivo", how=how) # type: ignore[arg-type] @pytest.mark.filterwarnings("ignore:the default coalesce behavior") def test_left_join(constructor: Any) -> None: - data_left = {"a": [1.0, 2, 3], "b": [4.0, 5, 6], "index": [0.0, 1.0, 2.0]} - data_right = {"a": [1.0, 2, 3], "c": [4.0, 5, 7], "index": [0.0, 1.0, 2.0]} + data_left = { + "antananarivo": [1.0, 2, 3], + "bob": [4.0, 5, 6], + "index": [0.0, 1.0, 2.0], + } + data_right = {"antananarivo": [1.0, 2, 3], "c": [4.0, 5, 7], "index": [0.0, 1.0, 2.0]} df_left = nw.from_native(constructor(data_left)) df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on="b", right_on="c", how="left").select( # type: ignore[arg-type] + result = df_left.join(df_right, left_on="bob", right_on="c", how="left").select( # type: ignore[arg-type] nw.all().fill_null(float("nan")) ) result = result.sort("index") result = result.drop("index_right") expected = { - "a": [1, 2, 3], - "b": [4, 5, 6], - "a_right": [1, 2, float("nan")], + "antananarivo": [1, 2, 3], + "bob": [4, 5, 6], + "antananarivo_right": [1, 2, float("nan")], "index": [0, 1, 2], } compare_dicts(result, expected) @@ -159,54 +206,62 @@ def test_left_join(constructor: Any) -> None: @pytest.mark.filterwarnings("ignore: the default coalesce behavior") def test_left_join_multiple_column(constructor: Any) -> None: - data_left = {"a": [1, 2, 3], "b": [4, 5, 6], "index": [0, 1, 2]} - data_right = {"a": [1, 2, 3], "c": [4, 5, 6], "index": [0, 1, 2]} + data_left = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "index": [0, 1, 2]} + data_right = {"antananarivo": [1, 2, 3], "c": [4, 5, 6], "index": [0, 1, 2]} df_left = nw.from_native(constructor(data_left)) df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on=["a", "b"], right_on=["a", "c"], how="left") # type: ignore[arg-type] + result = df_left.join( + df_right, # type: ignore[arg-type] + left_on=["antananarivo", "bob"], + right_on=["antananarivo", "c"], + how="left", + ) result = result.sort("index") result = result.drop("index_right") - expected = {"a": [1, 2, 3], "b": [4, 5, 6], "index": [0, 1, 2]} + expected = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "index": [0, 1, 2]} compare_dicts(result, expected) @pytest.mark.filterwarnings("ignore: the default coalesce behavior") def test_left_join_overlapping_column(constructor: Any) -> None: data_left = { - "a": [1.0, 2, 3], - "b": [4.0, 5, 6], + "antananarivo": [1.0, 2, 3], + "bob": [4.0, 5, 6], "d": [1.0, 4, 2], "index": [0.0, 1.0, 2.0], } data_right = { - "a": [1.0, 2, 3], + "antananarivo": [1.0, 2, 3], "c": [4.0, 5, 6], "d": [1.0, 4, 2], "index": [0.0, 1.0, 2.0], } df_left = nw.from_native(constructor(data_left)) df_right = 
nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on="b", right_on="c", how="left").sort("index") # type: ignore[arg-type] + result = df_left.join(df_right, left_on="bob", right_on="c", how="left").sort("index") # type: ignore[arg-type] result = result.drop("index_right") expected: dict[str, list[Any]] = { - "a": [1, 2, 3], - "b": [4, 5, 6], + "antananarivo": [1, 2, 3], + "bob": [4, 5, 6], "d": [1, 4, 2], - "a_right": [1, 2, 3], + "antananarivo_right": [1, 2, 3], "d_right": [1, 4, 2], "index": [0, 1, 2], } compare_dicts(result, expected) - result = df_left.join(df_right, left_on="a", right_on="d", how="left").select( # type: ignore[arg-type] - nw.all().fill_null(float("nan")) - ) + result = df_left.join( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="d", + how="left", + ).select(nw.all().fill_null(float("nan"))) result = result.sort("index") result = result.drop("index_right") expected = { - "a": [1, 2, 3], - "b": [4, 5, 6], + "antananarivo": [1, 2, 3], + "bob": [4, 5, 6], "d": [1, 4, 2], - "a_right": [1.0, 3.0, float("nan")], + "antananarivo_right": [1.0, 3.0, float("nan")], "c": [4.0, 6.0, float("nan")], "index": [0, 1, 2], } @@ -215,7 +270,7 @@ def test_left_join_overlapping_column(constructor: Any) -> None: @pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) def test_join_keys_exceptions(constructor: Any, how: str) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( @@ -227,17 +282,17 @@ def test_join_keys_exceptions(constructor: Any, how: str) -> None: ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how, left_on="a") # type: ignore[arg-type] + df.join(df, how=how, left_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=rf"Either \(`left_on` and `right_on`\) or `on` keys should be specified for {how}.", ): - df.join(df, how=how, right_on="a") # type: ignore[arg-type] + df.join(df, how=how, right_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=f"If `on` is specified, `left_on` and `right_on` should be None for {how}.", ): - df.join(df, how=how, on="a", right_on="a") # type: ignore[arg-type] + df.join(df, how=how, on="antananarivo", right_on="antananarivo") # type: ignore[arg-type] def test_joinasof_numeric(constructor: Any, request: Any) -> None: @@ -247,28 +302,44 @@ def test_joinasof_numeric(constructor: Any, request: Any) -> None: ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor({"a": [1, 5, 10], "val": ["a", "b", "c"]})).sort("a") + df = nw.from_native( + constructor({"antananarivo": [1, 5, 10], "val": ["a", "b", "c"]}) + ).sort("antananarivo") df_right = nw.from_native( - constructor({"a": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) - ).sort("a") - result_backward = df.join_asof(df_right, left_on="a", right_on="a") # type: ignore[arg-type] - result_forward = df.join_asof(df_right, left_on="a", right_on="a", strategy="forward") # type: ignore[arg-type] - result_nearest = df.join_asof(df_right, left_on="a", right_on="a", strategy="nearest") # type: ignore[arg-type] - result_backward_on = df.join_asof(df_right, on="a") # type: ignore[arg-type] - result_forward_on = df.join_asof(df_right, on="a", strategy="forward") # 
type: ignore[arg-type] - result_nearest_on = df.join_asof(df_right, on="a", strategy="nearest") # type: ignore[arg-type] + constructor({"antananarivo": [1, 2, 3, 6, 7], "val": [1, 2, 3, 6, 7]}) + ).sort("antananarivo") + result_backward = df.join_asof( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + ) + result_forward = df.join_asof( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + strategy="forward", + ) + result_nearest = df.join_asof( + df_right, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + strategy="nearest", + ) + result_backward_on = df.join_asof(df_right, on="antananarivo") # type: ignore[arg-type] + result_forward_on = df.join_asof(df_right, on="antananarivo", strategy="forward") # type: ignore[arg-type] + result_nearest_on = df.join_asof(df_right, on="antananarivo", strategy="nearest") # type: ignore[arg-type] expected_backward = { - "a": [1, 5, 10], + "antananarivo": [1, 5, 10], "val": ["a", "b", "c"], "val_right": [1, 3, 7], } expected_forward = { - "a": [1, 5, 10], + "antananarivo": [1, 5, 10], "val": ["a", "b", "c"], "val_right": [1, 6, float("nan")], } expected_nearest = { - "a": [1, 5, 10], + "antananarivo": [1, 5, 10], "val": ["a", "b", "c"], "val_right": [1, 6, 7], } @@ -366,16 +437,24 @@ def test_joinasof_by(constructor: Any, request: Any) -> None: ): request.applymarker(pytest.mark.xfail) df = nw.from_native( - constructor({"a": [1, 5, 7, 10], "b": ["D", "D", "C", "A"], "c": [9, 2, 1, 1]}) - ).sort("a") + constructor( + { + "antananarivo": [1, 5, 7, 10], + "bob": ["D", "D", "C", "A"], + "c": [9, 2, 1, 1], + } + ) + ).sort("antananarivo") df_right = nw.from_native( - constructor({"a": [1, 4, 5, 8], "b": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]}) - ).sort("a") - result = df.join_asof(df_right, on="a", by_left="b", by_right="b") # type: ignore[arg-type] - result_by = df.join_asof(df_right, on="a", by="b") # type: ignore[arg-type] + constructor( + {"antananarivo": [1, 4, 5, 8], "bob": ["D", "D", "A", "F"], "d": [1, 3, 4, 1]} + ) + ).sort("antananarivo") + result = df.join_asof(df_right, on="antananarivo", by_left="bob", by_right="bob") # type: ignore[arg-type] + result_by = df.join_asof(df_right, on="antananarivo", by="bob") # type: ignore[arg-type] expected = { - "a": [1, 5, 7, 10], - "b": ["D", "D", "C", "A"], + "antananarivo": [1, 5, 7, 10], + "bob": ["D", "D", "C", "A"], "c": [9, 2, 1, 1], "d": [1, 3, float("nan"), 4], } @@ -384,31 +463,38 @@ def test_joinasof_by(constructor: Any, request: Any) -> None: @pytest.mark.parametrize("strategy", ["back", "furthest"]) -def test_joinasof_not_implemented(constructor: Any, strategy: str) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} +def test_joinasof_not_implemented( + constructor: Any, strategy: Literal["backward", "forward"] +) -> None: + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( NotImplementedError, match=rf"Only the following strategies are supported: \('backward', 'forward', 'nearest'\); found '{strategy}'.", ): - df.join_asof(df, left_on="a", right_on="a", strategy=strategy) # type: ignore[arg-type] + df.join_asof( + df, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + strategy=strategy, + ) def test_joinasof_keys_exceptions(constructor: Any) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 
6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df, left_on="a") # type: ignore[arg-type] + df.join_asof(df, left_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", ): - df.join_asof(df, right_on="a") # type: ignore[arg-type] + df.join_asof(df, right_on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match=r"Either \(`left_on` and `right_on`\) or `on` keys should be specified.", @@ -418,48 +504,53 @@ def test_joinasof_keys_exceptions(constructor: Any) -> None: ValueError, match="If `on` is specified, `left_on` and `right_on` should be None.", ): - df.join_asof(df, left_on="a", right_on="a", on="a") # type: ignore[arg-type] + df.join_asof( + df, # type: ignore[arg-type] + left_on="antananarivo", + right_on="antananarivo", + on="antananarivo", + ) with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None.", ): - df.join_asof(df, left_on="a", on="a") # type: ignore[arg-type] + df.join_asof(df, left_on="antananarivo", on="antananarivo") # type: ignore[arg-type] with pytest.raises( ValueError, match="If `on` is specified, `left_on` and `right_on` should be None.", ): - df.join_asof(df, right_on="a", on="a") # type: ignore[arg-type] + df.join_asof(df, right_on="antananarivo", on="antananarivo") # type: ignore[arg-type] def test_joinasof_by_exceptions(constructor: Any) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]} df = nw.from_native(constructor(data)) with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None.", ): - df.join_asof(df, on="a", by_left="b", by_right="b", by="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_left="bob", by_right="bob", by="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): - df.join_asof(df, on="a", by_left="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_left="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="Can not specify only `by_left` or `by_right`, you need to specify both.", ): - df.join_asof(df, on="a", by_right="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_right="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None.", ): - df.join_asof(df, on="a", by_left="b", by="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_left="bob", by="bob") # type: ignore[arg-type] with pytest.raises( ValueError, match="If `by` is specified, `by_left` and `by_right` should be None.", ): - df.join_asof(df, on="a", by_right="b", by="b") # type: ignore[arg-type] + df.join_asof(df, on="antananarivo", by_right="bob", by="bob") # type: ignore[arg-type] From 06f7b875fcfcff980f7ccd0fc2b3bce1ce096165 Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Mon, 9 Sep 2024 15:55:04 +0200 Subject: [PATCH 22/30] refactor pyarrow (#931) --- narwhals/_arrow/group_by.py | 40 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/narwhals/_arrow/group_by.py 
b/narwhals/_arrow/group_by.py index 78b241c9b..6c7b20485 100644 --- a/narwhals/_arrow/group_by.py +++ b/narwhals/_arrow/group_by.py @@ -16,12 +16,26 @@ from narwhals._arrow.typing import IntoArrowExpr POLARS_TO_ARROW_AGGREGATIONS = { + "len": "count", "n_unique": "count_distinct", "std": "stddev", "var": "variance", # currently unused, we don't have `var` yet } +def get_function_name_option(function_name: str) -> Any | None: + """Map specific pyarrow compute function to respective option to match polars behaviour.""" + import pyarrow.compute as pc # ignore-banned-import + + function_name_to_options = { + "count": pc.CountOptions(mode="all"), + "count_distinct": pc.CountOptions(mode="all"), + "stddev": pc.VarianceOptions(ddof=1), + "variance": pc.VarianceOptions(ddof=1), + } + return function_name_to_options.get(function_name) + + class ArrowGroupBy: def __init__(self, df: ArrowDataFrame, keys: list[str]) -> None: import pyarrow as pa # ignore-banned-import() @@ -119,27 +133,13 @@ def agg_arrow( function_name = remove_prefix(expr._function_name, "col->") function_name = POLARS_TO_ARROW_AGGREGATIONS.get(function_name, function_name) + + option = get_function_name_option(function_name) for root_name, output_name in zip(expr._root_names, expr._output_names): - if function_name == "len": - simple_aggregations[output_name] = ( - (root_name, "count", pc.CountOptions(mode="all")), - f"{root_name}_count", - ) - elif function_name == "count_distinct": - simple_aggregations[output_name] = ( - (root_name, "count_distinct", pc.CountOptions(mode="all")), - f"{root_name}_count_distinct", - ) - elif function_name == "stddev": - simple_aggregations[output_name] = ( - (root_name, "stddev", pc.VarianceOptions(ddof=1)), - f"{root_name}_stddev", - ) - else: - simple_aggregations[output_name] = ( - (root_name, function_name), - f"{root_name}_{function_name}", - ) + simple_aggregations[output_name] = ( + (root_name, function_name, option), + f"{root_name}_{function_name}", + ) aggs: list[Any] = [] name_mapping = {} From 047bb96c13924a99a865870cbd0213aec7ff3ee6 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 9 Sep 2024 15:02:41 +0100 Subject: [PATCH 23/30] release: Bump version to 1.6.4 (#932) --- docs/installation.md | 2 +- narwhals/__init__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index b89aa4b69..58302d417 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -11,6 +11,6 @@ Then, if you start the Python REPL and see the following: ```python >>> import narwhals >>> narwhals.__version__ -'1.6.3' +'1.6.4' ``` then installation worked correctly! 
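
For context on the `group_by` refactor above: it collapses the per-function special cases into one `(root_name, function_name, option)` tuple per aggregation, which is exactly the shape `pyarrow.Table.group_by(...).aggregate(...)` accepts. A standalone sketch of that underlying pattern, assuming a recent pyarrow whose `TableGroupBy.aggregate` takes options-bearing tuples; output column names follow pyarrow's `{column}_{function}` convention, which the patch relies on via `f"{root_name}_{function_name}"`:

```python
# Sketch of the pyarrow aggregation pattern the refactor targets.
import pyarrow as pa
import pyarrow.compute as pc

tbl = pa.table({"key": ["x", "x", "y"], "val": [1.0, 2.0, 3.0]})

aggs = [
    # mode="all" counts nulls too, matching Polars' `len` semantics
    ("val", "count", pc.CountOptions(mode="all")),
    # ddof=1 gives the sample standard deviation, matching Polars' `std` default
    ("val", "stddev", pc.VarianceOptions(ddof=1)),
]
result = tbl.group_by(["key"]).aggregate(aggs)

# Aggregated columns come back named "val_count" and "val_stddev"
print(result)
```
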
diff --git a/narwhals/__init__.py b/narwhals/__init__.py index b26cf9490..f410a1b24 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -53,7 +53,7 @@ from narwhals.utils import maybe_get_index from narwhals.utils import maybe_set_index -__version__ = "1.6.3" +__version__ = "1.6.4" __all__ = [ "dependencies", diff --git a/pyproject.toml b/pyproject.toml index a928ae0df..12482a349 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "narwhals" -version = "1.6.3" +version = "1.6.4" authors = [ { name="Marco Gorelli", email="33491632+MarcoGorelli@users.noreply.github.com" }, ] From 9246f11f647b68283d9fbd6f39b376633d93c0e7 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 9 Sep 2024 16:26:20 +0100 Subject: [PATCH 24/30] test: always run tpch tests (#933) --- .github/workflows/check_tpch_queries.yml | 4 ++-- tpch/execute/q10.py | 4 ++++ tpch/execute/q11.py | 4 ++++ tpch/execute/q12.py | 4 ++++ tpch/execute/q13.py | 4 ++++ tpch/execute/q14.py | 4 ++++ tpch/execute/q15.py | 4 ++++ tpch/execute/q16.py | 4 ++++ tpch/execute/q17.py | 4 ++++ tpch/execute/q18.py | 4 ++++ tpch/execute/q19.py | 9 +++------ tpch/execute/q20.py | 9 +++------ tpch/execute/q21.py | 9 +++------ tpch/execute/q22.py | 4 ++++ tpch/execute/q3.py | 4 ++++ tpch/execute/q4.py | 4 ++++ tpch/execute/q5.py | 8 ++++++++ tpch/execute/q6.py | 4 ++++ tpch/execute/q7.py | 4 ++++ tpch/execute/q8.py | 14 ++++++++++++++ tpch/execute/q9.py | 6 ++++++ tpch/queries/q17.py | 5 +++-- tpch/queries/q8.py | 3 ++- 23 files changed, 100 insertions(+), 23 deletions(-) diff --git a/.github/workflows/check_tpch_queries.yml b/.github/workflows/check_tpch_queries.yml index 82a2f4aa4..46dd5df20 100644 --- a/.github/workflows/check_tpch_queries.yml +++ b/.github/workflows/check_tpch_queries.yml @@ -2,11 +2,11 @@ name: Tests for TPCH Queries on: pull_request: - types: [labeled] + push: + branches: [main] jobs: validate-queries: - if: ${{ github.event.label.name == 'full-test' }} strategy: matrix: python-version: ["3.12"] diff --git a/tpch/execute/q10.py b/tpch/execute/q10.py index 99d850f53..e1d56d36b 100644 --- a/tpch/execute/q10.py +++ b/tpch/execute/q10.py @@ -13,3 +13,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q10.query(fn(customer), fn(nation), fn(lineitem), fn(orders)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q10.query(fn(customer), fn(nation), fn(lineitem), fn(orders))) diff --git a/tpch/execute/q11.py b/tpch/execute/q11.py index 101710adb..a6b830f30 100644 --- a/tpch/execute/q11.py +++ b/tpch/execute/q11.py @@ -12,3 +12,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q11.query(fn(nation), fn(partsupp), fn(supplier)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q11.query(fn(nation), fn(partsupp), fn(supplier))) diff --git a/tpch/execute/q12.py b/tpch/execute/q12.py index b74742373..0cdc0378b 100644 --- a/tpch/execute/q12.py +++ b/tpch/execute/q12.py @@ -11,3 +11,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q12.query(fn(line_item), fn(orders)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q12.query(fn(line_item), fn(orders))) diff --git a/tpch/execute/q13.py b/tpch/execute/q13.py index 084fcca9b..b5e6c8bbe 100644 --- a/tpch/execute/q13.py +++ b/tpch/execute/q13.py @@ -11,3 +11,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q13.query(fn(customer), fn(orders)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q13.query(fn(customer), fn(orders))) diff --git a/tpch/execute/q14.py 
b/tpch/execute/q14.py index 57f83a595..1a89dbbbe 100644 --- a/tpch/execute/q14.py +++ b/tpch/execute/q14.py @@ -11,3 +11,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q14.query(fn(line_item), fn(part)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q14.query(fn(line_item), fn(part))) diff --git a/tpch/execute/q15.py b/tpch/execute/q15.py index 0d9e9f374..ac858841d 100644 --- a/tpch/execute/q15.py +++ b/tpch/execute/q15.py @@ -11,3 +11,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q15.query(fn(lineitem), fn(supplier)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q15.query(fn(lineitem), fn(supplier))) diff --git a/tpch/execute/q16.py b/tpch/execute/q16.py index 5176a5cc6..7fa6c72b0 100644 --- a/tpch/execute/q16.py +++ b/tpch/execute/q16.py @@ -12,3 +12,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q16.query(fn(part), fn(partsupp), fn(supplier)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q16.query(fn(part), fn(partsupp), fn(supplier))) diff --git a/tpch/execute/q17.py b/tpch/execute/q17.py index 2d9920c69..8eefb92dc 100644 --- a/tpch/execute/q17.py +++ b/tpch/execute/q17.py @@ -11,3 +11,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q17.query(fn(lineitem), fn(part)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q17.query(fn(lineitem), fn(part))) diff --git a/tpch/execute/q18.py b/tpch/execute/q18.py index 4092fc0d6..fdd50c095 100644 --- a/tpch/execute/q18.py +++ b/tpch/execute/q18.py @@ -12,3 +12,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q18.query(fn(customer), fn(lineitem), fn(orders)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q18.query(fn(customer), fn(lineitem), fn(orders))) diff --git a/tpch/execute/q19.py b/tpch/execute/q19.py index 87467064c..e1dff3eb5 100644 --- a/tpch/execute/q19.py +++ b/tpch/execute/q19.py @@ -4,14 +4,11 @@ from . import lineitem from . import part -fn = IO_FUNCS["pandas"] -print(q19.query(fn(lineitem), fn(part))) - fn = IO_FUNCS["pandas[pyarrow]"] print(q19.query(fn(lineitem), fn(part))) -fn = IO_FUNCS["polars[eager]"] -print(q19.query(fn(lineitem), fn(part))) - fn = IO_FUNCS["polars[lazy]"] print(q19.query(fn(lineitem), fn(part)).collect()) + +fn = IO_FUNCS["pyarrow"] +print(q19.query(fn(lineitem), fn(part))) diff --git a/tpch/execute/q20.py b/tpch/execute/q20.py index 68d18a6b5..d15f8c85f 100644 --- a/tpch/execute/q20.py +++ b/tpch/execute/q20.py @@ -7,14 +7,11 @@ from . import partsupp from . import supplier -fn = IO_FUNCS["pandas"] -print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))) - fn = IO_FUNCS["pandas[pyarrow]"] print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))) -fn = IO_FUNCS["polars[eager]"] -print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))) - fn = IO_FUNCS["polars[lazy]"] print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier)).collect()) + +fn = IO_FUNCS["pyarrow"] +print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier))) diff --git a/tpch/execute/q21.py b/tpch/execute/q21.py index 693953870..9940e6232 100644 --- a/tpch/execute/q21.py +++ b/tpch/execute/q21.py @@ -6,14 +6,11 @@ from . import orders from . 
import supplier -fn = IO_FUNCS["pandas"] -print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier))) - fn = IO_FUNCS["pandas[pyarrow]"] print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier))) -fn = IO_FUNCS["polars[eager]"] -print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier))) - fn = IO_FUNCS["polars[lazy]"] print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier)).collect()) + +fn = IO_FUNCS["pyarrow"] +print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier))) diff --git a/tpch/execute/q22.py b/tpch/execute/q22.py index 91ed46d9d..3b3fe523f 100644 --- a/tpch/execute/q22.py +++ b/tpch/execute/q22.py @@ -11,3 +11,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q22.query(fn(customer), fn(orders)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q22.query(fn(customer), fn(orders))) diff --git a/tpch/execute/q3.py b/tpch/execute/q3.py index a1eea74d1..f836fae27 100644 --- a/tpch/execute/q3.py +++ b/tpch/execute/q3.py @@ -12,3 +12,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q3.query(fn(customer), fn(lineitem), fn(orders)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q3.query(fn(customer), fn(lineitem), fn(orders))) diff --git a/tpch/execute/q4.py b/tpch/execute/q4.py index 79213f1ac..ca60f38ee 100644 --- a/tpch/execute/q4.py +++ b/tpch/execute/q4.py @@ -11,3 +11,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q4.query(fn(line_item), fn(orders)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q4.query(fn(line_item), fn(orders))) diff --git a/tpch/execute/q5.py b/tpch/execute/q5.py index 7a04dec1b..c343fea5d 100644 --- a/tpch/execute/q5.py +++ b/tpch/execute/q5.py @@ -23,3 +23,11 @@ fn(region), fn(nation), fn(customer), fn(line_item), fn(orders), fn(supplier) ).collect() ) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print( + q5.query( + fn(region), fn(nation), fn(customer), fn(line_item), fn(orders), fn(supplier) + ) +) diff --git a/tpch/execute/q6.py b/tpch/execute/q6.py index 402e6d452..eebf3f864 100644 --- a/tpch/execute/q6.py +++ b/tpch/execute/q6.py @@ -10,3 +10,7 @@ tool = "polars[lazy]" fn = IO_FUNCS[tool] print(q6.query(fn(lineitem)).collect()) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q6.query(fn(lineitem))) diff --git a/tpch/execute/q7.py b/tpch/execute/q7.py index 9f6179d23..c59f82ce7 100644 --- a/tpch/execute/q7.py +++ b/tpch/execute/q7.py @@ -16,3 +16,7 @@ print( q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier)).collect() ) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))) diff --git a/tpch/execute/q8.py b/tpch/execute/q8.py index f1a8677ff..902a34e70 100644 --- a/tpch/execute/q8.py +++ b/tpch/execute/q8.py @@ -37,3 +37,17 @@ fn(region), ).collect() ) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print( + q8.query( + fn(part), + fn(supplier), + fn(lineitem), + fn(orders), + fn(customer), + fn(nation), + fn(region), + ) +) diff --git a/tpch/execute/q9.py b/tpch/execute/q9.py index e01dd0f2c..44d4154aa 100644 --- a/tpch/execute/q9.py +++ b/tpch/execute/q9.py @@ -21,3 +21,9 @@ fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier) ).collect() ) + +tool = "pyarrow" +fn = IO_FUNCS[tool] +print( + q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier)) +) diff --git a/tpch/queries/q17.py b/tpch/queries/q17.py index 5d35929d1..976f476f0 100644 --- a/tpch/queries/q17.py +++ b/tpch/queries/q17.py @@ -14,8 +14,9 @@ def 
query(lineitem_ds: FrameT, part_ds: FrameT) -> FrameT: ) return ( - query1.group_by("p_partkey") - .agg((0.2 * nw.col("l_quantity").mean()).alias("avg_quantity")) + query1.with_columns(l_quantity_times_point_2=nw.col("l_quantity") * 0.2) + .group_by("p_partkey") + .agg(nw.col("l_quantity_times_point_2").mean().alias("avg_quantity")) .select(nw.col("p_partkey").alias("key"), nw.col("avg_quantity")) .join(query1, left_on="key", right_on="p_partkey") .filter(nw.col("l_quantity") < nw.col("avg_quantity")) diff --git a/tpch/queries/q8.py b/tpch/queries/q8.py index 3fba96313..ac3fa4baf 100644 --- a/tpch/queries/q8.py +++ b/tpch/queries/q8.py @@ -46,6 +46,7 @@ def query( .alias("_tmp") ) .group_by("o_year") - .agg((nw.sum("_tmp") / nw.sum("volume")).round(2).alias("mkt_share")) + .agg(_tmp_sum=nw.sum("_tmp"), volume_sum=nw.sum("volume")) + .select("o_year", mkt_share=nw.col("_tmp_sum") / nw.col("volume_sum")) .sort("o_year") ) From 5dc43000dfcd3e93c81f6a1cb90ea1d8bda38ffa Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Mon, 9 Sep 2024 17:52:21 +0200 Subject: [PATCH 25/30] feat: join suffix (#934) --- narwhals/_arrow/dataframe.py | 5 ++-- narwhals/_arrow/expr.py | 4 ++- narwhals/_dask/dataframe.py | 7 +++-- narwhals/_dask/expr.py | 2 +- narwhals/_pandas_like/dataframe.py | 11 +++---- narwhals/_pandas_like/expr.py | 4 ++- narwhals/dataframe.py | 46 ++++++++++++++++-------------- tests/frame/join_test.py | 35 +++++++++++++++++++++++ 8 files changed, 80 insertions(+), 34 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 2750f8c09..fa5a69950 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -277,6 +277,7 @@ def join( how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner", left_on: str | list[str] | None, right_on: str | list[str] | None, + suffix: str, ) -> Self: how_to_join_map = { "anti": "left anti", @@ -298,7 +299,7 @@ def join( keys=key_token, right_keys=key_token, join_type="inner", - right_suffix="_right", + right_suffix=suffix, ) .drop([key_token]), ) @@ -309,7 +310,7 @@ def join( keys=left_on, right_keys=right_on, join_type=how_to_join_map[how], - right_suffix="_right", + right_suffix=suffix, ), ) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 593e73eb3..31052fa52 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -309,7 +309,9 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: ) raise ValueError(msg) tmp = df.group_by(*keys).agg(self) - tmp = df.select(*keys).join(tmp, how="left", left_on=keys, right_on=keys) + tmp = df.select(*keys).join( + tmp, how="left", left_on=keys, right_on=keys, suffix="_right" + ) return [tmp[name] for name in self._output_names] return self.__class__( diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 1a40d7a6c..e2a034ae2 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -208,6 +208,7 @@ def join( how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner", left_on: str | list[str] | None, right_on: str | list[str] | None, + suffix: str, ) -> Self: if how == "cross": key_token = generate_unique_token( @@ -221,7 +222,7 @@ def join( how="inner", left_on=key_token, right_on=key_token, - suffixes=("", "_right"), + suffixes=("", suffix), ) .drop(columns=key_token), ) @@ -273,7 +274,7 @@ def join( how="left", left_on=left_on, right_on=right_on, - suffixes=("", "_right"), + suffixes=("", suffix), ) extra = [] for 
left_key, right_key in zip(left_on, right_on): # type: ignore[arg-type] @@ -289,7 +290,7 @@ def join( left_on=left_on, right_on=right_on, how=how, - suffixes=("", "_right"), + suffixes=("", suffix), ), ) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index e3030a787..730824508 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -629,7 +629,7 @@ def func(df: DaskLazyFrame) -> list[Any]: tmp = df.group_by(*keys).agg(self) tmp_native = ( df.select(*keys) - .join(tmp, how="left", left_on=keys, right_on=keys) + .join(tmp, how="left", left_on=keys, right_on=keys, suffix="_right") ._native_frame ) return [tmp_native[name] for name in self._output_names] diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 4ec42ef59..59cff49fc 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -403,6 +403,7 @@ def join( how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner", left_on: str | list[str] | None, right_on: str | list[str] | None, + suffix: str, ) -> Self: if isinstance(left_on, str): left_on = [left_on] @@ -427,7 +428,7 @@ def join( how="inner", left_on=key_token, right_on=key_token, - suffixes=("", "_right"), + suffixes=("", suffix), ) .drop(columns=key_token), ) @@ -436,7 +437,7 @@ def join( self._native_frame.merge( other._native_frame, how="cross", - suffixes=("", "_right"), + suffixes=("", suffix), ), ) @@ -488,14 +489,14 @@ def join( how="left", left_on=left_on, right_on=right_on, - suffixes=("", "_right"), + suffixes=("", suffix), ) extra = [] for left_key, right_key in zip(left_on, right_on): # type: ignore[arg-type] if right_key != left_key and right_key not in self.columns: extra.append(right_key) elif right_key != left_key: - extra.append(f"{right_key}_right") + extra.append(f"{right_key}{suffix}") return self._from_native_frame(result_native.drop(columns=extra)) return self._from_native_frame( @@ -504,7 +505,7 @@ def join( left_on=left_on, right_on=right_on, how=how, - suffixes=("", "_right"), + suffixes=("", suffix), ), ) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 44154453d..8c3536c77 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -287,7 +287,9 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: ) raise ValueError(msg) tmp = df.group_by(*keys).agg(self) - tmp = df.select(*keys).join(tmp, how="left", left_on=keys, right_on=keys) + tmp = df.select(*keys).join( + tmp, how="left", left_on=keys, right_on=keys, suffix="_right" + ) return [tmp[name] for name in self._output_names] return self.__class__( diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index ffd7ce36d..a266b73c7 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -182,11 +182,12 @@ def sort( def join( self, other: Self, - *, + on: str | list[str] | None = None, how: Literal["inner", "left", "cross", "semi", "anti"] = "inner", + *, left_on: str | list[str] | None = None, right_on: str | list[str] | None = None, - on: str | list[str] | None = None, + suffix: str = "_right", ) -> Self: _supported_joins = ("inner", "left", "cross", "anti", "semi") @@ -219,6 +220,7 @@ def join( how=how, left_on=left_on, right_on=right_on, + suffix=suffix, ) ) @@ -1850,30 +1852,29 @@ def sort( def join( self, other: Self, - *, + on: str | list[str] | None = None, how: Literal["inner", "left", "cross", "semi", "anti"] = "inner", + *, left_on: str | list[str] | None = None, right_on: str | list[str] | None = None, - on: str 
| list[str] | None = None, + suffix: str = "_right", ) -> Self: r""" Join in SQL-like fashion. Arguments: - other: DataFrame to join with. - + other: Lazy DataFrame to join with. + on: Name(s) of the join columns in both DataFrames. If set, `left_on` and + `right_on` should be None. how: Join strategy. * *inner*: Returns rows that have matching values in both tables. * *cross*: Returns the Cartesian product of rows from both tables. * *semi*: Filter rows that have a match in the right table. * *anti*: Filter rows that do not have a match in the right table. - - left_on: Name(s) of the left join column(s). - - right_on: Name(s) of the right join column(s). - - on: Join column of both DataFrames. If set, left_on and right_on should be None. + left_on: Join column of the left DataFrame. + right_on: Join column of the right DataFrame. + suffix: Suffix to append to columns with a duplicate name. Returns: A new joined DataFrame @@ -1922,7 +1923,9 @@ def join( │ 2 ┆ 7.0 ┆ b ┆ y │ └─────┴─────┴─────┴───────┘ """ - return super().join(other, how=how, left_on=left_on, right_on=right_on, on=on) + return super().join( + other, how=how, left_on=left_on, right_on=right_on, on=on, suffix=suffix + ) def join_asof( self, @@ -3578,30 +3581,29 @@ def sort( def join( self, other: Self, - *, + on: str | list[str] | None = None, how: Literal["inner", "left", "cross", "semi", "anti"] = "inner", + *, left_on: str | list[str] | None = None, right_on: str | list[str] | None = None, - on: str | list[str] | None = None, + suffix: str = "_right", ) -> Self: r""" Add a join operation to the Logical Plan. Arguments: other: Lazy DataFrame to join with. - + on: Name(s) of the join columns in both DataFrames. If set, `left_on` and + `right_on` should be None. how: Join strategy. * *inner*: Returns rows that have matching values in both tables. * *cross*: Returns the Cartesian product of rows from both tables. * *semi*: Filter rows that have a match in the right table. * *anti*: Filter rows that do not have a match in the right table. - left_on: Join column of the left DataFrame. - right_on: Join column of the right DataFrame. - - on: Join column of both DataFrames. If set, left_on and right_on should be None. + suffix: Suffix to append to columns with a duplicate name. 
Returns: A new joined LazyFrame @@ -3650,7 +3652,9 @@ def join( │ 2 ┆ 7.0 ┆ b ┆ y │ └─────┴─────┴─────┴───────┘ """ - return super().join(other, how=how, left_on=left_on, right_on=right_on, on=on) + return super().join( + other, how=how, left_on=left_on, right_on=right_on, on=on, suffix=suffix + ) def join_asof( self, diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 6a1985f41..18e9aae64 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -89,6 +89,41 @@ def test_cross_join(constructor: Any) -> None: df.join(df, how="cross", left_on="antananarivo") # type: ignore[arg-type] +@pytest.mark.parametrize("how", ["inner", "left"]) +@pytest.mark.parametrize("suffix", ["_right", "_custom_suffix"]) +def test_suffix(constructor: Any, how: str, suffix: str) -> None: + data = { + "antananarivo": [1, 3, 2], + "bob": [4, 4, 6], + "zorro": [7.0, 8, 9], + } + df = nw.from_native(constructor(data)) + df_right = df + result = df.join( + df_right, # type: ignore[arg-type] + left_on=["antananarivo", "bob"], + right_on=["antananarivo", "bob"], + how=how, # type: ignore[arg-type] + suffix=suffix, + ) + result_cols = result.collect_schema().names() + assert result_cols == ["antananarivo", "bob", "zorro", f"zorro{suffix}"] + + +@pytest.mark.parametrize("suffix", ["_right", "_custom_suffix"]) +def test_cross_join_suffix(constructor: Any, suffix: str) -> None: + data = {"antananarivo": [1, 3, 2]} + df = nw.from_native(constructor(data)) + result = df.join(df, how="cross", suffix=suffix).sort( # type: ignore[arg-type] + "antananarivo", f"antananarivo{suffix}" + ) + expected = { + "antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], + f"antananarivo{suffix}": [1, 2, 3, 1, 2, 3, 1, 2, 3], + } + compare_dicts(result, expected) + + def test_cross_join_non_pandas() -> None: data = {"antananarivo": [1, 3, 2]} df = nw.from_native(pd.DataFrame(data)) From b906621bbd3672b043c1a6535a5a0a83aabb6c94 Mon Sep 17 00:00:00 2001 From: Liam Connors Date: Tue, 10 Sep 2024 00:02:58 -0400 Subject: [PATCH 26/30] docs: fix `maybe_align_index` docstring formatting (#938) --- narwhals/stable/v1.py | 2 +- narwhals/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 1af7a26f3..78cfa5ba1 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -1541,7 +1541,7 @@ def is_ordered_categorical(series: Series) -> bool: def maybe_align_index(lhs: T, rhs: Series | DataFrame[Any] | LazyFrame[Any]) -> T: """ - Align `lhs` to the Index of `rhs, if they're both pandas-like. + Align `lhs` to the Index of `rhs`, if they're both pandas-like. Notes: This is only really intended for backwards-compatibility purposes, diff --git a/narwhals/utils.py b/narwhals/utils.py index 6c1b5c1b4..ec3c722d4 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -153,7 +153,7 @@ def validate_laziness(items: Iterable[Any]) -> None: def maybe_align_index(lhs: T, rhs: Series | BaseFrame[Any]) -> T: """ - Align `lhs` to the Index of `rhs, if they're both pandas-like. + Align `lhs` to the Index of `rhs`, if they're both pandas-like. 
Notes: This is only really intended for backwards-compatibility purposes, From be95f2e83615dd93c7a55619236e54e98be73a18 Mon Sep 17 00:00:00 2001 From: Liam Connors Date: Tue, 10 Sep 2024 03:22:55 -0400 Subject: [PATCH 27/30] fix: Update `copy` param on `to_numpy` to default to `True` for cuDF (#937) * set copy=True for cuDF * set copy=True for cuDF series --- narwhals/_pandas_like/dataframe.py | 4 ++-- narwhals/_pandas_like/series.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 59cff49fc..499777833 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -590,8 +590,8 @@ def to_numpy(self, dtype: Any = None, copy: bool | None = None) -> Any: from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING if copy is None: - # pandas default differs from Polars - copy = False + # pandas default differs from Polars, but cuDF default is True + copy = self._implementation is Implementation.CUDF if dtype is not None: return self._native_frame.to_numpy(dtype=dtype, copy=copy) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index b28a04088..a0830784f 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -473,7 +473,7 @@ def __array__(self, dtype: Any = None, copy: bool | None = None) -> Any: def to_numpy(self, dtype: Any = None, copy: bool | None = None) -> Any: # the default is meant to be None, but pandas doesn't allow it? # https://numpy.org/doc/stable/reference/generated/numpy.ndarray.__array__.html - copy = copy or False + copy = copy or self._implementation is Implementation.CUDF has_missing = self._native_series.isna().any() if ( From 270adbd432259f21a24f8f9a4f3121ee19a3d646 Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:23:41 +0200 Subject: [PATCH 28/30] chore: ban dask boolean mask filtering (#939) --- narwhals/_dask/dataframe.py | 15 ++++++++------- tests/frame/filter_test.py | 19 ++++++++++++++++--- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index e2a034ae2..d4433fb39 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -79,14 +79,15 @@ def filter( and isinstance(predicates[0], list) and all(isinstance(x, bool) for x in predicates[0]) ): - mask = predicates[0] - else: - from narwhals._dask.namespace import DaskNamespace + msg = "Filtering with boolean mask is not supported for `DaskLazyFrame`" + raise NotImplementedError(msg) + + from narwhals._dask.namespace import DaskNamespace - plx = DaskNamespace(backend_version=self._backend_version) - expr = plx.all_horizontal(*predicates) - # Safety: all_horizontal's expression only returns a single column. - mask = expr._call(self)[0] + plx = DaskNamespace(backend_version=self._backend_version) + expr = plx.all_horizontal(*predicates) + # Safety: all_horizontal's expression only returns a single column. 
+ mask = expr._call(self)[0] return self._from_native_frame(self._native_frame.loc[mask]) def lazy(self) -> Self: diff --git a/tests/frame/filter_test.py b/tests/frame/filter_test.py index a8d3144aa..609f8ef91 100644 --- a/tests/frame/filter_test.py +++ b/tests/frame/filter_test.py @@ -1,5 +1,8 @@ +from contextlib import nullcontext as does_not_raise from typing import Any +import pytest + import narwhals.stable.v1 as nw from tests.utils import compare_dicts @@ -15,6 +18,16 @@ def test_filter(constructor: Any) -> None: def test_filter_with_boolean_list(constructor: Any) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df = nw.from_native(constructor(data)) - result = df.filter([False, True, True]) - expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} - compare_dicts(result, expected) + + context = ( + pytest.raises( + NotImplementedError, match="Filtering with boolean mask is not supported" + ) + if "dask" in str(constructor) + else does_not_raise() + ) + + with context: + result = df.filter([False, True, True]) + expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} + compare_dicts(result, expected) From 359905b95f8c76fdb7eaaf48cb77cc08eafd6209 Mon Sep 17 00:00:00 2001 From: raisadz <34237447+raisadz@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:11:27 +0100 Subject: [PATCH 29/30] support `__getitem__` with single tuple of column names (#935) --- narwhals/_arrow/dataframe.py | 9 ++++++++- narwhals/_pandas_like/dataframe.py | 17 ++++++++++++++--- narwhals/dataframe.py | 8 +++++++- narwhals/stable/v1.py | 5 +++-- tests/frame/slice_test.py | 3 +++ 5 files changed, 35 insertions(+), 7 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index fa5a69950..f409ef735 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -121,7 +121,12 @@ def __getitem__(self, item: str) -> ArrowSeries: ... def __getitem__(self, item: slice) -> ArrowDataFrame: ... def __getitem__( - self, item: str | slice | Sequence[int] | tuple[Sequence[int], str | int] + self, + item: str + | slice + | Sequence[int] + | Sequence[str] + | tuple[Sequence[int], str | int], ) -> ArrowSeries | ArrowDataFrame: if isinstance(item, str): from narwhals._arrow.series import ArrowSeries @@ -191,6 +196,8 @@ def __getitem__( ) elif isinstance(item, Sequence) or (is_numpy_array(item) and item.ndim == 1): + if isinstance(item, Sequence) and all(isinstance(x, str) for x in item): + return self._from_native_frame(self._native_frame.select(item)) return self._from_native_frame(self._native_frame.take(item)) else: # pragma: no cover diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 499777833..71a659998 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -111,13 +111,22 @@ def __getitem__(self, item: tuple[Sequence[int], str | int]) -> PandasLikeSeries def __getitem__(self, item: Sequence[int]) -> PandasLikeDataFrame: ... @overload - def __getitem__(self, item: str) -> PandasLikeSeries: ... + def __getitem__(self, item: str) -> PandasLikeSeries: ... # type: ignore[overload-overlap] + + @overload + def __getitem__(self, item: Sequence[str]) -> PandasLikeDataFrame: ... @overload def __getitem__(self, item: slice) -> PandasLikeDataFrame: ... 
def __getitem__( - self, item: str | slice | Sequence[int] | tuple[Sequence[int], str | int] + self, + item: str + | int + | slice + | Sequence[int] + | Sequence[str] + | tuple[Sequence[int], str | int], ) -> PandasLikeSeries | PandasLikeDataFrame: if isinstance(item, str): from narwhals._pandas_like.series import PandasLikeSeries @@ -174,7 +183,7 @@ def __getitem__( from narwhals._pandas_like.series import PandasLikeSeries if isinstance(item[1], str): - item = (item[0], self._native_frame.columns.get_loc(item[1])) + item = (item[0], self._native_frame.columns.get_loc(item[1])) # type: ignore[assignment] native_series = self._native_frame.iloc[item] elif isinstance(item[1], int): native_series = self._native_frame.iloc[item] @@ -191,6 +200,8 @@ def __getitem__( elif isinstance(item, (slice, Sequence)) or ( is_numpy_array(item) and item.ndim == 1 ): + if isinstance(item, Sequence) and all(isinstance(x, str) for x in item): + return self._from_native_frame(self._native_frame.loc[:, item]) return self._from_native_frame(self._native_frame.iloc[item]) else: # pragma: no cover diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index a266b73c7..1b91f0910 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -612,7 +612,10 @@ def __getitem__(self, item: tuple[Sequence[int], int]) -> Series: ... # type: i def __getitem__(self, item: Sequence[int]) -> Self: ... @overload - def __getitem__(self, item: str) -> Series: ... + def __getitem__(self, item: str) -> Series: ... # type: ignore[overload-overlap] + + @overload + def __getitem__(self, item: Sequence[str]) -> Self: ... @overload def __getitem__(self, item: slice) -> Self: ... @@ -622,6 +625,7 @@ def __getitem__( item: str | slice | Sequence[int] + | Sequence[str] | tuple[Sequence[int], str | int] | tuple[slice | Sequence[int], Sequence[int] | Sequence[str] | slice], ) -> Series | Self: @@ -644,6 +648,8 @@ def __getitem__( `DataFrame`. - `df[:, ['a', 'c']]` extracts all rows and columns `'a'` and `'c'` and returns a `DataFrame`. + - `df[['a', 'c']]` extracts all rows and columns `'a'` and `'c'` and returns a + `DataFrame`. - `df[0: 2, ['a', 'c']]` extracts the first two rows and columns `'a'` and `'c'` and returns a `DataFrame` - `df[:, 0: 2]` extracts all rows from the first two columns and returns a `DataFrame` diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 78cfa5ba1..862ba5d1a 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -91,9 +91,10 @@ def __getitem__(self, item: tuple[Sequence[int], int]) -> Series: ... # type: i @overload def __getitem__(self, item: Sequence[int]) -> Self: ... - @overload - def __getitem__(self, item: str) -> Series: ... + def __getitem__(self, item: str) -> Series: ... # type: ignore[overload-overlap] + @overload + def __getitem__(self, item: Sequence[str]) -> Self: ... @overload def __getitem__(self, item: slice) -> Self: ... 
diff --git a/tests/frame/slice_test.py b/tests/frame/slice_test.py
index 18b05bf3b..834e88bff 100644
--- a/tests/frame/slice_test.py
+++ b/tests/frame/slice_test.py
@@ -147,6 +147,9 @@ def test_slice_slice_columns(constructor_eager: Any) -> None:
     result = df[:, [0, 2]]
     expected = {"a": [1, 2, 3], "c": [7, 8, 9]}
     compare_dicts(result, expected)
+    result = df[["b", "c"]]
+    expected = {"b": [4, 5, 6], "c": [7, 8, 9]}
+    compare_dicts(result, expected)
 
 
 def test_slice_invalid(constructor_eager: Any) -> None:

From e9afffd233ed4b4df5364dc8c16ba00e16f86871 Mon Sep 17 00:00:00 2001
From: Aidos Kanapyanov <65722512+aidoskanapyanov@users.noreply.github.com>
Date: Tue, 10 Sep 2024 20:44:45 +0500
Subject: [PATCH 30/30] docs: add `appears on` section with links to relevant podcasts/blogs (#941)

* docs: add `appears on` section with links to relevant podcasts/blogs

* add "talk python to me" as well

Co-authored-by: Marco Edward Gorelli

* Update README.md

---------

Co-authored-by: Marco Edward Gorelli
---
 README.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/README.md b/README.md
index d26107e67..74630fd03 100644
--- a/README.md
+++ b/README.md
@@ -113,6 +113,31 @@ provided some funding / development time:
 If you contribute to Narwhals on your organization's time, please let us know. We'd be
 happy to add your employer to this list!
 
+## Appears on
+
+Narwhals has been featured in several talks, podcasts, and blog posts:
+
+- [Talk Python to me Podcast](https://youtu.be/FSH7BZ0tuE0)
+  Ahoy, Narwhals are bridging the data science APIs
+
+- [Super Data Science: ML & AI Podcast](https://www.youtube.com/watch?v=TeG4U8R0U8U)
+  Narwhals: For Pandas-to-Polars DataFrame Compatibility
+
+- [Sample Space Podcast | probabl](https://youtu.be/8hYdq4sWbbQ?si=WG0QP1CZ6gkFf18b)
+  How Narwhals has many end users ... that never use it directly. - Marco Gorelli
+
+- [Pycon Lithuania](https://www.youtube.com/watch?v=-mdx7Cn6_6E)
+  Marco Gorelli - DataFrame interoperability - what's been achieved, and what comes next?
+
+- [Pycon Italy](https://www.youtube.com/watch?v=3IqUli9XsmQ)
+  How you can write a dataframe-agnostic library - Marco Gorelli
+
+- [Polars Blog Post](https://pola.rs/posts/lightweight_plotting/)
+  Polars has a new lightweight plotting backend
+
+- [Quansight Labs blog post (w/ Scikit-Lego)](https://labs.quansight.org/blog/scikit-lego-narwhals)
+  How Narwhals and scikit-lego came together to achieve dataframe-agnosticism
+
 ## Why "Narwhals"?
 
 [Coz they are so awesome](https://youtu.be/ykwqXuMPsoc?si=A-i8LdR38teYsos4).
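As a usage recap of two user-facing changes in this series, here is a small end-to-end sketch combining the `suffix` parameter for `join` (PATCH 25) and column selection via a sequence of names in `__getitem__` (PATCH 29). It assumes `pandas` plus a narwhals build that includes both patches; the data and column names are illustrative.

```python
import pandas as pd
import narwhals.stable.v1 as nw

df = nw.from_native(
    pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]}), eager_only=True
)

# PATCH 25: `suffix` controls how clashing right-hand columns are renamed.
joined = df.join(df, on="a", how="inner", suffix="_other")
print(joined.columns)  # ['a', 'b', 'b_other']

# PATCH 29: a list of column names now selects columns, returning a DataFrame;
# a single name still returns a Series.
print(joined[["a", "b_other"]].to_native())
print(joined["b"].to_native())
```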