diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml index 542dacbbf..7e091bab1 100644 --- a/.github/workflows/downstream_tests.yml +++ b/.github/workflows/downstream_tests.yml @@ -87,21 +87,16 @@ jobs: - name: show-deps run: uv pip freeze - name: Create assets directory, copy over index.html + continue-on-error: true run: | mkdir -p marimo/marimo/_static/assets cp marimo/frontend/index.html marimo/marimo/_static/index.html cp marimo/frontend/public/favicon.ico marimo/marimo/_static/favicon.ico - - name: Run tests with minimal dependencies - if: ${{ matrix.dependencies == 'core' }} - run: | - cd marimo - hatch run +py=${{ matrix.python-version }} test:test -v tests/ -k "not test_cli" - timeout-minutes: 15 - - name: Run tests with optional dependencies + - name: Run tests with full dependencies if: ${{ matrix.dependencies == 'core,optional' }} run: | cd marimo - hatch run +py=${{ matrix.python-version }} test-optional:test -v tests/ -k "not test_cli" + hatch run +py=${{ matrix.python-version }} test-optional:test-narwhals timeout-minutes: 15 - name: Run typechecks run: | @@ -186,3 +181,43 @@ jobs: run: | cd py-shiny make narwhals-test-integration + + tubular: + strategy: + matrix: + python-version: ["3.12"] + os: [ubuntu-latest] + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + enable-cache: "true" + cache-suffix: ${{ matrix.python-version }} + cache-dependency-glob: "**requirements*.txt" + - name: clone-tubular + run: | + git clone https://github.com/lvgig/tubular --depth=1 + cd tubular + git log + - name: install-basics + run: uv pip install --upgrade tox virtualenv setuptools pytest-env --system + - name: install-tubular-dev + run: | + cd tubular + uv pip install -e .[dev] --system + - name: install-narwhals-dev + run: | + uv pip uninstall narwhals --system + uv pip install -e . 
--system + - name: show-deps + run: uv pip freeze + - name: Run pytest + run: | + cd tubular + pytest tests --config-file=pyproject.toml diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index cf488fd2d..fd6a7cfb2 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -90,7 +90,7 @@ jobs: nightlies: strategy: matrix: - python-version: ["3.11"] + python-version: ["3.13"] os: [ubuntu-latest] if: github.event.pull_request.head.repo.full_name == github.repository runs-on: ${{ matrix.os }} diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index ee88911ea..7847939b9 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -34,7 +34,7 @@ jobs: pytest-windows: strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.12"] os: [windows-latest] runs-on: ${{ matrix.os }} @@ -61,7 +61,7 @@ jobs: pytest-coverage: strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.11", "3.13"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/.gitignore b/.gitignore index 8b9adeb8f..774f09637 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ coverage.xml # Documentation site/ todo.md +docs/this.md docs/api-completeness/*.md !docs/api-completeness/index.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 89d05e542..b03d649dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.6.9' + rev: 'v0.7.0' hooks: # Run the formatter. - id: ruff-format @@ -9,7 +9,7 @@ repos: - id: ruff args: [--fix] - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.11.2' + rev: 'v1.12.1' hooks: - id: mypy additional_dependencies: ['polars==1.4.1', 'pytest==8.3.2'] @@ -40,7 +40,7 @@ repos: hooks: - id: nbstripout - repo: https://github.com/adamchainz/blacken-docs - rev: "1.18.0" # replace with latest tag on GitHub + rev: "1.19.0" # replace with latest tag on GitHub hooks: - id: blacken-docs args: [--skip-errors] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a30273970..b8f333f1e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -51,17 +51,20 @@ Here's how you can set up your local development environment to contribute. #### Option 1: Use UV (recommended) -1. Make sure you have Python3.8+ installed (for example, Python 3.11), create a virtual environment, +1. Make sure you have Python3.12 installed, create a virtual environment, and activate it. If you're new to this, here's one way that we recommend: 1. Install uv: https://github.com/astral-sh/uv?tab=readme-ov-file#getting-started - 2. Install some version of Python greater than Python3.8. For example, to install - Python3.11: + or make sure it is up-to-date with: ``` - uv python install 3.11 + uv self update + ``` + 2. Install Python3.12: + ``` + uv python install 3.12 ``` 3. Create a virtual environment: ``` - uv venv -p 3.11 --seed + uv venv -p 3.12 --seed ``` 4. Activate it. On Linux, this is `. .venv/bin/activate`, on Windows `.\.venv\Scripts\activate`. 2. Install Narwhals: `uv pip install -e .` @@ -106,6 +109,10 @@ nox Notice that nox will also require to have all the python versions that are defined in the `noxfile.py` installed in your system. +#### Testing cuDF + +We can't currently test in CI against cuDF, but you can test it manually in Kaggle using GPUs. 
Please follow this [Kaggle notebook](https://www.kaggle.com/code/marcogorelli/testing-cudf-in-narwhals) to run the tests. + ### 7. Building docs To build the docs, run `mkdocs serve`, and then open the link provided in a browser. diff --git a/README.md b/README.md index 44fc31e56..b3acb17ba 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,13 @@ Join the party! - [Altair](https://github.com/vega/altair/) - [Hamilton](https://github.com/DAGWorks-Inc/hamilton/tree/main/examples/narwhals) +- [marimo](https://github.com/marimo-team/marimo) +- [pymarginaleffects](https://github.com/vincentarelbundock/pymarginaleffects) - [scikit-lego](https://github.com/koaning/scikit-lego) - [scikit-playtime](https://github.com/koaning/scikit-playtime) - [timebasedcv](https://github.com/FBruzzesi/timebasedcv) -- [marimo](https://github.com/marimo-team/marimo) +- [tubular](https://github.com/lvgig/tubular) +- [wimsey](https://github.com/benrutter/wimsey) Feel free to add your project to the list if it's missing, and/or [chat with us on Discord](https://discord.gg/V3PqtB4VA4) if you'd like any support. diff --git a/docs/api-reference/dependencies.md b/docs/api-reference/dependencies.md index 959e8ee0c..75ef4b277 100644 --- a/docs/api-reference/dependencies.md +++ b/docs/api-reference/dependencies.md @@ -11,14 +11,19 @@ - get_polars - get_pyarrow - is_cudf_dataframe + - is_cudf_index - is_cudf_series - is_dask_dataframe - is_ibis_table + - is_into_series - is_modin_dataframe + - is_modin_index - is_modin_series - is_numpy_array - is_pandas_dataframe + - is_pandas_index - is_pandas_like_dataframe + - is_pandas_like_index - is_pandas_like_series - is_pandas_series - is_polars_dataframe diff --git a/docs/api-reference/dtypes.md b/docs/api-reference/dtypes.md index eb96608a6..77bf1266b 100644 --- a/docs/api-reference/dtypes.md +++ b/docs/api-reference/dtypes.md @@ -6,7 +6,6 @@ members: - Array - List - - Struct - Int64 - Int32 - Int16 @@ -15,12 +14,14 @@ - UInt32 - UInt16 - UInt8 + - Field - Float64 - Float32 - Boolean - Categorical - Enum - String + - Struct - Date - Datetime - Duration diff --git a/docs/api-reference/expr_dt.md b/docs/api-reference/expr_dt.md index 5c9ab41f3..604ac4abf 100644 --- a/docs/api-reference/expr_dt.md +++ b/docs/api-reference/expr_dt.md @@ -6,22 +6,23 @@ members: - convert_time_zone - date - - year - - month - day - - ordinal_day - hour - - minute - - second - - millisecond - microsecond + - millisecond + - minute + - month - nanosecond + - ordinal_day - replace_time_zone - - total_minutes - - total_seconds - - total_milliseconds + - second + - timestamp - total_microseconds + - total_milliseconds + - total_minutes - total_nanoseconds + - total_seconds - to_string + - year show_source: false show_bases: false diff --git a/docs/api-reference/narwhals.md b/docs/api-reference/narwhals.md index 044b20e0a..2b5be6e8c 100644 --- a/docs/api-reference/narwhals.md +++ b/docs/api-reference/narwhals.md @@ -14,6 +14,8 @@ Here are the top-level functions available in Narwhals. - concat_str - from_dict - from_native + - from_arrow + - generate_temporary_column_name - get_level - get_native_namespace - is_ordered_categorical @@ -38,4 +40,5 @@ Here are the top-level functions available in Narwhals. 
- when - show_versions - to_native + - to_py_scalar show_source: false diff --git a/docs/api-reference/series_dt.md b/docs/api-reference/series_dt.md index c92592411..23d4817cb 100644 --- a/docs/api-reference/series_dt.md +++ b/docs/api-reference/series_dt.md @@ -6,22 +6,23 @@ members: - convert_time_zone - date - - year - - month - day - - ordinal_day - hour - - minute - - second - - millisecond - microsecond + - millisecond + - minute + - month - nanosecond + - ordinal_day - replace_time_zone - - total_minutes - - total_seconds - - total_milliseconds + - second + - timestamp - total_microseconds + - total_milliseconds + - total_minutes - total_nanoseconds + - total_seconds - to_string + - year show_source: false show_bases: false diff --git a/docs/basics/dataframe_conversion.md b/docs/basics/dataframe_conversion.md new file mode 100644 index 000000000..690f5d093 --- /dev/null +++ b/docs/basics/dataframe_conversion.md @@ -0,0 +1,76 @@ +# Conversion between libraries + +Some library maintainers must apply complex dataframe operations, using methods and functions that may not (yet) be implemented in Narwhals. In such cases, Narwhals can still be highly beneficial, by allowing easy dataframe conversion. + +## Dataframe X in, pandas out + +Imagine that you maintain a library with a function that operates on pandas dataframes to produce automated reports. You want to allow users to supply a dataframe in any format to that function (pandas, Polars, DuckDB, cuDF, Modin, etc.) without adding all those dependencies to your own project and without special-casing each input library's variation of `to_pandas` / `toPandas` / `to_pandas_df` / `df` ... + +One solution is to use Narwhals as a thin Dataframe ingestion layer, to convert user-supplied dataframe to the format that your library uses internally. Since Narwhals is zero-dependency, this is a much more lightweight solution than including all the dataframe libraries as dependencies, +and easier to write than special casing each input library's `to_pandas` method (if it even exists!). + +To illustrate, we create dataframes in various formats: + +```python exec="1" source="above" session="conversion" +import narwhals as nw +from narwhals.typing import IntoDataFrame + +import duckdb +import polars as pl +import pandas as pd + +df_polars = pl.DataFrame( + { + "A": [1, 2, 3, 4, 5], + "fruits": ["banana", "banana", "apple", "apple", "banana"], + "B": [5, 4, 3, 2, 1], + "cars": ["beetle", "audi", "beetle", "beetle", "beetle"], + } +) +df_pandas = df_polars.to_pandas() +df_duckdb = duckdb.sql("SELECT * FROM df_polars") +``` + +Now, we define a function that can ingest any dataframe type supported by Narwhals, and convert it to a pandas DataFrame for internal use: + +```python exec="1" source="above" session="conversion" result="python" +def df_to_pandas(df: IntoDataFrame) -> pd.DataFrame: + return nw.from_native(df).to_pandas() + + +print(df_to_pandas(df_polars)) +``` + +## Dataframe X in, Polars out + +### Via PyCapsule Interface + +Similarly, if your library uses Polars internally, you can convert any user-supplied dataframe to Polars format using Narwhals. + +```python exec="1" source="above" session="conversion" result="python" +def df_to_polars(df: IntoDataFrame) -> pl.DataFrame: + return nw.from_arrow(nw.from_native(df), native_namespace=pl).to_native() + + +print(df_to_polars(df_duckdb)) # You can only execute this line of code once. 
+``` + +It works to pass Polars to `native_namespace` here because Polars supports the [PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) for import. + +Note that the PyCapsule Interface makes no guarantee that you can call it repeatedly, so the approach above only works if you +only expect to perform the conversion a single time on each input object. + +### Via PyArrow + +If you need to ingest the same dataframe multiple times, then you may want to go via PyArrow instead. +This may be less efficient than the PyCapsule approach above (and always requires PyArrow!), but is more forgiving: + +```python exec="1" source="above" session="conversion" result="python" +def df_to_polars(df: IntoDataFrame) -> pl.DataFrame: + return pl.DataFrame(nw.from_native(df).to_arrow()) + + +df_duckdb = duckdb.sql("SELECT * FROM df_polars") +print(df_to_polars(df_duckdb)) # We can execute this... +print(df_to_polars(df_duckdb)) # ...as many times as we like! +``` diff --git a/docs/extending.md b/docs/extending.md index 22d85f701..814db3287 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -37,6 +37,7 @@ def func(df: FrameT) -> FrameT: b_std=nw.col("b").std(), ) ``` + will work for any of pandas, Polars, cuDF, Modin, and PyArrow. However, sometimes you don't need to do complex operations on dataframes - all you need @@ -57,9 +58,21 @@ def func(df: Any) -> Schema: df = nw.from_native(df, eager_or_interchange_only=True) return df.schema ``` + is also supported, meaning that, in addition to the libraries mentioned above, you can also pass Ibis, DuckDB, Vaex, and any library which implements the protocol. +#### Interchange-only support + +While libraries for which we have full support can benefit from the whole Narwhals API, +libraries which have interchange only support can access the following methods after +converting to Narwhals DataFrame: + +- `.schema`, hence column names via `.schema.names()` and column types via `.schema.dtypes()` +- `.to_pandas()` and `.to_arrow()`, for converting to Pandas and Arrow, respectively. +- `.select(names)` (Ibis and DuckDB), where `names` is a list of (string) column names. This is useful for + selecting columns before converting to another library. + ### Extending Narwhals If you want your own library to be recognised too, you're welcome open a PR (with tests)!. diff --git a/docs/index.md b/docs/index.md index f18d9af85..e9fe02170 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,7 +6,7 @@ Extremely lightweight and extensible compatibility layer between dataframe libra - **Full API support**: cuDF, Modin, pandas, Polars, PyArrow - **Lazy-only support**: Dask -- **Interchange-level support**: Ibis, Vaex, anything else which implements the DataFrame Interchange Protocol +- **Interchange-level support**: Ibis, DuckDB, Vaex, anything else which implements the DataFrame Interchange Protocol Seamlessly support all, without depending on any! diff --git a/docs/installation.md b/docs/installation.md index e3cd8f6db..58824eef3 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -2,20 +2,36 @@ ## Installation -First, make sure you have [created and activated](https://docs.python.org/3/library/venv.html) a Python3.8+ virtual environment. 
+=== "UV" -Then, run -```console -python -m pip install narwhals -``` + First, ensure you have installed [UV](https://github.com/astral-sh/uv), and make sure you have [created and activated](https://docs.astral.sh/uv/pip/environments/#python-environments) a Python 3.8+ virtual environment. + + If you haven't, you can follow our [_setting up your environment_](https://github.com/narwhals-dev/narwhals/blob/main/CONTRIBUTING.md#option-1-use-uv-recommended) guide. + Then, run: + + ```console + uv pip install narwhals + ``` + +=== "Python's venv" + + First, ensure you have [created and activated](https://docs.python.org/3/library/venv.html) a Python 3.8+ virtual environment. + + Then, run: + + ```console + python -m pip install narwhals + ``` + +### Verifying the Installation -Then, if you start the Python REPL and see the following: +To verify the installation, start the Python REPL and execute: ```python >>> import narwhals >>> narwhals.__version__ -'1.9.3' +'1.11.1' ``` -then installation worked correctly! +If you see the version number, then the installation was successful! ## Quick start @@ -69,4 +85,4 @@ If you run `python t.py` then your output should look like the above. This is th function - as we'll soon see, we can do much more advanced things. Let's learn about what you just did, and what Narwhals can do for you! -Note: these examples are only using pandas and Polars. Please see the following to find the [supported libraries](extending.md). +Note: these examples are only using pandas, Polars and PyArrow. Please see the following to find the [supported libraries](extending.md). diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index 929f35790..beec6070b 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -1,4 +1,5 @@ jinja2 +duckdb markdown-exec[ansi] mkdocs mkdocs-autorefs diff --git a/mkdocs.yml b/mkdocs.yml index 770fd2d1e..46cb5335f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,6 +10,7 @@ nav: - basics/dataframe.md - basics/series.md - basics/complete_example.md + - basics/dataframe_conversion.md - Pandas-like concepts: - other/pandas_index.md - other/user_warning.md @@ -26,6 +27,7 @@ nav: - Supported Expr methods: api-completeness/expr.md - Supported Series methods: api-completeness/series.md - API Reference: + - api-reference/index.md - api-reference/narwhals.md - api-reference/dataframe.md - api-reference/expr.md @@ -42,9 +44,9 @@ nav: - api-reference/series_str.md - api-reference/dependencies.md - api-reference/dtypes.md - - api-reference/index.md - api-reference/selectors.md - api-reference/typing.md + - This: this.md theme: name: material font: false @@ -92,6 +94,7 @@ plugins: hooks: - utils/generate_backend_completeness.py +- utils/generate_zen_content.py markdown_extensions: diff --git a/narwhals/__init__.py b/narwhals/__init__.py index 124f10c45..1d0af012b 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from narwhals import dependencies from narwhals import selectors from narwhals import stable @@ -10,6 +12,7 @@ from narwhals.dtypes import Datetime from narwhals.dtypes import Duration from narwhals.dtypes import Enum +from narwhals.dtypes import Field from narwhals.dtypes import Float32 from narwhals.dtypes import Float64 from narwhals.dtypes import Int8 @@ -44,6 +47,7 @@ from narwhals.expr import sum_horizontal from narwhals.expr import when from narwhals.functions import concat +from narwhals.functions import from_arrow from narwhals.functions import 
from_dict from narwhals.functions import get_level from narwhals.functions import new_series @@ -54,6 +58,8 @@ from narwhals.translate import get_native_namespace from narwhals.translate import narwhalify from narwhals.translate import to_native +from narwhals.translate import to_py_scalar +from narwhals.utils import generate_temporary_column_name from narwhals.utils import is_ordered_categorical from narwhals.utils import maybe_align_index from narwhals.utils import maybe_convert_dtypes @@ -61,13 +67,15 @@ from narwhals.utils import maybe_reset_index from narwhals.utils import maybe_set_index -__version__ = "1.9.3" +__version__ = "1.11.1" __all__ = [ "dependencies", "selectors", "concat", "from_dict", + "from_arrow", + "generate_temporary_column_name", "get_level", "new_series", "to_native", @@ -79,6 +87,7 @@ "maybe_reset_index", "maybe_set_index", "get_native_namespace", + "to_py_scalar", "all", "all_horizontal", "any_horizontal", @@ -118,6 +127,7 @@ "String", "Datetime", "Duration", + "Field", "Struct", "Array", "List", diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index a0a4e16cb..ac845853a 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -17,7 +17,7 @@ from narwhals.dependencies import is_numpy_array from narwhals.utils import Implementation from narwhals.utils import flatten -from narwhals.utils import generate_unique_token +from narwhals.utils import generate_temporary_column_name from narwhals.utils import is_sequence_but_not_str from narwhals.utils import parse_columns_to_drop @@ -83,6 +83,26 @@ def __len__(self) -> int: def row(self, index: int) -> tuple[Any, ...]: return tuple(col[index] for col in self._native_frame) + @overload + def rows( + self, + *, + named: Literal[True], + ) -> list[dict[str, Any]]: ... + + @overload + def rows( + self, + *, + named: Literal[False] = False, + ) -> list[tuple[Any, ...]]: ... + @overload + def rows( + self, + *, + named: bool, + ) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ... + def rows( self, *, named: bool = False ) -> list[tuple[Any, ...]] | list[dict[str, Any]]: @@ -141,16 +161,18 @@ def __getitem__(self, item: tuple[slice, slice]) -> ArrowDataFrame: ... 
def __getitem__( self, - item: str - | slice - | Sequence[int] - | Sequence[str] - | tuple[Sequence[int], str | int] - | tuple[slice, str | int] - | tuple[slice, slice], + item: ( + str + | slice + | Sequence[int] + | Sequence[str] + | tuple[Sequence[int], str | int] + | tuple[slice, str | int] + | tuple[slice, slice] + ), ) -> ArrowSeries | ArrowDataFrame: if isinstance(item, tuple): - item = tuple(list(i) if is_sequence_but_not_str(i) else i for i in item) + item = tuple(list(i) if is_sequence_but_not_str(i) else i for i in item) # type: ignore[assignment] if isinstance(item, str): from narwhals._arrow.series import ArrowSeries @@ -313,10 +335,10 @@ def with_columns( df = self._native_frame.__class__.from_arrays(to_concat, names=output_names) return self._from_native_frame(df) - def group_by(self, *keys: str) -> ArrowGroupBy: + def group_by(self, *keys: str, drop_null_keys: bool) -> ArrowGroupBy: from narwhals._arrow.group_by import ArrowGroupBy - return ArrowGroupBy(self, list(keys)) + return ArrowGroupBy(self, list(keys), drop_null_keys=drop_null_keys) def join( self, @@ -336,7 +358,7 @@ def join( if how == "cross": plx = self.__narwhals_namespace__() - key_token = generate_unique_token( + key_token = generate_temporary_column_name( n_bytes=8, columns=[*self.columns, *other.columns] ) @@ -496,7 +518,9 @@ def lazy(self) -> Self: def collect(self) -> ArrowDataFrame: return ArrowDataFrame( - self._native_frame, backend_version=self._backend_version, dtypes=self._dtypes + self._native_frame, + backend_version=self._backend_version, + dtypes=self._dtypes, ) def clone(self) -> Self: @@ -555,7 +579,7 @@ def is_duplicated(self: Self) -> ArrowSeries: df = self._native_frame columns = self.columns - col_token = generate_unique_token(n_bytes=8, columns=columns) + col_token = generate_temporary_column_name(n_bytes=8, columns=columns) row_count = ( df.append_column(col_token, pa.array(np.arange(len(self)))) .group_by(columns) @@ -614,7 +638,7 @@ def unique( agg_func_map = {"any": "min", "first": "min", "last": "max"} agg_func = agg_func_map[keep] - col_token = generate_unique_token(n_bytes=8, columns=self.columns) + col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns) keep_idx = ( df.append_column(col_token, pa.array(np.arange(len(self)))) .group_by(subset) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 55c529d30..35e936d72 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -353,7 +353,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: "`nw.col('a', 'b')`\n" ) raise ValueError(msg) - tmp = df.group_by(*keys).agg(self) + tmp = df.group_by(*keys, drop_null_keys=False).agg(self) tmp = df.select(*keys).join( tmp, how="left", left_on=keys, right_on=keys, suffix="_right" ) @@ -420,6 +420,11 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowExpr: self._expr, "dt", "convert_time_zone", time_zone ) + def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowExpr: + return reuse_series_namespace_implementation( + self._expr, "dt", "timestamp", time_unit + ) + def date(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation(self._expr, "dt", "date") diff --git a/narwhals/_arrow/group_by.py b/narwhals/_arrow/group_by.py index 6c7b20485..991a96a51 100644 --- a/narwhals/_arrow/group_by.py +++ b/narwhals/_arrow/group_by.py @@ -37,10 +37,15 @@ def get_function_name_option(function_name: str) -> Any | None: class ArrowGroupBy: - def __init__(self, df: ArrowDataFrame, keys: list[str]) -> None: + def 
__init__( + self, df: ArrowDataFrame, keys: list[str], *, drop_null_keys: bool + ) -> None: import pyarrow as pa # ignore-banned-import() - self._df = df + if drop_null_keys: + self._df = df.drop_nulls(keys) + else: + self._df = df self._keys = list(keys) self._grouped = pa.TableGroupBy(self._df._native_frame, list(self._keys)) @@ -74,11 +79,7 @@ def agg( ) def __iter__(self) -> Iterator[tuple[Any, ArrowDataFrame]]: - key_values = ( - self._df.select(*self._keys) - .unique(subset=self._keys, keep="first") - .iter_rows() - ) + key_values = self._df.select(*self._keys).unique(subset=self._keys, keep="first") nw_namespace = self._df.__narwhals_namespace__() yield from ( ( @@ -87,7 +88,7 @@ def __iter__(self) -> Iterator[tuple[Any, ArrowDataFrame]]: *[nw_namespace.col(k) == v for k, v in zip(self._keys, key_value)] ), ) - for key_value in key_values + for key_value in key_values.iter_rows() ) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 65a393ca9..be1377b4d 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -14,7 +14,7 @@ from narwhals._arrow.utils import native_to_narwhals_dtype from narwhals._arrow.utils import validate_column_comparand from narwhals.utils import Implementation -from narwhals.utils import generate_unique_token +from narwhals.utils import generate_temporary_column_name if TYPE_CHECKING: from types import ModuleType @@ -604,7 +604,7 @@ def is_first_distinct(self: Self) -> Self: import pyarrow.compute as pc # ignore-banned-import() row_number = pa.array(np.arange(len(self))) - col_token = generate_unique_token(n_bytes=8, columns=[self.name]) + col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) first_distinct_index = ( pa.Table.from_arrays([self._native_series], names=[self.name]) .append_column(col_token, row_number) @@ -621,7 +621,7 @@ def is_last_distinct(self: Self) -> Self: import pyarrow.compute as pc # ignore-banned-import() row_number = pa.array(np.arange(len(self))) - col_token = generate_unique_token(n_bytes=8, columns=[self.name]) + col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) last_distinct_index = ( pa.Table.from_arrays([self._native_series], names=[self.name]) .append_column(col_token, row_number) @@ -715,7 +715,7 @@ def to_arrow(self: Self) -> pa.Array: def mode(self: Self) -> ArrowSeries: plx = self.__narwhals_namespace__() - col_token = generate_unique_token(n_bytes=8, columns=[self.name]) + col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) return self.value_counts(name=col_token, normalize=False).filter( plx.col(col_token) == plx.col(col_token).max() )[self.name] @@ -780,11 +780,64 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries: return self._arrow_series._from_native_series(result) + def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowSeries: + import pyarrow as pa # ignore-banned-import + import pyarrow.compute as pc # ignore-banned-import + + s = self._arrow_series._native_series + dtype = self._arrow_series.dtype + if dtype == self._arrow_series._dtypes.Datetime: + unit = dtype.time_unit # type: ignore[attr-defined] + s_cast = s.cast(pa.int64()) + if unit == "ns": + if time_unit == "ns": + result = s_cast + elif time_unit == "us": + result = floordiv_compat(s_cast, 1_000) + else: + result = floordiv_compat(s_cast, 1_000_000) + elif unit == "us": + if time_unit == "ns": + result = pc.multiply(s_cast, 1_000) + elif time_unit == "us": + result = s_cast + else: + result = 
floordiv_compat(s_cast, 1_000) + elif unit == "ms": + if time_unit == "ns": + result = pc.multiply(s_cast, 1_000_000) + elif time_unit == "us": + result = pc.multiply(s_cast, 1_000) + else: + result = s_cast + elif unit == "s": + if time_unit == "ns": + result = pc.multiply(s_cast, 1_000_000_000) + elif time_unit == "us": + result = pc.multiply(s_cast, 1_000_000) + else: + result = pc.multiply(s_cast, 1_000) + else: # pragma: no cover + msg = f"unexpected time unit {unit}, please report an issue at https://github.com/narwhals-dev/narwhals" + raise AssertionError(msg) + elif dtype == self._arrow_series._dtypes.Date: + time_s = pc.multiply(s.cast(pa.int32()), 86400) + if time_unit == "ns": + result = pc.multiply(time_s, 1_000_000_000) + elif time_unit == "us": + result = pc.multiply(time_s, 1_000_000) + else: + result = pc.multiply(time_s, 1_000) + else: + msg = "Input should be either of Date or Datetime type" + raise TypeError(msg) + return self._arrow_series._from_native_series(result) + def date(self: Self) -> ArrowSeries: import pyarrow as pa # ignore-banned-import() return self._arrow_series._from_native_series( - self._arrow_series._native_series.cast(pa.date64()) + self._arrow_series._native_series.cast(pa.date32()) ) def year(self: Self) -> ArrowSeries: diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index e37cb093f..7f6fa6558 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -56,7 +56,16 @@ def native_to_narwhals_dtype(dtype: Any, dtypes: DTypes) -> DType: if pa.types.is_dictionary(dtype): return dtypes.Categorical() if pa.types.is_struct(dtype): - return dtypes.Struct() + return dtypes.Struct( + [ + dtypes.Field( + dtype.field(i).name, + native_to_narwhals_dtype(dtype.field(i).type, dtypes), + ) + for i in range(dtype.num_fields) + ] + ) + if pa.types.is_list(dtype) or pa.types.is_large_list(dtype): return dtypes.List(native_to_narwhals_dtype(dtype.value_type, dtypes)) if pa.types.is_fixed_size_list(dtype): diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 31949cf22..150b0177c 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -11,7 +11,7 @@ from narwhals._pandas_like.utils import native_to_narwhals_dtype from narwhals.utils import Implementation from narwhals.utils import flatten -from narwhals.utils import generate_unique_token +from narwhals.utils import generate_temporary_column_name from narwhals.utils import parse_columns_to_drop from narwhals.utils import parse_version @@ -149,7 +149,9 @@ def drop_nulls(self: Self, subset: str | list[str] | None) -> Self: @property def schema(self) -> dict[str, DType]: return { - col: native_to_narwhals_dtype(self._native_frame.loc[:, col], self._dtypes) + col: native_to_narwhals_dtype( + self._native_frame.loc[:, col], self._dtypes, self._implementation + ) for col in self._native_frame.columns } @@ -192,7 +194,7 @@ def unique( native_frame = self._native_frame if keep == "none": subset = subset or self.columns - token = generate_unique_token(n_bytes=8, columns=subset) + token = generate_temporary_column_name(n_bytes=8, columns=subset) ser = native_frame.groupby(subset).size().rename(token) ser = ser.loc[ser == 1] unique = ser.reset_index().drop(columns=token) @@ -234,7 +236,7 @@ def join( if isinstance(right_on, str): right_on = [right_on] if how == "cross": - key_token = generate_unique_token( + key_token = generate_temporary_column_name( n_bytes=8, columns=[*self.columns, *other.columns] ) @@ -251,7 +253,7 @@ def join( ) if how == "anti": - 
indicator_token = generate_unique_token( + indicator_token = generate_temporary_column_name( n_bytes=8, columns=[*self.columns, *other.columns] ) @@ -345,23 +347,23 @@ def join_asof( ), ) - def group_by(self, *by: str) -> DaskLazyGroupBy: + def group_by(self, *by: str, drop_null_keys: bool) -> DaskLazyGroupBy: from narwhals._dask.group_by import DaskLazyGroupBy - return DaskLazyGroupBy(self, list(by)) + return DaskLazyGroupBy(self, list(by), drop_null_keys=drop_null_keys) def tail(self: Self, n: int) -> Self: native_frame = self._native_frame n_partitions = native_frame.npartitions - if n_partitions == 1: + if n_partitions == 1: # pragma: no cover return self._from_native_frame(self._native_frame.tail(n=n, compute=False)) else: msg = "`LazyFrame.tail` is not supported for Dask backend with multiple partitions." raise NotImplementedError(msg) def gather_every(self: Self, n: int, offset: int) -> Self: - row_index_token = generate_unique_token(n_bytes=8, columns=self.columns) + row_index_token = generate_temporary_column_name(n_bytes=8, columns=self.columns) pln = self.__narwhals_namespace__() return ( self.with_row_index(name=row_index_token) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 693fcad5e..db29f6c4d 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -10,8 +10,11 @@ from narwhals._dask.utils import add_row_index from narwhals._dask.utils import maybe_evaluate from narwhals._dask.utils import narwhals_to_native_dtype +from narwhals._pandas_like.utils import calculate_timestamp_date +from narwhals._pandas_like.utils import calculate_timestamp_datetime from narwhals._pandas_like.utils import native_to_narwhals_dtype -from narwhals.utils import generate_unique_token +from narwhals.utils import Implementation +from narwhals.utils import generate_temporary_column_name if TYPE_CHECKING: import dask_expr @@ -562,7 +565,7 @@ def func(_input: dask_expr.Series, _quantile: float) -> dask_expr.Series: if _input.npartitions > 1: msg = "`Expr.quantile` is not supported for Dask backend with multiple partitions." raise NotImplementedError(msg) - return _input.quantile(q=_quantile, method="dask") + return _input.quantile(q=_quantile, method="dask") # pragma: no cover return self._from_call( func, @@ -577,7 +580,7 @@ def func(_input: dask_expr.Series, _quantile: float) -> dask_expr.Series: def is_first_distinct(self: Self) -> Self: def func(_input: dask_expr.Series) -> dask_expr.Series: _name = _input.name - col_token = generate_unique_token(n_bytes=8, columns=[_name]) + col_token = generate_temporary_column_name(n_bytes=8, columns=[_name]) _input = add_row_index(_input.to_frame(), col_token) first_distinct_index = _input.groupby(_name).agg({col_token: "min"})[ col_token @@ -594,7 +597,7 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: def is_last_distinct(self: Self) -> Self: def func(_input: dask_expr.Series) -> dask_expr.Series: _name = _input.name - col_token = generate_unique_token(n_bytes=8, columns=[_name]) + col_token = generate_temporary_column_name(n_bytes=8, columns=[_name]) _input = add_row_index(_input.to_frame(), col_token) last_distinct_index = _input.groupby(_name).agg({col_token: "max"})[col_token] @@ -667,17 +670,18 @@ def func(df: DaskLazyFrame) -> list[Any]: ) raise ValueError(msg) - if df._native_frame.npartitions > 1: - msg = "`Expr.over` is not supported for Dask backend with multiple partitions." 
- raise NotImplementedError(msg) - - tmp = df.group_by(*keys).agg(self) - tmp_native = ( - df.select(*keys) - .join(tmp, how="left", left_on=keys, right_on=keys, suffix="_right") - ._native_frame + if df._native_frame.npartitions == 1: # pragma: no cover + tmp = df.group_by(*keys, drop_null_keys=False).agg(self) + tmp_native = ( + df.select(*keys) + .join(tmp, how="left", left_on=keys, right_on=keys, suffix="_right") + ._native_frame + ) + return [tmp_native[name] for name in self._output_names] + msg = ( + "`Expr.over` is not supported for Dask backend with multiple partitions." ) - return [tmp_native[name] for name in self._output_names] + raise NotImplementedError(msg) return self.__class__( func, @@ -940,7 +944,7 @@ def replace_time_zone(self, time_zone: str | None) -> DaskExpr: def convert_time_zone(self, time_zone: str) -> DaskExpr: def func(s: dask_expr.Series, time_zone: str) -> dask_expr.Series: - dtype = native_to_narwhals_dtype(s, self._expr._dtypes) + dtype = native_to_narwhals_dtype(s, self._expr._dtypes, Implementation.DASK) if dtype.time_zone is None: # type: ignore[attr-defined] return s.dt.tz_localize("UTC").dt.tz_convert(time_zone) else: @@ -953,6 +957,37 @@ def func(s: dask_expr.Series, time_zone: str) -> dask_expr.Series: returns_scalar=False, ) + def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> DaskExpr: + def func( + s: dask_expr.Series, time_unit: Literal["ns", "us", "ms"] = "us" + ) -> dask_expr.Series: + dtype = native_to_narwhals_dtype(s, self._expr._dtypes, Implementation.DASK) + is_pyarrow_dtype = "pyarrow" in str(dtype) + mask_na = s.isna() + if dtype == self._expr._dtypes.Date: + # Date is only supported in pandas dtypes if pyarrow-backed + s_cast = s.astype("Int32[pyarrow]") + result = calculate_timestamp_date(s_cast, time_unit) + elif dtype == self._expr._dtypes.Datetime: + original_time_unit = dtype.time_unit # type: ignore[attr-defined] + s_cast = ( + s.astype("Int64[pyarrow]") if is_pyarrow_dtype else s.astype("int64") + ) + result = calculate_timestamp_datetime( + s_cast, original_time_unit, time_unit + ) + else: + msg = "Input should be either of Date or Datetime type" + raise TypeError(msg) + return result.where(~mask_na) + + return self._expr._from_call( + func, + "datetime", + time_unit, + returns_scalar=False, + ) + def total_minutes(self) -> DaskExpr: return self._expr._from_call( lambda _input: _input.dt.total_seconds() // 60, diff --git a/narwhals/_dask/group_by.py b/narwhals/_dask/group_by.py index 55ef69f46..e4c1e14c1 100644 --- a/narwhals/_dask/group_by.py +++ b/narwhals/_dask/group_by.py @@ -41,12 +41,14 @@ def agg(s0: pd.core.groupby.generic.SeriesGroupBy) -> int: class DaskLazyGroupBy: - def __init__(self, df: DaskLazyFrame, keys: list[str]) -> None: + def __init__( + self, df: DaskLazyFrame, keys: list[str], *, drop_null_keys: bool + ) -> None: self._df = df self._keys = keys self._grouped = self._df._native_frame.groupby( list(self._keys), - dropna=False, + dropna=drop_null_keys, observed=True, ) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 5877ed51e..5bd7af153 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -52,7 +52,16 @@ def map_duckdb_dtype_to_narwhals_dtype(duckdb_dtype: Any, dtypes: DTypes) -> DTy if duckdb_dtype == "INTERVAL": return dtypes.Duration() if duckdb_dtype.startswith("STRUCT"): - return dtypes.Struct() + matchstruc_ = re.findall(r"(\w+)\s+(\w+)", duckdb_dtype) + return dtypes.Struct( + [ + dtypes.Field( + matchstruc_[i][0], + 
map_duckdb_dtype_to_narwhals_dtype(matchstruc_[i][1], dtypes), + ) + for i in range(len(matchstruc_)) + ] + ) if match_ := re.match(r"(.*)\[\]$", duckdb_dtype): return dtypes.List(map_duckdb_dtype_to_narwhals_dtype(match_.group(1), dtypes)) if match_ := re.match(r"(\w+)\[(\d+)\]", duckdb_dtype): @@ -81,6 +90,22 @@ def __getitem__(self, item: str) -> DuckDBInterchangeSeries: self._native_frame.select(item), dtypes=self._dtypes ) + def select( + self: Self, + *exprs: Any, + **named_exprs: Any, + ) -> Self: + if named_exprs or not all(isinstance(x, str) for x in exprs): # pragma: no cover + msg = ( + "`select`-ing not by name is not supported for DuckDB backend.\n\n" + "If you would like to see this kind of object better supported in " + "Narwhals, please open a feature request " + "at https://github.com/narwhals-dev/narwhals/issues." + ) + raise NotImplementedError(msg) + + return self._from_native_frame(self._native_frame.select(*exprs)) + def __getattr__(self, attr: str) -> Any: if attr == "schema": return { @@ -111,3 +136,6 @@ def to_pandas(self: Self) -> pd.DataFrame: def to_arrow(self: Self) -> pa.Table: return self._native_frame.arrow() + + def _from_native_frame(self: Self, df: Any) -> Self: + return self.__class__(df, dtypes=self._dtypes) diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index d281cc945..89d020b0b 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -219,19 +219,21 @@ def reuse_series_implementation( plx = expr.__narwhals_namespace__() def func(df: CompliantDataFrame) -> list[CompliantSeries]: - out: list[CompliantSeries] = [] - for column in expr._call(df): # type: ignore[arg-type] - _out = getattr(column, attr)( - *[maybe_evaluate_expr(df, arg) for arg in args], - **{ - arg_name: maybe_evaluate_expr(df, arg_value) - for arg_name, arg_value in kwargs.items() - }, + _args = [maybe_evaluate_expr(df, arg) for arg in args] + _kwargs = { + arg_name: maybe_evaluate_expr(df, arg_value) + for arg_name, arg_value in kwargs.items() + } + + out: list[CompliantSeries] = [ + plx._create_series_from_scalar( + getattr(column, attr)(*_args, **_kwargs), + column, # type: ignore[arg-type] ) - if returns_scalar: - out.append(plx._create_series_from_scalar(_out, column)) # type: ignore[arg-type] - else: - out.append(_out) + if returns_scalar + else getattr(column, attr)(*_args, **_kwargs) + for column in expr._call(df) # type: ignore[arg-type] + ] if expr._output_names is not None and ( [s.name for s in out] != expr._output_names ): # pragma: no cover diff --git a/narwhals/_ibis/dataframe.py b/narwhals/_ibis/dataframe.py index 9d7ebefb0..c8a665db0 100644 --- a/narwhals/_ibis/dataframe.py +++ b/narwhals/_ibis/dataframe.py @@ -51,7 +51,15 @@ def map_ibis_dtype_to_narwhals_dtype(ibis_dtype: Any, dtypes: DTypes) -> DType: map_ibis_dtype_to_narwhals_dtype(ibis_dtype.value_type, dtypes) ) if ibis_dtype.is_struct(): - return dtypes.Struct() + return dtypes.Struct( + [ + dtypes.Field( + ibis_dtype_name, + map_ibis_dtype_to_narwhals_dtype(ibis_dtype_field, dtypes), + ) + for ibis_dtype_name, ibis_dtype_field in ibis_dtype.items() + ] + ) return dtypes.Unknown() # pragma: no cover @@ -77,6 +85,24 @@ def to_pandas(self: Self) -> pd.DataFrame: def to_arrow(self: Self) -> pa.Table: return self._native_frame.to_pyarrow() + def select( + self: Self, + *exprs: Any, + **named_exprs: Any, + ) -> Self: + if named_exprs or not all(isinstance(x, str) for x in exprs): # pragma: no cover + msg = ( + "`select`-ing not by name is not supported for 
Ibis backend.\n\n" + "If you would like to see this kind of object better supported in " + "Narwhals, please open a feature request " + "at https://github.com/narwhals-dev/narwhals/issues." + ) + raise NotImplementedError(msg) + + import ibis.selectors as s + + return self._from_native_frame(self._native_frame.select(s.cols(*exprs))) + def __getattr__(self, attr: str) -> Any: if attr == "schema": return { @@ -90,3 +116,6 @@ def __getattr__(self, attr: str) -> Any: "at https://github.com/narwhals-dev/narwhals/issues." ) raise NotImplementedError(msg) + + def _from_native_frame(self: Self, df: Any) -> Self: + return self.__class__(df, dtypes=self._dtypes) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index 6a6292988..7be808afd 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -19,7 +19,7 @@ from narwhals.dependencies import is_numpy_array from narwhals.utils import Implementation from narwhals.utils import flatten -from narwhals.utils import generate_unique_token +from narwhals.utils import generate_temporary_column_name from narwhals.utils import is_sequence_but_not_str from narwhals.utils import parse_columns_to_drop @@ -89,7 +89,14 @@ def _validate_columns(self, columns: pd.Index) -> None: raise ValueError(msg) from None if len(columns) != len_unique_columns: - msg = f"Expected unique column names, got: {columns}" + from collections import Counter + + counter = Counter(columns) + msg = "" + for key, value in counter.items(): + if value > 1: + msg += f"\n- '{key}' {value} times" + msg = f"Expected unique column names, got:{msg}" raise ValueError(msg) def _from_native_frame(self, df: Any) -> Self: @@ -141,17 +148,19 @@ def __getitem__(self, item: tuple[slice, Sequence[int]]) -> PandasLikeDataFrame: def __getitem__( self, - item: str - | int - | slice - | Sequence[int] - | Sequence[str] - | tuple[Sequence[int], str | int] - | tuple[slice | Sequence[int], Sequence[int] | slice] - | tuple[slice, slice], + item: ( + str + | int + | slice + | Sequence[int] + | Sequence[str] + | tuple[Sequence[int], str | int] + | tuple[slice | Sequence[int], Sequence[int] | slice] + | tuple[slice, slice] + ), ) -> PandasLikeSeries | PandasLikeDataFrame: if isinstance(item, tuple): - item = tuple(list(i) if is_sequence_but_not_str(i) else i for i in item) + item = tuple(list(i) if is_sequence_but_not_str(i) else i for i in item) # type: ignore[assignment] if isinstance(item, str): from narwhals._pandas_like.series import PandasLikeSeries @@ -245,10 +254,36 @@ def __getitem__( def columns(self) -> list[str]: return self._native_frame.columns.tolist() # type: ignore[no-any-return] + @overload + def rows( + self, + *, + named: Literal[True], + ) -> list[dict[str, Any]]: ... + + @overload + def rows( + self, + *, + named: Literal[False] = False, + ) -> list[tuple[Any, ...]]: ... + + @overload + def rows( + self, + *, + named: bool, + ) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ... + def rows( self, *, named: bool = False ) -> list[tuple[Any, ...]] | list[dict[str, Any]]: if not named: + # cuDF does not support itertuples. But it does support to_dict! 
+ if self._implementation is Implementation.CUDF: # pragma: no cover + # Extract the row values from the named rows + return [tuple(row.values()) for row in self.rows(named=True)] + return list(self._native_frame.itertuples(index=False, name=None)) return self._native_frame.to_dict(orient="records") # type: ignore[no-any-return] @@ -276,7 +311,9 @@ def iter_rows( @property def schema(self) -> dict[str, DType]: return { - col: native_to_narwhals_dtype(self._native_frame[col], self._dtypes) + col: native_to_narwhals_dtype( + self._native_frame[col], self._dtypes, self._implementation + ) for col in self._native_frame.columns } @@ -403,7 +440,9 @@ def with_columns( return self._from_native_frame(df) def rename(self, mapping: dict[str, str]) -> Self: - return self._from_native_frame(self._native_frame.rename(columns=mapping)) + return self._from_native_frame( + self._native_frame.rename(columns=mapping, copy=False) + ) def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001 to_drop = parse_columns_to_drop( @@ -440,12 +479,13 @@ def collect(self) -> PandasLikeDataFrame: ) # --- actions --- - def group_by(self, *keys: str) -> PandasLikeGroupBy: + def group_by(self, *keys: str, drop_null_keys: bool) -> PandasLikeGroupBy: from narwhals._pandas_like.group_by import PandasLikeGroupBy return PandasLikeGroupBy( self, list(keys), + drop_null_keys=drop_null_keys, ) def join( @@ -469,7 +509,7 @@ def join( self._implementation is Implementation.PANDAS and self._backend_version < (1, 4) ): - key_token = generate_unique_token( + key_token = generate_temporary_column_name( n_bytes=8, columns=[*self.columns, *other.columns] ) @@ -504,14 +544,15 @@ def join( ) ) else: - indicator_token = generate_unique_token( + indicator_token = generate_temporary_column_name( n_bytes=8, columns=[*self.columns, *other.columns] ) other_native = ( other._native_frame.loc[:, right_on] .rename( # rename to avoid creating extra columns in join - columns=dict(zip(right_on, left_on)) # type: ignore[arg-type] + columns=dict(zip(right_on, left_on)), # type: ignore[arg-type] + copy=False, ) .drop_duplicates() ) @@ -531,7 +572,8 @@ def join( other_native = ( other._native_frame.loc[:, right_on] .rename( # rename to avoid creating extra columns in join - columns=dict(zip(right_on, left_on)) # type: ignore[arg-type] + columns=dict(zip(right_on, left_on)), # type: ignore[arg-type] + copy=False, ) .drop_duplicates() # avoids potential rows duplication from inner join ) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 07ba3e56d..a58597eea 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -331,7 +331,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: "`nw.col('a', 'b')`\n" ) raise ValueError(msg) - tmp = df.group_by(*keys).agg(self) + tmp = df.group_by(*keys, drop_null_keys=False).agg(self) tmp = df.select(*keys).join( tmp, how="left", left_on=keys, right_on=keys, suffix="_right" ) @@ -582,6 +582,11 @@ def convert_time_zone(self, time_zone: str) -> PandasLikeExpr: self._expr, "dt", "convert_time_zone", time_zone ) + def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> PandasLikeExpr: + return reuse_series_namespace_implementation( + self._expr, "dt", "timestamp", time_unit + ) + class PandasLikeExprNameNamespace: def __init__(self: Self, expr: PandasLikeExpr) -> None: diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index 366a52e1e..c628ecbdb 100644 --- a/narwhals/_pandas_like/group_by.py +++ 
b/narwhals/_pandas_like/group_by.py @@ -13,6 +13,7 @@ from narwhals._pandas_like.utils import native_series_from_iterable from narwhals.utils import Implementation from narwhals.utils import remove_prefix +from narwhals.utils import tupleify if TYPE_CHECKING: from narwhals._pandas_like.dataframe import PandasLikeDataFrame @@ -26,14 +27,19 @@ class PandasLikeGroupBy: - def __init__(self, df: PandasLikeDataFrame, keys: list[str]) -> None: + def __init__( + self, df: PandasLikeDataFrame, keys: list[str], *, drop_null_keys: bool + ) -> None: self._df = df self._keys = keys if ( self._df._implementation is Implementation.PANDAS - and self._df._backend_version < (1, 0) + and self._df._backend_version < (1, 1) ): # pragma: no cover - if self._df._native_frame.loc[:, self._keys].isna().any().any(): + if ( + not drop_null_keys + and self._df._native_frame.loc[:, self._keys].isna().any().any() + ): msg = "Grouping by null values is not supported in pandas < 1.0.0" raise NotImplementedError(msg) self._grouped = self._df._native_frame.groupby( @@ -47,7 +53,7 @@ def __init__(self, df: PandasLikeDataFrame, keys: list[str]) -> None: list(self._keys), sort=False, as_index=True, - dropna=False, + dropna=drop_null_keys, observed=True, ) @@ -96,16 +102,17 @@ def _from_native_frame(self, df: PandasLikeDataFrame) -> PandasLikeDataFrame: ) def __iter__(self) -> Iterator[tuple[Any, PandasLikeDataFrame]]: - with warnings.catch_warnings(): - # we already use `tupleify` above, so we're already opting in to - # the new behaviour - warnings.filterwarnings( - "ignore", - message="In a future version of pandas, a length 1 tuple will be returned", - category=FutureWarning, - ) - iterator = self._grouped.__iter__() - yield from ((key, self._from_native_frame(sub_df)) for (key, sub_df) in iterator) + indices = self._grouped.indices + if ( + self._df._implementation is Implementation.PANDAS + and self._df._backend_version < (2, 2) + ) or (self._df._implementation is Implementation.CUDF): # pragma: no cover + for key in indices: + yield (key, self._from_native_frame(self._grouped.get_group(key))) + else: + for key in indices: + key = tupleify(key) # noqa: PLW2901 + yield (key, self._from_native_frame(self._grouped.get_group(key))) def agg_pandas( # noqa: PLR0915 @@ -186,14 +193,19 @@ def agg_pandas( # noqa: PLR0915 f"{a}_{b}" for a, b in result_simple_aggs.columns ] result_simple_aggs = result_simple_aggs.rename( - columns=name_mapping - ).reset_index() + columns=name_mapping, copy=False + ) + # Keep inplace=True to avoid making a redundant copy. + # This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files + result_simple_aggs.reset_index(inplace=True) # noqa: PD002 if nunique_aggs: result_nunique_aggs = grouped[list(nunique_aggs.values())].nunique( dropna=False ) result_nunique_aggs.columns = list(nunique_aggs.keys()) - result_nunique_aggs = result_nunique_aggs.reset_index() + # Keep inplace=True to avoid making a redundant copy. + # This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files + result_nunique_aggs.reset_index(inplace=True) # noqa: PD002 if simple_aggs and nunique_aggs: if ( set(result_simple_aggs.columns) @@ -259,6 +271,8 @@ def func(df: Any) -> Any: else: # pragma: no cover result_complex = grouped.apply(func) - result = result_complex.reset_index() + # Keep inplace=True to avoid making a redundant copy. 
+ # This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files + result_complex.reset_index(inplace=True) # noqa: PD002 - return from_dataframe(result.loc[:, output_names]) + return from_dataframe(result_complex.loc[:, output_names]) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 63d3454a5..bb4368908 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -290,7 +290,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: (s.to_frame() for s in series), how="horizontal" ) ._native_frame.min(axis=1) - .rename(series[0].name), + .rename(series[0].name, copy=False), implementation=self._implementation, backend_version=self._backend_version, dtypes=self._dtypes, @@ -317,7 +317,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: (s.to_frame() for s in series), how="horizontal" ) ._native_frame.max(axis=1) - .rename(series[0].name), + .rename(series[0].name, copy=False), implementation=self._implementation, backend_version=self._backend_version, dtypes=self._dtypes, diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 2fe53b22a..35df78e2f 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -8,6 +8,8 @@ from typing import Sequence from typing import overload +from narwhals._pandas_like.utils import calculate_timestamp_date +from narwhals._pandas_like.utils import calculate_timestamp_datetime from narwhals._pandas_like.utils import int_dtype_mapper from narwhals._pandas_like.utils import narwhals_to_native_dtype from narwhals._pandas_like.utils import native_series_from_iterable @@ -124,11 +126,6 @@ def __getitem__(self, idx: int | slice | Sequence[int]) -> Any | Self: return self._native_series.iloc[idx] return self._from_native_series(self._native_series.iloc[idx]) - def _rename(self, series: Any, name: str) -> Any: - if self._use_copy_false: - return series.rename(name, copy=False) - return series.rename(name) # pragma: no cover - def _from_native_series(self, series: Any) -> Self: return self.__class__( series, @@ -173,7 +170,9 @@ def shape(self) -> tuple[int]: @property def dtype(self: Self) -> DType: - return native_to_narwhals_dtype(self._native_series, self._dtypes) + return native_to_narwhals_dtype( + self._native_series, self._dtypes, self._implementation + ) def scatter(self, indices: int | Sequence[int], values: Any) -> Self: if isinstance(values, self.__class__): @@ -258,127 +257,135 @@ def filter(self, other: Any) -> PandasLikeSeries: ser = self._native_series if not (isinstance(other, list) and all(isinstance(x, bool) for x in other)): other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.loc[other], ser.name)) + return self._from_native_series(ser.loc[other].rename(ser.name, copy=False)) def __eq__(self, other: object) -> PandasLikeSeries: # type: ignore[override] ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__eq__(other), ser.name)) + return self._from_native_series(ser.__eq__(other).rename(ser.name, copy=False)) def __ne__(self, other: object) -> PandasLikeSeries: # type: ignore[override] ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__ne__(other), ser.name)) + return 
self._from_native_series(ser.__ne__(other).rename(ser.name, copy=False)) def __ge__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__ge__(other), ser.name)) + return self._from_native_series(ser.__ge__(other).rename(ser.name, copy=False)) def __gt__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__gt__(other), ser.name)) + return self._from_native_series(ser.__gt__(other).rename(ser.name, copy=False)) def __le__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__le__(other), ser.name)) + return self._from_native_series(ser.__le__(other).rename(ser.name, copy=False)) def __lt__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__lt__(other), ser.name)) + return self._from_native_series(ser.__lt__(other).rename(ser.name, copy=False)) def __and__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__and__(other), ser.name)) + return self._from_native_series(ser.__and__(other).rename(ser.name, copy=False)) def __rand__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__rand__(other), ser.name)) + return self._from_native_series(ser.__rand__(other).rename(ser.name, copy=False)) def __or__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__or__(other), ser.name)) + return self._from_native_series(ser.__or__(other).rename(ser.name, copy=False)) def __ror__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__ror__(other), ser.name)) + return self._from_native_series(ser.__ror__(other).rename(ser.name, copy=False)) def __add__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__add__(other), ser.name)) + return self._from_native_series(ser.__add__(other).rename(ser.name, copy=False)) def __radd__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__radd__(other), ser.name)) + return self._from_native_series(ser.__radd__(other).rename(ser.name, copy=False)) def __sub__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__sub__(other), ser.name)) + return self._from_native_series(ser.__sub__(other).rename(ser.name, copy=False)) def __rsub__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = 
validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__rsub__(other), ser.name)) + return self._from_native_series(ser.__rsub__(other).rename(ser.name, copy=False)) def __mul__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__mul__(other), ser.name)) + return self._from_native_series(ser.__mul__(other).rename(ser.name, copy=False)) def __rmul__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__rmul__(other), ser.name)) + return self._from_native_series(ser.__rmul__(other).rename(ser.name, copy=False)) def __truediv__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__truediv__(other), ser.name)) + return self._from_native_series( + ser.__truediv__(other).rename(ser.name, copy=False) + ) def __rtruediv__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__rtruediv__(other), ser.name)) + return self._from_native_series( + ser.__rtruediv__(other).rename(ser.name, copy=False) + ) def __floordiv__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__floordiv__(other), ser.name)) + return self._from_native_series( + ser.__floordiv__(other).rename(ser.name, copy=False) + ) def __rfloordiv__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__rfloordiv__(other), ser.name)) + return self._from_native_series( + ser.__rfloordiv__(other).rename(ser.name, copy=False) + ) def __pow__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__pow__(other), ser.name)) + return self._from_native_series(ser.__pow__(other).rename(ser.name, copy=False)) def __rpow__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__rpow__(other), ser.name)) + return self._from_native_series(ser.__rpow__(other).rename(ser.name, copy=False)) def __mod__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__mod__(other), ser.name)) + return self._from_native_series(ser.__mod__(other).rename(ser.name, copy=False)) def __rmod__(self, other: Any) -> PandasLikeSeries: ser = self._native_series other = validate_column_comparand(self._native_series.index, other) - return self._from_native_series(self._rename(ser.__rmod__(other), ser.name)) + return self._from_native_series(ser.__rmod__(other).rename(ser.name, copy=False)) # Unary @@ -484,13 +491,15 @@ def sort( na_position = "last" if nulls_last else "first" return self._from_native_series( ser.sort_values(ascending=not descending, 
na_position=na_position).rename( - self.name + self.name, copy=False ) ) def alias(self, name: str) -> Self: - ser = self._native_series - return self._from_native_series(self._rename(ser, name)) + if name != self.name: + ser = self._native_series + return self._from_native_series(ser.rename(name, copy=False)) + return self def __array__(self, dtype: Any = None, copy: bool | None = None) -> Any: # pandas used to always return object dtype for nullable dtypes. @@ -544,28 +553,25 @@ def to_pandas(self) -> Any: # --- descriptive --- def is_duplicated(self: Self) -> Self: res = self._native_series.duplicated(keep=False) - res = self._rename(res, self.name) + res = res.rename(self.name, copy=False) return self._from_native_series(res) def is_empty(self: Self) -> bool: return self._native_series.empty # type: ignore[no-any-return] def is_unique(self: Self) -> Self: - res = ~self._native_series.duplicated(keep=False) - res = self._rename(res, self.name) + res = ~self._native_series.duplicated(keep=False).rename(self.name, copy=False) return self._from_native_series(res) def null_count(self: Self) -> int: return self._native_series.isna().sum() # type: ignore[no-any-return] def is_first_distinct(self: Self) -> Self: - res = ~self._native_series.duplicated(keep="first") - res = self._rename(res, self.name) + res = ~self._native_series.duplicated(keep="first").rename(self.name, copy=False) return self._from_native_series(res) def is_last_distinct(self: Self) -> Self: - res = ~self._native_series.duplicated(keep="last") - res = self._rename(res, self.name) + res = ~self._native_series.duplicated(keep="last").rename(self.name, copy=False) return self._from_native_series(res) def is_sorted(self: Self, *, descending: bool = False) -> bool: @@ -619,9 +625,7 @@ def quantile( def zip_with(self: Self, mask: Any, other: Any) -> PandasLikeSeries: ser = self._native_series - mask = validate_column_comparand( - ser.index, mask, treat_length_one_as_scalar=False - ) + mask = validate_column_comparand(ser.index, mask) other = validate_column_comparand(ser.index, other) res = ser.where(mask, other) return self._from_native_series(res) @@ -867,8 +871,19 @@ def ordinal_day(self) -> PandasLikeSeries: ) ) + def _get_total_seconds(self) -> Any: + if hasattr(self._pandas_series._native_series.dt, "total_seconds"): + return self._pandas_series._native_series.dt.total_seconds() + else: # pragma: no cover + return ( + self._pandas_series._native_series.dt.days * 86400 + + self._pandas_series._native_series.dt.seconds + + (self._pandas_series._native_series.dt.microseconds / 1e6) + + (self._pandas_series._native_series.dt.nanoseconds / 1e9) + ) + def total_minutes(self) -> PandasLikeSeries: - s = self._pandas_series._native_series.dt.total_seconds() + s = self._get_total_seconds() s_sign = ( 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element @@ -878,7 +893,7 @@ def total_minutes(self) -> PandasLikeSeries: return self._pandas_series._from_native_series(s_abs * s_sign) def total_seconds(self) -> PandasLikeSeries: - s = self._pandas_series._native_series.dt.total_seconds() + s = self._get_total_seconds() s_sign = ( 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element @@ -888,7 +903,7 @@ def total_seconds(self) -> PandasLikeSeries: return self._pandas_series._from_native_series(s_abs * s_sign) def total_milliseconds(self) -> PandasLikeSeries: - s = self._pandas_series._native_series.dt.total_seconds() * 1e3 + s = 
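Editor's note on the `_get_total_seconds` fallback added above: for backends whose `.dt` accessor lacks `total_seconds`, the total is recomputed from the timedelta components. A minimal sketch of the equivalence it relies on, using plain pandas (the value is illustrative):

```python
import pandas as pd

# total_seconds == days * 86_400 + seconds + microseconds / 1e6 + nanoseconds / 1e9
td = pd.Series([pd.Timedelta(days=1, seconds=30)])
via_accessor = td.dt.total_seconds()
via_components = (
    td.dt.days * 86_400
    + td.dt.seconds
    + td.dt.microseconds / 1e6
    + td.dt.nanoseconds / 1e9
)
assert (via_accessor == via_components).all()  # both give 86_430.0
```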
self._get_total_seconds() * 1e3 s_sign = ( 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element @@ -898,7 +913,7 @@ def total_milliseconds(self) -> PandasLikeSeries: return self._pandas_series._from_native_series(s_abs * s_sign) def total_microseconds(self) -> PandasLikeSeries: - s = self._pandas_series._native_series.dt.total_seconds() * 1e6 + s = self._get_total_seconds() * 1e6 s_sign = ( 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element @@ -908,7 +923,7 @@ def total_microseconds(self) -> PandasLikeSeries: return self._pandas_series._from_native_series(s_abs * s_sign) def total_nanoseconds(self) -> PandasLikeSeries: - s = self._pandas_series._native_series.dt.total_seconds() * 1e9 + s = self._get_total_seconds() * 1e9 s_sign = ( 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element @@ -946,3 +961,30 @@ def convert_time_zone(self, time_zone: str) -> PandasLikeSeries: else: result = self._pandas_series._native_series.dt.tz_convert(time_zone) return self._pandas_series._from_native_series(result) + + def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> PandasLikeSeries: + s = self._pandas_series._native_series + dtype = self._pandas_series.dtype + is_pyarrow_dtype = "pyarrow" in str(self._pandas_series._native_series.dtype) + mask_na = s.isna() + if dtype == self._pandas_series._dtypes.Date: + # Date is only supported in pandas dtypes if pyarrow-backed + s_cast = s.astype("Int32[pyarrow]") + result = calculate_timestamp_date(s_cast, time_unit) + elif dtype == self._pandas_series._dtypes.Datetime: + original_time_unit = dtype.time_unit # type: ignore[attr-defined] + if ( + self._pandas_series._implementation is Implementation.PANDAS + and self._pandas_series._backend_version < (2,) + ): # pragma: no cover + s_cast = s.view("Int64[pyarrow]") if is_pyarrow_dtype else s.view("int64") + else: + s_cast = ( + s.astype("Int64[pyarrow]") if is_pyarrow_dtype else s.astype("int64") + ) + result = calculate_timestamp_datetime(s_cast, original_time_unit, time_unit) + else: + msg = "Input should be either of Date or Datetime type" + raise TypeError(msg) + result[mask_na] = None + return self._pandas_series._from_native_series(result) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 381a78c8d..99181bc1e 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -32,9 +32,7 @@ } -def validate_column_comparand( - index: Any, other: Any, *, treat_length_one_as_scalar: bool = True -) -> Any: +def validate_column_comparand(index: Any, other: Any) -> Any: """Validate RHS of binary operation. 
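Editor's note on the new backend `timestamp` method above: it converts Date/Datetime columns to integer timestamps and re-applies the null mask at the end. A hedged usage sketch, assuming the matching public `Expr.dt.timestamp` API is exposed alongside this backend method:

```python
from datetime import datetime

import narwhals as nw
import pandas as pd

df = nw.from_native(pd.DataFrame({"ts": [datetime(2024, 1, 1), None]}))
# Convert datetimes to integer milliseconds since the Unix epoch.
result = df.with_columns(ts_ms=nw.col("ts").dt.timestamp("ms"))
print(result.to_native())  # 1704067200000 for 2024-01-01; the missing value stays missing
```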
If the comparison isn't supported, return `NotImplemented` so that the @@ -55,9 +53,10 @@ def validate_column_comparand( if isinstance(other, PandasLikeDataFrame): return NotImplemented if isinstance(other, PandasLikeSeries): - if other.len() == 1 and treat_length_one_as_scalar: + if other.len() == 1: # broadcast - return other.item() + s = other._native_series + return s.__class__(s.iloc[0], index=index, dtype=s.dtype) if other._native_series.index is not index: return set_axis( other._native_series, @@ -83,7 +82,8 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any: if isinstance(other, PandasLikeSeries): if other.len() == 1: # broadcast - return other._native_series.iloc[0] + s = other._native_series + return s.__class__(s.iloc[0], index=index, dtype=s.dtype) if other._native_series.index is not index: return set_axis( other._native_series, @@ -218,7 +218,9 @@ def set_axis( return obj.set_axis(index, axis=0, **kwargs) # type: ignore[attr-defined, no-any-return] -def native_to_narwhals_dtype(native_column: Any, dtypes: DTypes) -> DType: +def native_to_narwhals_dtype( + native_column: Any, dtypes: DTypes, implementation: Implementation +) -> DType: dtype = str(native_column.dtype) pd_datetime_rgx = ( @@ -280,31 +282,23 @@ def native_to_narwhals_dtype(native_column: Any, dtypes: DTypes) -> DType: return dtypes.Duration(du_time_unit) if dtype == "date32[day][pyarrow]": return dtypes.Date() - if dtype.startswith(("large_list", "list")): - return dtypes.List( - arrow_native_to_narwhals_dtype( - native_column.dtype.pyarrow_dtype.value_type, dtypes - ) - ) - if dtype.startswith("fixed_size_list"): - return dtypes.Array( - arrow_native_to_narwhals_dtype( - native_column.dtype.pyarrow_dtype.value_type, dtypes - ), - native_column.dtype.pyarrow_dtype.list_size, - ) - if dtype.startswith("struct"): - return dtypes.Struct() + if dtype.startswith(("large_list", "list", "struct", "fixed_size_list")): + return arrow_native_to_narwhals_dtype(native_column.dtype.pyarrow_dtype, dtypes) if dtype == "object": - if ( # pragma: no cover TODO(unassigned): why does this show as uncovered? - idx := getattr(native_column, "first_valid_index", lambda: None)() - ) is not None and isinstance(native_column.loc[idx], str): - # Infer based on first non-missing value. - # For pandas pre 3.0, this isn't perfect. - # After pandas 3.0, pandas has a dedicated string dtype - # which is inferred by default. + if implementation is Implementation.DASK: + # Dask columns are lazy, so we can't inspect values. 
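Editor's note on the broadcasting change in `validate_column_comparand` above: a length-1 comparand is no longer collapsed to a bare scalar but expanded into a full series built from the scalar value, the left-hand index, and the original dtype. A small sketch of the same construction in plain pandas:

```python
import pandas as pd

left = pd.Series([10, 20, 30], index=[7, 8, 9])
rhs = pd.Series([5], dtype="int64")  # length-1 comparand
# Same construction as above: scalar value, left-hand index, original dtype.
broadcast = rhs.__class__(rhs.iloc[0], index=left.index, dtype=rhs.dtype)
print(left + broadcast)  # 15, 25, 35 -- indices line up by construction
```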
+ # The most useful assumption is probably String return dtypes.String() - else: + if implementation is Implementation.PANDAS: # pragma: no cover + # This is the most efficient implementation for pandas, + # and doesn't require the interchange protocol + import pandas as pd # ignore-banned-import + + dtype = pd.api.types.infer_dtype(native_column, skipna=True) + if dtype == "string": + return dtypes.String() + return dtypes.Object() + else: # pragma: no cover df = native_column.to_frame() if hasattr(df, "__dataframe__"): from narwhals._interchange.dataframe import ( @@ -315,10 +309,8 @@ def native_to_narwhals_dtype(native_column: Any, dtypes: DTypes) -> DType: return map_interchange_dtype_to_narwhals_dtype( df.__dataframe__().get_column(0).dtype, dtypes ) - except Exception: # noqa: BLE001 - return dtypes.Object() - else: # pragma: no cover - return dtypes.Object() + except Exception: # noqa: BLE001, S110 + pass return dtypes.Unknown() @@ -555,3 +547,51 @@ def convert_str_slice_to_int_slice( stop = columns.get_loc(str_slice.stop) + 1 if str_slice.stop is not None else None step = str_slice.step return (start, stop, step) + + +def calculate_timestamp_datetime( + s: pd.Series, original_time_unit: str, time_unit: str +) -> pd.Series: + if original_time_unit == "ns": + if time_unit == "ns": + result = s + elif time_unit == "us": + result = s // 1_000 + else: + result = s // 1_000_000 + elif original_time_unit == "us": + if time_unit == "ns": + result = s * 1_000 + elif time_unit == "us": + result = s + else: + result = s // 1_000 + elif original_time_unit == "ms": + if time_unit == "ns": + result = s * 1_000_000 + elif time_unit == "us": + result = s * 1_000 + else: + result = s + elif original_time_unit == "s": + if time_unit == "ns": + result = s * 1_000_000_000 + elif time_unit == "us": + result = s * 1_000_000 + else: + result = s * 1_000 + else: # pragma: no cover + msg = f"unexpected time unit {original_time_unit}, please report a bug at https://github.com/narwhals-dev/narwhals" + raise AssertionError(msg) + return result + + +def calculate_timestamp_date(s: pd.Series, time_unit: str) -> pd.Series: + s = s * 86_400 # number of seconds in a day + if time_unit == "ns": + result = s * 1_000_000_000 + elif time_unit == "us": + result = s * 1_000_000 + else: + result = s * 1_000 + return result diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index 0ef2f879d..832331ebf 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -68,6 +68,12 @@ def _from_native_object(self, obj: Any) -> Any: def __getattr__(self, attr: str) -> Any: if attr == "collect": # pragma: no cover raise AttributeError + if attr == "schema": + schema = self._native_frame.schema + return { + name: native_to_narwhals_dtype(dtype, self._dtypes) + for name, dtype in schema.items() + } def func(*args: Any, **kwargs: Any) -> Any: args, kwargs = extract_args_kwargs(args, kwargs) # type: ignore[assignment] @@ -85,14 +91,6 @@ def __array__(self, dtype: Any | None = None, copy: bool | None = None) -> np.nd return self._native_frame.__array__(dtype) return self._native_frame.__array__(dtype) - @property - def schema(self) -> dict[str, Any]: - schema = self._native_frame.schema - return { - name: native_to_narwhals_dtype(dtype, self._dtypes) - for name, dtype in schema.items() - } - def collect_schema(self) -> dict[str, Any]: if self._backend_version < (1,): # pragma: no cover schema = self._native_frame.schema @@ -205,10 +203,10 @@ def to_dict(self, *, as_series: bool) -> Any: else: 
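Editor's note on the `calculate_timestamp_date` and `calculate_timestamp_datetime` helpers above: they are plain unit arithmetic on integer series. A worked check of the date path (the value is illustrative):

```python
import pandas as pd

# A Date stored as days since the Unix epoch: 19_723 days is 2024-01-01.
days_since_epoch = pd.Series([19_723], dtype="int64")
seconds = days_since_epoch * 86_400          # seconds in a day
microseconds = seconds * 1_000_000           # the time_unit="us" branch
assert microseconds[0] == 1_704_067_200_000_000
```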
return df.to_dict(as_series=False) - def group_by(self, *by: str) -> Any: + def group_by(self, *by: str, drop_null_keys: bool) -> Any: from narwhals._polars.group_by import PolarsGroupBy - return PolarsGroupBy(self, list(by)) + return PolarsGroupBy(self, list(by), drop_null_keys=drop_null_keys) def with_row_index(self, name: str) -> Any: if self._backend_version < (0, 20, 4): # pragma: no cover @@ -314,10 +312,10 @@ def collect(self) -> PolarsDataFrame: dtypes=self._dtypes, ) - def group_by(self, *by: str) -> Any: + def group_by(self, *by: str, drop_null_keys: bool) -> Any: from narwhals._polars.group_by import PolarsLazyGroupBy - return PolarsLazyGroupBy(self, list(by)) + return PolarsLazyGroupBy(self, list(by), drop_null_keys=drop_null_keys) def with_row_index(self, name: str) -> Any: if self._backend_version < (0, 20, 4): # pragma: no cover diff --git a/narwhals/_polars/group_by.py b/narwhals/_polars/group_by.py index f03da610e..aa69db37f 100644 --- a/narwhals/_polars/group_by.py +++ b/narwhals/_polars/group_by.py @@ -11,10 +11,13 @@ class PolarsGroupBy: - def __init__(self, df: Any, keys: list[str]) -> None: + def __init__(self, df: Any, keys: list[str], *, drop_null_keys: bool) -> None: self._compliant_frame = df self.keys = keys - self._grouped = df._native_frame.group_by(keys) + if drop_null_keys: + self._grouped = df.drop_nulls(keys)._native_frame.group_by(keys) + else: + self._grouped = df._native_frame.group_by(keys) def agg(self, *aggs: Any, **named_aggs: Any) -> PolarsDataFrame: aggs, named_aggs = extract_args_kwargs(aggs, named_aggs) # type: ignore[assignment] @@ -28,10 +31,13 @@ def __iter__(self) -> Any: class PolarsLazyGroupBy: - def __init__(self, df: Any, keys: list[str]) -> None: + def __init__(self, df: Any, keys: list[str], *, drop_null_keys: bool) -> None: self._compliant_frame = df self.keys = keys - self._grouped = df._native_frame.group_by(keys) + if drop_null_keys: + self._grouped = df.drop_nulls(keys)._native_frame.group_by(keys) + else: + self._grouped = df._native_frame.group_by(keys) def agg(self, *aggs: Any, **named_aggs: Any) -> PolarsLazyFrame: aggs, named_aggs = extract_args_kwargs(aggs, named_aggs) # type: ignore[assignment] diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index d44535cc7..ac6ffb2bd 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -75,11 +75,16 @@ def native_to_narwhals_dtype(dtype: Any, dtypes: DTypes) -> DType: du_time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us") return dtypes.Duration(time_unit=du_time_unit) if dtype == pl.Struct: - return dtypes.Struct() + return dtypes.Struct( + [ + dtypes.Field(field_name, native_to_narwhals_dtype(field_type, dtypes)) + for field_name, field_type in dtype + ] + ) if dtype == pl.List: return dtypes.List(native_to_narwhals_dtype(dtype.inner, dtypes)) if dtype == pl.Array: - if parse_version(pl.__version__) < (1, 0): # pragma: no cover + if parse_version(pl.__version__) < (0, 20, 30): # pragma: no cover return dtypes.Array( native_to_narwhals_dtype(dtype.inner, dtypes), dtype.width ) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 5c8c7e13e..3ddaa2814 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -582,9 +582,9 @@ def write_parquet(self, file: str | Path | BytesIO) -> Any: We can then pass either pandas, Polars or PyArrow to `func`: - >>> func(df_pd) - >>> func(df_pl) - >>> func(df_pa) + >>> func(df_pd) # doctest:+SKIP + >>> func(df_pl) # doctest:+SKIP + >>> func(df_pa) # doctest:+SKIP """ 
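Editor's note on the `PolarsGroupBy`/`PolarsLazyGroupBy` change above: `drop_null_keys` is implemented by dropping rows with null keys before grouping. A hedged sketch of that equivalence in raw Polars (recent versions, which spell the method `group_by`):

```python
import polars as pl

df = pl.DataFrame({"key": ["a", None, "a"], "value": [1, 2, 3]})
kept = df.group_by("key").agg(pl.col("value").sum())                        # includes the null-key group
dropped = df.drop_nulls("key").group_by("key").agg(pl.col("value").sum())   # only the "a" group
print(kept, dropped)
```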
self._compliant_frame.write_parquet(file) @@ -742,14 +742,16 @@ def __getitem__(self, item: tuple[slice, slice]) -> Self: ... def __getitem__( self, - item: str - | slice - | Sequence[int] - | Sequence[str] - | tuple[Sequence[int], str | int] - | tuple[slice, str | int] - | tuple[slice | Sequence[int], Sequence[int] | Sequence[str] | slice] - | tuple[slice, slice], + item: ( + str + | slice + | Sequence[int] + | Sequence[str] + | tuple[Sequence[int], str | int] + | tuple[slice, str | int] + | tuple[slice | Sequence[int], Sequence[int] | Sequence[str] | slice] + | tuple[slice, slice] + ), ) -> Series | Self: """ Extract column or slice of DataFrame. @@ -1195,16 +1197,14 @@ def columns(self) -> list[str]: def rows( self, *, - named: Literal[False], + named: Literal[False] = False, ) -> list[tuple[Any, ...]]: ... - @overload def rows( self, *, named: Literal[True], ) -> list[dict[str, Any]]: ... - @overload def rows( self, @@ -1867,12 +1867,16 @@ def filter(self, *predicates: IntoExpr | Iterable[IntoExpr] | list[bool]) -> Sel """ return super().filter(*predicates) - def group_by(self, *keys: str | Iterable[str]) -> GroupBy[Self]: + def group_by( + self, *keys: str | Iterable[str], drop_null_keys: bool = False + ) -> GroupBy[Self]: r""" Start a group by operation. Arguments: *keys: Column(s) to group by. Accepts multiple columns names as a list. + drop_null_keys: if True, then groups where any key is null won't be included + in the result. Returns: GroupBy: Object which can be used to perform aggregations. @@ -1941,7 +1945,7 @@ def group_by(self, *keys: str | Iterable[str]) -> GroupBy[Self]: """ from narwhals.group_by import GroupBy - return GroupBy(self, *flatten(keys)) + return GroupBy(self, *flatten(keys), drop_null_keys=drop_null_keys) def sort( self, @@ -3322,11 +3326,6 @@ def rename(self, mapping: dict[str, str]) -> Self: function that takes the old name as input and returns the new name. - Notes: - If existing names are swapped (e.g. 'A' points to 'B' and 'B' - points to 'A'), polars will block projection and predicate - pushdowns at this node. - Examples: >>> import pandas as pd >>> import polars as pl @@ -3758,7 +3757,9 @@ def filter(self, *predicates: IntoExpr | Iterable[IntoExpr] | list[bool]) -> Sel """ return super().filter(*predicates) - def group_by(self, *keys: str | Iterable[str]) -> LazyGroupBy[Self]: + def group_by( + self, *keys: str | Iterable[str], drop_null_keys: bool = False + ) -> LazyGroupBy[Self]: r""" Start a group by operation. @@ -3766,6 +3767,8 @@ def group_by(self, *keys: str | Iterable[str]) -> LazyGroupBy[Self]: *keys: Column(s) to group by. Accepts expression input. Strings are parsed as column names. + drop_null_keys: if True, then groups where any key is null won't be + included in the result. 
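Editor's note on the user-facing `drop_null_keys` flag documented above: it behaves the same way across backends. A hedged usage sketch with a pandas-backed frame:

```python
import narwhals as nw
import pandas as pd

df = nw.from_native(pd.DataFrame({"key": ["a", None, "a"], "value": [1, 2, 3]}))
result = df.group_by("key", drop_null_keys=True).agg(nw.col("value").sum())
print(result.to_native())  # only the "a" group (value sum 4) is returned
```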
Examples: Group by one column and call `agg` to compute the grouped sum of @@ -3858,7 +3861,7 @@ def group_by(self, *keys: str | Iterable[str]) -> LazyGroupBy[Self]: """ from narwhals.group_by import LazyGroupBy - return LazyGroupBy(self, *flatten(keys)) + return LazyGroupBy(self, *flatten(keys), drop_null_keys=drop_null_keys) def sort( self, diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py index 144c57c8a..f1c057b3b 100644 --- a/narwhals/dependencies.py +++ b/narwhals/dependencies.py @@ -23,6 +23,8 @@ import polars as pl import pyarrow as pa + from narwhals.typing import IntoSeries + def get_polars() -> Any: """Get Polars module (if already imported - else return None).""" @@ -46,6 +48,11 @@ def get_cudf() -> Any: return sys.modules.get("cudf", None) +def get_cupy() -> Any: + """Get cupy module (if already imported - else return None).""" + return sys.modules.get("cupy", None) + + def get_pyarrow() -> Any: # pragma: no cover """Get pyarrow module (if already imported - else return None).""" return sys.modules.get("pyarrow", None) @@ -91,6 +98,11 @@ def is_pandas_series(ser: Any) -> TypeGuard[pd.Series[Any]]: return (pd := get_pandas()) is not None and isinstance(ser, pd.Series) +def is_pandas_index(index: Any) -> TypeGuard[pd.Index]: + """Check whether `index` is a pandas Index without importing pandas.""" + return (pd := get_pandas()) is not None and isinstance(index, pd.Index) + + def is_modin_dataframe(df: Any) -> TypeGuard[mpd.DataFrame]: """Check whether `df` is a modin DataFrame without importing modin.""" return (mpd := get_modin()) is not None and isinstance(df, mpd.DataFrame) @@ -101,6 +113,13 @@ def is_modin_series(ser: Any) -> TypeGuard[mpd.Series]: return (mpd := get_modin()) is not None and isinstance(ser, mpd.Series) +def is_modin_index(index: Any) -> TypeGuard[mpd.Index]: + """Check whether `index` is a modin Index without importing modin.""" + return (mpd := get_modin()) is not None and isinstance( + index, mpd.Index + ) # pragma: no cover + + def is_cudf_dataframe(df: Any) -> TypeGuard[cudf.DataFrame]: """Check whether `df` is a cudf DataFrame without importing cudf.""" return (cudf := get_cudf()) is not None and isinstance(df, cudf.DataFrame) @@ -111,6 +130,13 @@ def is_cudf_series(ser: Any) -> TypeGuard[cudf.Series[Any]]: return (cudf := get_cudf()) is not None and isinstance(ser, cudf.Series) +def is_cudf_index(index: Any) -> TypeGuard[cudf.Index]: + """Check whether `index` is a cudf Index without importing cudf.""" + return (cudf := get_cudf()) is not None and isinstance( + index, cudf.Index + ) # pragma: no cover + + def is_dask_dataframe(df: Any) -> TypeGuard[dd.DataFrame]: """Check whether `df` is a Dask DataFrame without importing Dask.""" return (dd := get_dask_dataframe()) is not None and isinstance(df, dd.DataFrame) @@ -167,13 +193,62 @@ def is_pandas_like_dataframe(df: Any) -> bool: return is_pandas_dataframe(df) or is_modin_dataframe(df) or is_cudf_dataframe(df) -def is_pandas_like_series(arr: Any) -> bool: +def is_pandas_like_series(ser: Any) -> bool: + """ + Check whether `ser` is a pandas-like Series without doing any imports + + By "pandas-like", we mean: pandas, Modin, cuDF. + """ + return is_pandas_series(ser) or is_modin_series(ser) or is_cudf_series(ser) + + +def is_pandas_like_index(index: Any) -> bool: """ - Check whether `arr` is a pandas-like Series without doing any imports + Check whether `index` is a pandas-like Index without doing any imports By "pandas-like", we mean: pandas, Modin, cuDF. 
""" - return is_pandas_series(arr) or is_modin_series(arr) or is_cudf_series(arr) + return ( + is_pandas_index(index) or is_modin_index(index) or is_cudf_index(index) + ) # pragma: no cover + + +def is_into_series(native_series: IntoSeries) -> bool: + """ + Check whether `native_series` can be converted to a Narwhals Series. + + Arguments: + native_series: The object to check. + + Returns: + `True` if `native_series` can be converted to a Narwhals Series, `False` otherwise. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import numpy as np + >>> import narwhals as nw + + >>> s_pd = pd.Series([1, 2, 3]) + >>> s_pl = pl.Series([1, 2, 3]) + >>> np_arr = np.array([1, 2, 3]) + + >>> nw.dependencies.is_into_series(s_pd) + True + >>> nw.dependencies.is_into_series(s_pl) + True + >>> nw.dependencies.is_into_series(np_arr) + False + """ + from narwhals.series import Series + + return ( + isinstance(native_series, Series) + or hasattr(native_series, "__narwhals_series__") + or is_polars_series(native_series) + or is_pyarrow_chunked_array(native_series) + or is_pandas_like_series(native_series) + ) __all__ = [ @@ -200,4 +275,5 @@ def is_pandas_like_series(arr: Any) -> bool: "is_dask_dataframe", "is_pandas_like_dataframe", "is_pandas_like_series", + "is_into_series", ] diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 98d8c6914..73a77af1f 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -1,10 +1,15 @@ from __future__ import annotations +from collections import OrderedDict from datetime import timezone from typing import TYPE_CHECKING -from typing import Literal +from typing import Mapping if TYPE_CHECKING: + from typing import Iterator + from typing import Literal + from typing import Sequence + from typing_extensions import Self @@ -170,7 +175,82 @@ class Categorical(DType): ... class Enum(DType): ... -class Struct(DType): ... +class Field: + """ + Definition of a single field within a `Struct` DataType. + + Arguments: + name: The name of the field within its parent `Struct`. + dtype: The `DataType` of the field's values. + + """ + + name: str + dtype: type[DType] | DType + + def __init__(self, name: str, dtype: type[DType] | DType) -> None: + self.name = name + self.dtype = dtype + + def __eq__(self, other: Field) -> bool: # type: ignore[override] + return (self.name == other.name) & (self.dtype == other.dtype) + + def __hash__(self) -> int: + return hash((self.name, self.dtype)) + + def __repr__(self) -> str: + class_name = self.__class__.__name__ + return f"{class_name}({self.name!r}, {self.dtype})" + + +class Struct(DType): + """ + Struct composite type. + + Arguments: + fields: The fields that make up the struct. Can be either a sequence of Field objects or a mapping of column names to data types. + """ + + fields: list[Field] + + def __init__( + self, fields: Sequence[Field] | Mapping[str, DType | type[DType]] + ) -> None: + if isinstance(fields, Mapping): + self.fields = [Field(name, dtype) for name, dtype in fields.items()] + else: + self.fields = list(fields) + + def __eq__(self, other: DType | type[DType]) -> bool: # type: ignore[override] + # The comparison allows comparing objects to classes, and specific + # inner types to those without (eg: inner=None). if one of the + # arguments is not specific about its inner type we infer it + # as being equal. (See the List type for more info). 
+ if type(other) is type and issubclass(other, self.__class__): + return True + elif isinstance(other, self.__class__): + return self.fields == other.fields + else: + return False + + def __hash__(self) -> int: + return hash((self.__class__, tuple(self.fields))) + + def __iter__(self) -> Iterator[tuple[str, DType | type[DType]]]: + for fld in self.fields: + yield fld.name, fld.dtype + + def __reversed__(self) -> Iterator[tuple[str, DType | type[DType]]]: + for fld in reversed(self.fields): + yield fld.name, fld.dtype + + def __repr__(self) -> str: + class_name = self.__class__.__name__ + return f"{class_name}({dict(self)})" + + def to_schema(self) -> OrderedDict[str, DType | type[DType]]: + """Return Struct dtype as a schema dict.""" + return OrderedDict(self) class List(DType): diff --git a/narwhals/expr.py b/narwhals/expr.py index 6eedbafa4..6c2d28962 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -3,9 +3,11 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable +from typing import Generic from typing import Iterable from typing import Literal from typing import Sequence +from typing import TypeVar from narwhals.dependencies import is_numpy_array from narwhals.utils import flatten @@ -48,9 +50,11 @@ def alias(self, name: str) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 5]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [4, 5]}) Let's define a dataframe-agnostic function: @@ -58,7 +62,7 @@ def alias(self, name: str) -> Self: ... def func(df): ... return df.select((nw.col("b") + 10).alias("c")) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) c @@ -74,6 +78,12 @@ def alias(self, name: str) -> Self: │ 14 │ │ 15 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + c: int64 + ---- + c: [[14,15]] + """ return self.__class__(lambda plx: self._call(plx).alias(name)) @@ -84,10 +94,12 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> data = {"a": [1, 2, 3, 4]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Lets define a library-agnostic function: @@ -95,7 +107,7 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se ... def func(df): ... return df.select(nw.col("a").pipe(lambda x: x + 1)) - We can then pass any supported library: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -115,6 +127,11 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se │ 4 │ │ 5 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[2,3,4,5]] """ return function(self, *args, **kwargs) @@ -131,10 +148,12 @@ def cast( Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from datetime import date >>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]}) >>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]}) + >>> df_pa = pa.table({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]}) Let's define a dataframe-agnostic function: @@ -144,7 +163,7 @@ def cast( ... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8) ... 
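Editor's note, returning to the `Struct` and `Field` dtypes added in `narwhals/dtypes.py` above: a short hedged sketch, assuming `nw.Struct` re-exports the class with its new constructor as the other dtypes are re-exported:

```python
import narwhals as nw

dtype = nw.Struct({"a": nw.Int64, "b": nw.String})
assert dtype == nw.Struct                      # comparing against the bare class is allowed
assert list(dtype.to_schema()) == ["a", "b"]   # ordered mapping of field name to dtype
for name, field_dtype in dtype:                # Struct iterates over (name, dtype) pairs
    print(name, field_dtype)
```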
) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) foo bar @@ -162,6 +181,13 @@ def cast( │ 2.0 ┆ 7 │ │ 3.0 ┆ 8 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + foo: float + bar: uint8 + ---- + foo: [[1,2,3]] + bar: [[6,7,8]] """ return self.__class__( lambda plx: self._call(plx).cast(dtype), @@ -299,9 +325,11 @@ def any(self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]}) >>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]}) + >>> df_pa = pa.table({"a": [True, False], "b": [True, True]}) We define a dataframe-agnostic function: @@ -309,7 +337,7 @@ def any(self) -> Self: ... def func(df): ... return df.select(nw.col("a", "b").any()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -323,6 +351,13 @@ def any(self) -> Self: ╞══════╪══════╡ │ true ┆ true │ └──────┴──────┘ + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[true]] + b: [[true]] """ return self.__class__(lambda plx: self._call(plx).any()) @@ -333,9 +368,11 @@ def all(self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]}) >>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]}) + >>> df_pa = pa.table({"a": [True, False], "b": [True, True]}) Let's define a dataframe-agnostic function: @@ -343,7 +380,7 @@ def all(self) -> Self: ... def func(df): ... return df.select(nw.col("a", "b").all()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -357,6 +394,13 @@ def all(self) -> Self: ╞═══════╪══════╡ │ false ┆ true │ └───────┴──────┘ + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[false]] + b: [[true]] """ return self.__class__(lambda plx: self._call(plx).all()) @@ -367,9 +411,11 @@ def mean(self) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]}) >>> df_pl = pl.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]}) + >>> df_pa = pa.table({"a": [-1, 0, 1], "b": [2, 4, 6]}) Let's define a dataframe-agnostic function: @@ -377,7 +423,7 @@ def mean(self) -> Self: ... def func(df): ... 
return df.select(nw.col("a", "b").mean()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -391,6 +437,13 @@ def mean(self) -> Self: ╞═════╪═════╡ │ 0.0 ┆ 4.0 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[0]] + b: [[4]] """ return self.__class__(lambda plx: self._call(plx).mean()) @@ -405,9 +458,11 @@ def std(self, *, ddof: int = 1) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]}) >>> df_pl = pl.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]}) + >>> df_pa = pa.table({"a": [20, 25, 60], "b": [1.5, 1, -1.4]}) Let's define a dataframe-agnostic function: @@ -415,7 +470,7 @@ def std(self, *, ddof: int = 1) -> Self: ... def func(df): ... return df.select(nw.col("a", "b").std(ddof=0)) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -429,6 +484,13 @@ def std(self, *, ddof: int = 1) -> Self: ╞══════════╪══════════╡ │ 17.79513 ┆ 1.265789 │ └──────────┴──────────┘ + >>> func(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[17.795130420052185]] + b: [[1.2657891697365016]] """ return self.__class__(lambda plx: self._call(plx).std(ddof=ddof)) @@ -440,9 +502,11 @@ def sum(self) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [5, 10], "b": [50, 100]}) >>> df_pl = pl.DataFrame({"a": [5, 10], "b": [50, 100]}) + >>> df_pa = pa.table({"a": [5, 10], "b": [50, 100]}) Let's define a dataframe-agnostic function: @@ -450,7 +514,7 @@ def sum(self) -> Expr: ... def func(df): ... return df.select(nw.col("a", "b").sum()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -464,6 +528,13 @@ def sum(self) -> Expr: ╞═════╪═════╡ │ 15 ┆ 150 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[15]] + b: [[150]] """ return self.__class__(lambda plx: self._call(plx).sum()) @@ -474,9 +545,11 @@ def min(self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 3]}) >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 3]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [4, 3]}) Let's define a dataframe-agnostic function: @@ -484,7 +557,7 @@ def min(self) -> Self: ... def func(df): ... 
return df.select(nw.min("a", "b")) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -498,6 +571,13 @@ def min(self) -> Self: ╞═════╪═════╡ │ 1 ┆ 3 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1]] + b: [[3]] """ return self.__class__(lambda plx: self._call(plx).min()) @@ -508,9 +588,11 @@ def max(self) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [10, 20], "b": [50, 100]}) >>> df_pl = pl.DataFrame({"a": [10, 20], "b": [50, 100]}) + >>> df_pa = pa.table({"a": [10, 20], "b": [50, 100]}) Let's define a dataframe-agnostic function: @@ -518,7 +600,7 @@ def max(self) -> Self: ... def func(df): ... return df.select(nw.max("a", "b")) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -532,6 +614,13 @@ def max(self) -> Self: ╞═════╪═════╡ │ 20 ┆ 100 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[20]] + b: [[100]] """ return self.__class__(lambda plx: self._call(plx).max()) @@ -542,9 +631,11 @@ def count(self) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]}) >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]}) + >>> df_pa = pa.table({"a": [1, 2, 3], "b": [None, 4, 4]}) Let's define a dataframe-agnostic function: @@ -552,7 +643,7 @@ def count(self) -> Self: ... def func(df): ... return df.select(nw.all().count()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -566,6 +657,13 @@ def count(self) -> Self: ╞═════╪═════╡ │ 3 ┆ 2 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[3]] + b: [[2]] """ return self.__class__(lambda plx: self._call(plx).count()) @@ -576,9 +674,11 @@ def n_unique(self) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]}) >>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]}) + >>> df_pa = pa.table({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]}) Let's define a dataframe-agnostic function: @@ -586,7 +686,7 @@ def n_unique(self) -> Self: ... def func(df): ... 
return df.select(nw.col("a", "b").n_unique()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -600,6 +700,13 @@ def n_unique(self) -> Self: ╞═════╪═════╡ │ 5 ┆ 3 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[5]] + b: [[3]] """ return self.__class__(lambda plx: self._call(plx).n_unique()) @@ -610,9 +717,11 @@ def unique(self) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}) >>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}) + >>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}) Let's define a dataframe-agnostic function: @@ -620,7 +729,7 @@ def unique(self) -> Self: ... def func(df): ... return df.select(nw.col("a", "b").unique()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -638,6 +747,13 @@ def unique(self) -> Self: │ 3 ┆ 4 │ │ 5 ┆ 6 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,3,5]] + b: [[2,4,6]] """ return self.__class__(lambda plx: self._call(plx).unique()) @@ -648,10 +764,12 @@ def abs(self) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> data = {"a": [1, -2], "b": [-3, 4]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function: @@ -659,7 +777,7 @@ def abs(self) -> Self: ... def func(df): ... return df.select(nw.col("a", "b").abs()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -675,6 +793,13 @@ def abs(self) -> Self: │ 1 ┆ 3 │ │ 2 ┆ 4 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2]] + b: [[3,4]] """ return self.__class__(lambda plx: self._call(plx).abs()) @@ -685,9 +810,11 @@ def cum_sum(self) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}) >>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}) + >>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}) Let's define a dataframe-agnostic function: @@ -695,7 +822,7 @@ def cum_sum(self) -> Self: ... def func(df): ... return df.select(nw.col("a", "b").cum_sum()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -717,6 +844,13 @@ def cum_sum(self) -> Self: │ 10 ┆ 16 │ │ 15 ┆ 22 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,5,10,15]] + b: [[2,6,10,16,22]] """ return self.__class__(lambda plx: self._call(plx).cum_sum()) @@ -736,9 +870,11 @@ def diff(self) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]}) >>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]}) + >>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]}) Let's define a dataframe-agnostic function: @@ -746,7 +882,7 @@ def diff(self) -> Self: ... def func(df): ... 
return df.select(a_diff=nw.col("a").diff()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a_diff @@ -768,6 +904,11 @@ def diff(self) -> Self: │ 2 │ │ 0 │ └────────┘ + >>> func(df_pa) + pyarrow.Table + a_diff: int64 + ---- + a_diff: [[null,0,2,2,0]] """ return self.__class__(lambda plx: self._call(plx).diff()) @@ -787,9 +928,11 @@ def shift(self, n: int) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]}) >>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]}) + >>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]}) Let's define a dataframe-agnostic function: @@ -797,7 +940,7 @@ def shift(self, n: int) -> Self: ... def func(df): ... return df.select(a_shift=nw.col("a").shift(n=1)) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a_shift @@ -819,6 +962,11 @@ def shift(self, n: int) -> Self: │ 3 │ │ 5 │ └─────────┘ + >>> func(df_pa) + pyarrow.Table + a_shift: int64 + ---- + a_shift: [[null,1,1,3,5]] """ return self.__class__(lambda plx: self._call(plx).shift(n)) @@ -834,9 +982,10 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl - + >>> import pyarrow as pa >>> df_pd = pd.DataFrame({"a": [5, None, 1, 2]}) >>> df_pl = pl.DataFrame({"a": [5, None, 1, 2]}) + >>> df_pa = pa.table({"a": [5, None, 1, 2]}) Let's define dataframe-agnostic functions: @@ -849,7 +998,7 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: ... df = df.select(nw.col("a").sort(descending=True)) ... return nw.to_native(df) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -869,6 +1018,11 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: │ 2 │ │ 5 │ └──────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[null,1,2,5]] >>> func_descend(df_pd) a @@ -888,6 +1042,11 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: │ 2 │ │ 1 │ └──────┘ + >>> func_descend(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[null,5,2,1]] """ return self.__class__( lambda plx: self._call(plx).sort(descending=descending, nulls_last=nulls_last) @@ -910,9 +1069,11 @@ def is_between( Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5]}) >>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5]}) + >>> df_pa = pa.table({"a": [1, 2, 3, 4, 5]}) Let's define a dataframe-agnostic function: @@ -920,7 +1081,7 @@ def is_between( ... def func(df): ... 
return df.select(nw.col("a").is_between(2, 4, "right")) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -942,6 +1103,11 @@ def is_between( │ true │ │ false │ └───────┘ + >>> func(df_pa) + pyarrow.Table + a: bool + ---- + a: [[false,false,true,true,false]] """ return self.__class__( lambda plx: self._call(plx).is_between(lower_bound, upper_bound, closed) @@ -957,9 +1123,11 @@ def is_in(self, other: Any) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2, 9, 10]}) >>> df_pl = pl.DataFrame({"a": [1, 2, 9, 10]}) + >>> df_pa = pa.table({"a": [1, 2, 9, 10]}) Let's define a dataframe-agnostic function: @@ -967,7 +1135,7 @@ def is_in(self, other: Any) -> Self: ... def func(df): ... return df.with_columns(b=nw.col("a").is_in([1, 2])) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -988,6 +1156,13 @@ def is_in(self, other: Any) -> Self: │ 9 ┆ false │ │ 10 ┆ false │ └─────┴───────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: bool + ---- + a: [[1,2,9,10]] + b: [[true,true,false,false]] """ if isinstance(other, Iterable) and not isinstance(other, (str, bytes)): return self.__class__(lambda plx: self._call(plx).is_in(other)) @@ -1002,9 +1177,11 @@ def filter(self, *predicates: Any) -> Self: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}) >>> df_pl = pl.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}) + >>> df_pa = pa.table({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}) Let's define a dataframe-agnostic function: @@ -1015,7 +1192,7 @@ def filter(self, *predicates: Any) -> Self: ... nw.col("b").filter(nw.col("b") < 13), ... ) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -1033,6 +1210,13 @@ def filter(self, *predicates: Any) -> Self: │ 6 ┆ 11 │ │ 7 ┆ 12 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[5,6,7]] + b: [[10,11,12]] """ return self.__class__( lambda plx: self._call(plx).filter( @@ -1051,6 +1235,7 @@ def is_null(self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame( ... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]} @@ -1058,6 +1243,9 @@ def is_null(self) -> Self: >>> df_pl = pl.DataFrame( ... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]} ... ) + >>> df_pa = pa.table( + ... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]} + ... ) Let's define a dataframe-agnostic function: @@ -1067,7 +1255,7 @@ def is_null(self) -> Self: ... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null() ... 
) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b a_is_null b_is_null @@ -1090,6 +1278,18 @@ def is_null(self) -> Self: │ 3 ┆ 3.0 ┆ false ┆ false │ │ 5 ┆ 5.0 ┆ false ┆ false │ └──────┴─────┴───────────┴───────────┘ + + >>> func(df_pa) # nan != null for pyarrow + pyarrow.Table + a: int64 + b: double + a_is_null: bool + b_is_null: bool + ---- + a: [[2,4,null,3,5]] + b: [[2,4,nan,3,5]] + a_is_null: [[false,false,true,false,false]] + b_is_null: [[false,false,false,false,false]] """ return self.__class__(lambda plx: self._call(plx).is_null()) @@ -1100,10 +1300,12 @@ def arg_true(self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> data = {"a": [1, None, None, 2]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) We define a library agnostic function: @@ -1111,7 +1313,7 @@ def arg_true(self) -> Self: ... def func(df): ... return df.select(nw.col("a").is_null().arg_true()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1127,6 +1329,11 @@ def arg_true(self) -> Self: │ 1 │ │ 2 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[1,2]] """ return self.__class__(lambda plx: self._call(plx).arg_true()) @@ -1141,6 +1348,7 @@ def fill_null(self, value: Any) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame( ... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]} @@ -1148,6 +1356,9 @@ def fill_null(self, value: Any) -> Self: >>> df_pl = pl.DataFrame( ... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]} ... ) + >>> df_pa = pa.table( + ... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]} + ... ) Let's define a dataframe-agnostic function: @@ -1155,7 +1366,7 @@ def fill_null(self, value: Any) -> Self: ... def func(df): ... return df.with_columns(nw.col("a", "b").fill_null(0)) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -1178,6 +1389,14 @@ def fill_null(self, value: Any) -> Self: │ 3 ┆ 3.0 │ │ 5 ┆ 5.0 │ └─────┴─────┘ + + >>> func(df_pa) # nan != null for pyarrow + pyarrow.Table + a: int64 + b: double + ---- + a: [[2,4,0,3,5]] + b: [[2,4,nan,3,5]] """ return self.__class__(lambda plx: self._call(plx).fill_null(value)) @@ -1194,9 +1413,11 @@ def drop_nulls(self) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]}) >>> df_pl = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]}) + >>> df_pa = pa.table({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]}) Let's define a dataframe-agnostic function: @@ -1204,7 +1425,7 @@ def drop_nulls(self) -> Self: ... def func(df): ... 
return df.select(nw.col("a").drop_nulls()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1225,6 +1446,11 @@ def drop_nulls(self) -> Self: │ 3.0 │ │ 5.0 │ └─────┘ + >>> func(df_pa) # nan != null for pyarrow + pyarrow.Table + a: double + ---- + a: [[2,4,nan,3,5]] """ return self.__class__(lambda plx: self._call(plx).drop_nulls()) @@ -1250,9 +1476,10 @@ def sample( >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl - + >>> import pyarrow as pa >>> df_pd = pd.DataFrame({"a": [1, 2, 3]}) >>> df_pl = pl.DataFrame({"a": [1, 2, 3]}) + >>> df_pa = pa.table({"a": [1, 2, 3]}) Let's define a dataframe-agnostic function: @@ -1260,7 +1487,7 @@ def sample( ... def func(df): ... return df.select(nw.col("a").sample(fraction=1.0, with_replacement=True)) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) # doctest: +SKIP a @@ -1278,6 +1505,11 @@ def sample( │ 3 │ │ 3 │ └─────┘ + >>> func(df_pa) # doctest: +SKIP + pyarrow.Table + a: int64 + ---- + a: [[1,3,3]] """ return self.__class__( lambda plx: self._call(plx).sample( @@ -1298,9 +1530,11 @@ def over(self, *keys: str | Iterable[str]) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": [1, 2, 3], "b": [1, 1, 2]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function: @@ -1308,7 +1542,7 @@ def over(self, *keys: str | Iterable[str]) -> Self: ... def func(df): ... return df.with_columns(a_min_per_group=nw.col("a").min().over("b")) - We can then pass either pandas or Polars: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b a_min_per_group @@ -1326,6 +1560,15 @@ def over(self, *keys: str | Iterable[str]) -> Self: │ 2 ┆ 1 ┆ 1 │ │ 3 ┆ 2 ┆ 3 │ └─────┴─────┴─────────────────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + a_min_per_group: int64 + ---- + a: [[1,2,3]] + b: [[1,1,2]] + a_min_per_group: [[1,1,3]] """ return self.__class__(lambda plx: self._call(plx).over(flatten(keys))) @@ -1337,9 +1580,11 @@ def is_duplicated(self) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function: @@ -1347,7 +1592,7 @@ def is_duplicated(self) -> Self: ... def func(df): ... 
return df.select(nw.all().is_duplicated()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -1367,6 +1612,13 @@ def is_duplicated(self) -> Self: │ false ┆ false │ │ true ┆ false │ └───────┴───────┘ + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[true,false,false,true]] + b: [[true,true,false,false]] """ return self.__class__(lambda plx: self._call(plx).is_duplicated()) @@ -1378,9 +1630,11 @@ def is_unique(self) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function: @@ -1388,7 +1642,7 @@ def is_unique(self) -> Self: ... def func(df): ... return df.select(nw.all().is_unique()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -1408,6 +1662,13 @@ def is_unique(self) -> Self: │ true ┆ true │ │ false ┆ true │ └───────┴───────┘ + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[false,true,true,false]] + b: [[false,false,true,true]] """ return self.__class__(lambda plx: self._call(plx).is_unique()) @@ -1423,9 +1684,11 @@ def null_count(self) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function: @@ -1433,7 +1696,7 @@ def null_count(self) -> Self: ... def func(df): ... return df.select(nw.all().null_count()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -1447,6 +1710,13 @@ def null_count(self) -> Self: ╞═════╪═════╡ │ 1 ┆ 2 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1]] + b: [[2]] """ return self.__class__(lambda plx: self._call(plx).null_count()) @@ -1458,9 +1728,11 @@ def is_first_distinct(self) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function: @@ -1468,7 +1740,7 @@ def is_first_distinct(self) -> Self: ... def func(df): ... 
return df.select(nw.all().is_first_distinct()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -1488,6 +1760,13 @@ def is_first_distinct(self) -> Self: │ true ┆ true │ │ false ┆ true │ └───────┴───────┘ + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[true,true,true,false]] + b: [[true,false,true,true]] """ return self.__class__(lambda plx: self._call(plx).is_first_distinct()) @@ -1498,9 +1777,11 @@ def is_last_distinct(self) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function: @@ -1508,7 +1789,7 @@ def is_last_distinct(self) -> Self: ... def func(df): ... return df.select(nw.all().is_last_distinct()) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -1528,6 +1809,13 @@ def is_last_distinct(self) -> Self: │ true ┆ true │ │ true ┆ true │ └───────┴───────┘ + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + ---- + a: [[false,true,true,true]] + b: [[false,true,true,true]] """ return self.__class__(lambda plx: self._call(plx).is_last_distinct()) @@ -1539,23 +1827,23 @@ def quantile( r"""Get quantile value. Note: - * pandas and Polars may have implementation differences for a given interpolation method. - * [dask](https://docs.dask.org/en/stable/generated/dask.dataframe.Series.quantile.html) has its own method to approximate quantile and it doesn't implement 'nearest', 'higher', 'lower', 'midpoint' + - pandas and Polars may have implementation differences for a given interpolation method. + - [dask](https://docs.dask.org/en/stable/generated/dask.dataframe.Series.quantile.html) has its own method to approximate quantile and it doesn't implement 'nearest', 'higher', 'lower', 'midpoint' as interpolation method - use 'linear' which is closest to the native 'dask' - method. Arguments: - quantile : float - Quantile between 0.0 and 1.0. - interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear'} - Interpolation method. + quantile: Quantile between 0.0 and 1.0. + interpolation: Interpolation method. Examples: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": list(range(50)), "b": list(range(50, 100))} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function: @@ -1563,7 +1851,7 @@ def quantile( ... def func(df): ... 
return df.select(nw.col("a", "b").quantile(0.5, interpolation="linear")) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -1578,6 +1866,13 @@ def quantile( ╞══════╪══════╡ │ 24.5 ┆ 74.5 │ └──────┴──────┘ + >>> func(df_pa) + pyarrow.Table + a: double + b: double + ---- + a: [[24.5]] + b: [[74.5]] """ return self.__class__( lambda plx: self._call(plx).quantile(quantile, interpolation) @@ -1594,9 +1889,11 @@ def head(self, n: int = 10) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": list(range(10))} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function that returns the first 3 rows: @@ -1604,7 +1901,7 @@ def head(self, n: int = 10) -> Self: ... def func(df): ... return df.select(nw.col("a").head(3)) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1622,6 +1919,11 @@ def head(self, n: int = 10) -> Self: │ 1 │ │ 2 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[0,1,2]] """ return self.__class__(lambda plx: self._call(plx).head(n)) @@ -1636,9 +1938,11 @@ def tail(self, n: int = 10) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": list(range(10))} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function that returns the last 3 rows: @@ -1646,7 +1950,7 @@ def tail(self, n: int = 10) -> Self: ... def func(df): ... return df.select(nw.col("a").tail(3)) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1664,6 +1968,11 @@ def tail(self, n: int = 10) -> Self: │ 8 │ │ 9 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[7,8,9]] """ return self.__class__(lambda plx: self._call(plx).tail(n)) @@ -1687,9 +1996,11 @@ def round(self, decimals: int = 0) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": [1.12345, 2.56789, 3.901234]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function that rounds to the first decimal: @@ -1697,7 +2008,7 @@ def round(self, decimals: int = 0) -> Self: ... def func(df): ... return df.select(nw.col("a").round(1)) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1715,6 +2026,11 @@ def round(self, decimals: int = 0) -> Self: │ 2.6 │ │ 3.9 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: double + ---- + a: [[1.1,2.6,3.9]] """ return self.__class__(lambda plx: self._call(plx).round(decimals)) @@ -1728,9 +2044,11 @@ def len(self) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function that computes the len over different values of "b" column: @@ -1741,7 +2059,7 @@ def len(self) -> Self: ... 
nw.col("a").filter(nw.col("b") == 2).len().alias("a2"), ... ) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a1 a2 @@ -1755,6 +2073,13 @@ def len(self) -> Self: ╞═════╪═════╡ │ 2 ┆ 1 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a1: int64 + a2: int64 + ---- + a1: [[2]] + a2: [[1]] """ return self.__class__(lambda plx: self._call(plx).len()) @@ -1770,9 +2095,11 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define a dataframe-agnostic function in which gather every 2 rows, starting from a offset of 1: @@ -1781,11 +2108,12 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: ... def func(df): ... return df.select(nw.col("a").gather_every(n=2, offset=1)) + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + >>> func(df_pd) a 1 2 3 4 - >>> func(df_pl) shape: (2, 1) ┌─────┐ @@ -1796,6 +2124,11 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: │ 2 │ │ 4 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[2,4]] """ return self.__class__( lambda plx: self._call(plx).gather_every(n=n, offset=offset) @@ -1818,11 +2151,13 @@ def clip( Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> s = [1, 2, 3] >>> df_pd = pd.DataFrame({"s": s}) >>> df_pl = pl.DataFrame({"s": s}) + >>> df_pa = pa.table({"s": s}) We define a library agnostic function: @@ -1830,7 +2165,7 @@ def clip( ... def func_lower(df): ... return df.select(nw.col("s").clip(2)) - We can then pass either pandas or Polars to `func_lower`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func_lower`: >>> func_lower(df_pd) s @@ -1848,6 +2183,11 @@ def clip( │ 2 │ │ 3 │ └─────┘ + >>> func_lower(df_pa) + pyarrow.Table + s: int64 + ---- + s: [[2,2,3]] We define another library agnostic function: @@ -1855,7 +2195,7 @@ def clip( ... def func_upper(df): ... return df.select(nw.col("s").clip(upper_bound=2)) - We can then pass either pandas or Polars to `func_upper`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func_upper`: >>> func_upper(df_pd) s @@ -1873,12 +2213,18 @@ def clip( │ 2 │ │ 2 │ └─────┘ + >>> func_upper(df_pa) + pyarrow.Table + s: int64 + ---- + s: [[1,2,2]] We can have both at the same time >>> s = [-1, 1, -3, 3, -5, 5] >>> df_pd = pd.DataFrame({"s": s}) >>> df_pl = pl.DataFrame({"s": s}) + >>> df_pa = pa.table({"s": s}) We define a library agnostic function: @@ -1886,7 +2232,7 @@ def clip( ... def func(df): ... return df.select(nw.col("s").clip(-1, 3)) - We can pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) s @@ -1910,6 +2256,11 @@ def clip( │ -1 │ │ 3 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + s: int64 + ---- + s: [[-1,1,-1,3,-1,3]] """ return self.__class__(lambda plx: self._call(plx).clip(lower_bound, upper_bound)) @@ -1921,6 +2272,7 @@ def mode(self: Self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> data = { @@ -1929,6 +2281,7 @@ def mode(self: Self) -> Self: ... 
} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) We define a library agnostic function: @@ -1936,7 +2289,7 @@ def mode(self: Self) -> Self: ... def func(df): ... return df.select(nw.col("a").mode()).sort("a") - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1951,31 +2304,40 @@ def mode(self: Self) -> Self: ╞═════╡ │ 1 │ └─────┘ + + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[1]] """ return self.__class__(lambda plx: self._call(plx).mode()) @property - def str(self: Self) -> ExprStringNamespace: + def str(self: Self) -> ExprStringNamespace[Self]: return ExprStringNamespace(self) @property - def dt(self: Self) -> ExprDateTimeNamespace: + def dt(self: Self) -> ExprDateTimeNamespace[Self]: return ExprDateTimeNamespace(self) @property - def cat(self: Self) -> ExprCatNamespace: + def cat(self: Self) -> ExprCatNamespace[Self]: return ExprCatNamespace(self) @property - def name(self: Self) -> ExprNameNamespace: + def name(self: Self) -> ExprNameNamespace[Self]: return ExprNameNamespace(self) -class ExprCatNamespace: - def __init__(self, expr: Expr) -> None: +T = TypeVar("T", bound=Expr) + + +class ExprCatNamespace(Generic[T]): + def __init__(self: Self, expr: T) -> None: self._expr = expr - def get_categories(self) -> Expr: + def get_categories(self: Self) -> T: """ Get unique categories from column. @@ -2018,11 +2380,11 @@ def get_categories(self) -> Expr: ) -class ExprStringNamespace: - def __init__(self, expr: Expr) -> None: +class ExprStringNamespace(Generic[T]): + def __init__(self: Self, expr: T) -> None: self._expr = expr - def len_chars(self) -> Expr: + def len_chars(self: Self) -> T: r""" Return the length of each string as the number of characters. @@ -2068,7 +2430,7 @@ def len_chars(self) -> Expr: def replace( self, pattern: str, value: str, *, literal: bool = False, n: int = 1 - ) -> Expr: + ) -> T: r""" Replace first matching regex/literal substring with a new string value. @@ -2108,7 +2470,7 @@ def replace( ) ) - def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> Expr: + def replace_all(self: Self, pattern: str, value: str, *, literal: bool = False) -> T: r""" Replace all matching regex/literal substring with a new string value. @@ -2147,7 +2509,7 @@ def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> Exp ) ) - def strip_chars(self, characters: str | None = None) -> Expr: + def strip_chars(self: Self, characters: str | None = None) -> T: r""" Remove leading and trailing characters. @@ -2181,7 +2543,7 @@ def strip_chars(self, characters: str | None = None) -> Expr: lambda plx: self._expr._call(plx).str.strip_chars(characters) ) - def starts_with(self, prefix: str) -> Expr: + def starts_with(self: Self, prefix: str) -> T: r""" Check if string values start with a substring. @@ -2226,7 +2588,7 @@ def starts_with(self, prefix: str) -> Expr: lambda plx: self._expr._call(plx).str.starts_with(prefix) ) - def ends_with(self, suffix: str) -> Expr: + def ends_with(self: Self, suffix: str) -> T: r""" Check if string values end with a substring. @@ -2271,7 +2633,7 @@ def ends_with(self, suffix: str) -> Expr: lambda plx: self._expr._call(plx).str.ends_with(suffix) ) - def contains(self, pattern: str, *, literal: bool = False) -> Expr: + def contains(self: Self, pattern: str, *, literal: bool = False) -> T: r""" Check if string contains a substring that matches a pattern. 
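The `T = TypeVar("T", bound=Expr)` / `Generic[T]` change above is what lets the `str`, `dt`, `cat` and `name` namespaces hand back the same expression subclass they wrap (note the return annotations flipping from `-> Expr` to `-> T`). A minimal sketch of the pattern; the class names (`MyExpr`, `MyStableExpr`, `MyStrNamespace`) are illustrative stand-ins, not the real narwhals internals:

```python
from typing import Generic, TypeVar


class MyExpr:  # stand-in for narwhals.Expr
    pass


class MyStableExpr(MyExpr):  # stand-in for a subclass of the base expression
    pass


T = TypeVar("T", bound=MyExpr)


class MyStrNamespace(Generic[T]):
    def __init__(self, expr: T) -> None:
        self._expr = expr

    def to_uppercase(self) -> T:
        # Rebuild via the wrapped object's class: a namespace created from a
        # MyStableExpr hands back a MyStableExpr, mirroring the `-> T` returns above.
        return self._expr.__class__()


out = MyStrNamespace(MyStableExpr()).to_uppercase()
print(type(out).__name__)  # MyStableExpr
```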
@@ -2327,7 +2689,7 @@ def contains(self, pattern: str, *, literal: bool = False) -> Expr: lambda plx: self._expr._call(plx).str.contains(pattern, literal=literal) ) - def slice(self, offset: int, length: int | None = None) -> Expr: + def slice(self: Self, offset: int, length: int | None = None) -> T: r""" Create subslices of the string values of an expression. @@ -2402,7 +2764,7 @@ def slice(self, offset: int, length: int | None = None) -> Expr: lambda plx: self._expr._call(plx).str.slice(offset=offset, length=length) ) - def head(self, n: int = 5) -> Expr: + def head(self: Self, n: int = 5) -> T: r""" Take the first n elements of each string. @@ -2450,7 +2812,7 @@ def head(self, n: int = 5) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).str.slice(0, n)) - def tail(self, n: int = 5) -> Expr: + def tail(self: Self, n: int = 5) -> T: r""" Take the last n elements of each string. @@ -2498,7 +2860,7 @@ def tail(self, n: int = 5) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).str.slice(-n)) - def to_datetime(self: Self, format: str | None = None) -> Expr: # noqa: A002 + def to_datetime(self: Self, format: str | None = None) -> T: # noqa: A002 """ Convert to Datetime dtype. @@ -2558,7 +2920,7 @@ def to_datetime(self: Self, format: str | None = None) -> Expr: # noqa: A002 lambda plx: self._expr._call(plx).str.to_datetime(format=format) ) - def to_uppercase(self) -> Expr: + def to_uppercase(self: Self) -> T: r""" Transform string to uppercase variant. @@ -2604,7 +2966,7 @@ def to_uppercase(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).str.to_uppercase()) - def to_lowercase(self) -> Expr: + def to_lowercase(self: Self) -> T: r""" Transform string to lowercase variant. @@ -2645,11 +3007,11 @@ def to_lowercase(self) -> Expr: return self._expr.__class__(lambda plx: self._expr._call(plx).str.to_lowercase()) -class ExprDateTimeNamespace: - def __init__(self, expr: Expr) -> None: +class ExprDateTimeNamespace(Generic[T]): + def __init__(self: Self, expr: T) -> None: self._expr = expr - def date(self) -> Expr: + def date(self: Self) -> T: """ Extract the date from underlying DateTime representation. @@ -2691,7 +3053,7 @@ def date(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.date()) - def year(self) -> Expr: + def year(self: Self) -> T: """ Extract year from underlying DateTime representation. @@ -2739,7 +3101,7 @@ def year(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.year()) - def month(self) -> Expr: + def month(self: Self) -> T: """ Extract month from underlying DateTime representation. @@ -2790,7 +3152,7 @@ def month(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.month()) - def day(self) -> Expr: + def day(self: Self) -> T: """ Extract day from underlying DateTime representation. @@ -2842,7 +3204,7 @@ def day(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.day()) - def hour(self) -> Expr: + def hour(self: Self) -> T: """ Extract hour from underlying DateTime representation. @@ -2890,7 +3252,7 @@ def hour(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.hour()) - def minute(self) -> Expr: + def minute(self: Self) -> T: """ Extract minutes from underlying DateTime representation. 
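The string-namespace hunks above only change annotations, not behaviour; `str.to_datetime`, for example, is still called as in this small usage sketch (assuming a pandas backend; the column name and data are illustrative):

```python
import pandas as pd

import narwhals as nw

df = nw.from_native(pd.DataFrame({"ts": ["2020-01-01", "2020-01-02"]}))
out = df.select(nw.col("ts").str.to_datetime(format="%Y-%m-%d"))
print(out.to_native().dtypes)  # "ts" is now datetime64[ns]
```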
@@ -2941,7 +3303,7 @@ def minute(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.minute()) - def second(self) -> Expr: + def second(self: Self) -> T: """ Extract seconds from underlying DateTime representation. @@ -2991,7 +3353,7 @@ def second(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.second()) - def millisecond(self) -> Expr: + def millisecond(self: Self) -> T: """ Extract milliseconds from underlying DateTime representation. @@ -3042,7 +3404,7 @@ def millisecond(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.millisecond()) - def microsecond(self) -> Expr: + def microsecond(self: Self) -> T: """ Extract microseconds from underlying DateTime representation. @@ -3093,7 +3455,7 @@ def microsecond(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.microsecond()) - def nanosecond(self) -> Expr: + def nanosecond(self: Self) -> T: """ Extract Nanoseconds from underlying DateTime representation @@ -3144,7 +3506,7 @@ def nanosecond(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.nanosecond()) - def ordinal_day(self) -> Expr: + def ordinal_day(self: Self) -> T: """ Get ordinal day. @@ -3182,7 +3544,7 @@ def ordinal_day(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.ordinal_day()) - def total_minutes(self) -> Expr: + def total_minutes(self: Self) -> T: """ Get total minutes. @@ -3225,7 +3587,7 @@ def total_minutes(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.total_minutes()) - def total_seconds(self) -> Expr: + def total_seconds(self: Self) -> T: """ Get total seconds. @@ -3268,7 +3630,7 @@ def total_seconds(self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.total_seconds()) - def total_milliseconds(self) -> Expr: + def total_milliseconds(self: Self) -> T: """ Get total milliseconds. @@ -3320,7 +3682,7 @@ def total_milliseconds(self) -> Expr: lambda plx: self._expr._call(plx).dt.total_milliseconds() ) - def total_microseconds(self) -> Expr: + def total_microseconds(self: Self) -> T: """ Get total microseconds. @@ -3372,7 +3734,7 @@ def total_microseconds(self) -> Expr: lambda plx: self._expr._call(plx).dt.total_microseconds() ) - def total_nanoseconds(self) -> Expr: + def total_nanoseconds(self: Self) -> T: """ Get total nanoseconds. @@ -3421,7 +3783,7 @@ def total_nanoseconds(self) -> Expr: lambda plx: self._expr._call(plx).dt.total_nanoseconds() ) - def to_string(self, format: str) -> Expr: # noqa: A002 + def to_string(self: Self, format: str) -> T: # noqa: A002 """ Convert a Date/Time/Datetime column into a String column with the given format. @@ -3499,7 +3861,7 @@ def to_string(self, format: str) -> Expr: # noqa: A002 lambda plx: self._expr._call(plx).dt.to_string(format) ) - def replace_time_zone(self, time_zone: str | None) -> Expr: + def replace_time_zone(self: Self, time_zone: str | None) -> T: """ Replace time zone. @@ -3554,7 +3916,7 @@ def replace_time_zone(self, time_zone: str | None) -> Expr: lambda plx: self._expr._call(plx).dt.replace_time_zone(time_zone) ) - def convert_time_zone(self, time_zone: str) -> Expr: + def convert_time_zone(self: Self, time_zone: str) -> T: """ Convert to a new time zone. 
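Likewise for the datetime namespace: `dt.to_string` formats a datetime column back into strings, the inverse direction of `str.to_datetime`. A usage sketch under the same assumptions (pandas backend, illustrative column name and format):

```python
from datetime import datetime

import pandas as pd

import narwhals as nw

df = nw.from_native(
    pd.DataFrame({"ts": [datetime(2020, 1, 1), datetime(2020, 1, 2)]})
)
out = df.select(nw.col("ts").dt.to_string("%Y/%m/%d"))
print(out.to_native()["ts"].tolist())  # ['2020/01/01', '2020/01/02']
```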
@@ -3612,12 +3974,78 @@ def convert_time_zone(self, time_zone: str) -> Expr: lambda plx: self._expr._call(plx).dt.convert_time_zone(time_zone) ) + def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> T: + """ + Return a timestamp in the given time unit. + + Arguments: + time_unit: {'ns', 'us', 'ms'} + Time unit. + + Examples: + >>> from datetime import date + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> data = {"date": [date(2001, 1, 1), None, date(2001, 1, 3)]} + >>> df_pd = pd.DataFrame(data, dtype="datetime64[ns]") + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) + + Let's define a dataframe-agnostic function: + + >>> @nw.narwhalify + ... def func(df): + ... return df.with_columns( + ... nw.col("date").dt.timestamp().alias("timestamp_us"), + ... nw.col("date").dt.timestamp("ms").alias("timestamp_ms"), + ... ) + + We can then pass pandas / PyArrow / Polars / any other supported library: + + >>> func(df_pd) + date timestamp_us timestamp_ms + 0 2001-01-01 9.783072e+14 9.783072e+11 + 1 NaT NaN NaN + 2 2001-01-03 9.784800e+14 9.784800e+11 + >>> func(df_pl) + shape: (3, 3) + ┌────────────┬─────────────────┬──────────────┐ + │ date ┆ timestamp_us ┆ timestamp_ms │ + │ --- ┆ --- ┆ --- │ + │ date ┆ i64 ┆ i64 │ + ╞════════════╪═════════════════╪══════════════╡ + │ 2001-01-01 ┆ 978307200000000 ┆ 978307200000 │ + │ null ┆ null ┆ null │ + │ 2001-01-03 ┆ 978480000000000 ┆ 978480000000 │ + └────────────┴─────────────────┴──────────────┘ + >>> func(df_pa) + pyarrow.Table + date: date32[day] + timestamp_us: int64 + timestamp_ms: int64 + ---- + date: [[2001-01-01,null,2001-01-03]] + timestamp_us: [[978307200000000,null,978480000000000]] + timestamp_ms: [[978307200000,null,978480000000]] + """ + if time_unit not in {"ns", "us", "ms"}: + msg = ( + "invalid `time_unit`" + f"\n\nExpected one of {{'ns', 'us', 'ms'}}, got {time_unit!r}." + ) + raise ValueError(msg) + return self._expr.__class__( + lambda plx: self._expr._call(plx).dt.timestamp(time_unit) + ) + -class ExprNameNamespace: - def __init__(self: Self, expr: Expr) -> None: +class ExprNameNamespace(Generic[T]): + def __init__(self: Self, expr: T) -> None: self._expr = expr - def keep(self: Self) -> Expr: + def keep(self: Self) -> T: r""" Keep the original root name of the expression. @@ -3649,7 +4077,7 @@ def keep(self: Self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).name.keep()) - def map(self: Self, function: Callable[[str], str]) -> Expr: + def map(self: Self, function: Callable[[str], str]) -> T: r""" Rename the output of an expression by mapping a function over the root name. @@ -3685,7 +4113,7 @@ def map(self: Self, function: Callable[[str], str]) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).name.map(function)) - def prefix(self: Self, prefix: str) -> Expr: + def prefix(self: Self, prefix: str) -> T: r""" Add a prefix to the root column name of the expression. @@ -3721,7 +4149,7 @@ def prefix(self: Self, prefix: str) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).name.prefix(prefix)) - def suffix(self: Self, suffix: str) -> Expr: + def suffix(self: Self, suffix: str) -> T: r""" Add a suffix to the root column name of the expression. 
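The doctest values in the new `dt.timestamp` hunk above can be sanity-checked with plain `datetime` arithmetic: 2001-01-01 is 978307200 seconds after the Unix epoch, so the expected millisecond and microsecond values follow directly.

```python
from datetime import datetime, timezone

epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
seconds = (datetime(2001, 1, 1, tzinfo=timezone.utc) - epoch).total_seconds()

print(int(seconds * 1_000))      # 978307200000     -> the `timestamp_ms` column
print(int(seconds * 1_000_000))  # 978307200000000  -> the `timestamp_us` column
```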
@@ -3756,7 +4184,7 @@ def suffix(self: Self, suffix: str) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).name.suffix(suffix)) - def to_lowercase(self: Self) -> Expr: + def to_lowercase(self: Self) -> T: r""" Make the root column name lowercase. @@ -3788,7 +4216,7 @@ def to_lowercase(self: Self) -> Expr: """ return self._expr.__class__(lambda plx: self._expr._call(plx).name.to_lowercase()) - def to_uppercase(self: Self) -> Expr: + def to_uppercase(self: Self) -> T: r""" Make the root column name uppercase. @@ -3830,9 +4258,11 @@ def col(*names: str | Iterable[str]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [3, 4]}) >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [3, 4]}) We define a dataframe-agnostic function: @@ -3840,7 +4270,7 @@ def col(*names: str | Iterable[str]) -> Expr: ... def func(df): ... return df.select(nw.col("a") * nw.col("b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -3856,6 +4286,11 @@ def col(*names: str | Iterable[str]) -> Expr: │ 3 │ │ 8 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[3,8]] """ def func(plx: Any) -> Any: @@ -3890,7 +4325,7 @@ def nth(*indices: int | Sequence[int]) -> Expr: ... def func(df): ... return df.select(nw.nth(0) * 2) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -3927,9 +4362,11 @@ def all_() -> Expr: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> df_pa = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]}) Let's define a dataframe-agnostic function: @@ -3937,7 +4374,7 @@ def all_() -> Expr: ... def func(df): ... return df.select(nw.all() * 2) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -3955,6 +4392,13 @@ def all_() -> Expr: │ 4 ┆ 10 │ │ 6 ┆ 12 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[2,4,6]] + b: [[8,10,12]] """ return Expr(lambda plx: plx.all()) @@ -3967,9 +4411,11 @@ def len_() -> Expr: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]}) Let's define a dataframe-agnostic function: @@ -3977,7 +4423,7 @@ def len_() -> Expr: ... def func(df): ... 
return df.select(nw.len()) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) len @@ -3991,6 +4437,11 @@ def len_() -> Expr: ╞═════╡ │ 2 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + len: int64 + ---- + len: [[2]] """ def func(plx: Any) -> Any: @@ -4012,9 +4463,11 @@ def sum(*columns: str) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pl = pl.DataFrame({"a": [1, 2]}) >>> df_pd = pd.DataFrame({"a": [1, 2]}) + >>> df_pa = pa.table({"a": [1, 2]}) We define a dataframe-agnostic function: @@ -4022,7 +4475,7 @@ def sum(*columns: str) -> Expr: ... def func(df): ... return df.select(nw.sum("a")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -4036,6 +4489,11 @@ def sum(*columns: str) -> Expr: ╞═════╡ │ 3 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[3]] """ return Expr(lambda plx: plx.sum(*columns)) @@ -4054,9 +4512,11 @@ def mean(*columns: str) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pl = pl.DataFrame({"a": [1, 8, 3]}) >>> df_pd = pd.DataFrame({"a": [1, 8, 3]}) + >>> df_pa = pa.table({"a": [1, 8, 3]}) We define a dataframe agnostic function: @@ -4064,7 +4524,7 @@ def mean(*columns: str) -> Expr: ... def func(df): ... return df.select(nw.mean("a")) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -4078,6 +4538,11 @@ def mean(*columns: str) -> Expr: ╞═════╡ │ 4.0 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: double + ---- + a: [[4]] """ return Expr(lambda plx: plx.mean(*columns)) @@ -4096,9 +4561,11 @@ def min(*columns: str) -> Expr: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]}) Let's define a dataframe-agnostic function: @@ -4106,7 +4573,7 @@ def min(*columns: str) -> Expr: ... def func(df): ... return df.select(nw.min("b")) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) b @@ -4120,6 +4587,11 @@ def min(*columns: str) -> Expr: ╞═════╡ │ 5 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + b: int64 + ---- + b: [[5]] """ return Expr(lambda plx: plx.min(*columns)) @@ -4137,9 +4609,11 @@ def max(*columns: str) -> Expr: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]}) Let's define a dataframe-agnostic function: @@ -4147,7 +4621,7 @@ def max(*columns: str) -> Expr: ... def func(df): ... 
return df.select(nw.max("a")) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -4161,6 +4635,11 @@ def max(*columns: str) -> Expr: ╞═════╡ │ 2 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[2]] """ return Expr(lambda plx: plx.max(*columns)) @@ -4179,10 +4658,12 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> data = {"a": [1, 2, 3], "b": [5, 10, None]} >>> df_pl = pl.DataFrame(data) >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) We define a dataframe-agnostic function: @@ -4190,7 +4671,7 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select(nw.sum_horizontal("a", "b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -4208,6 +4689,11 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: │ 12 │ │ 3 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[6,12,3]] """ if not exprs: msg = "At least one expression must be passed to `sum_horizontal`" @@ -4248,7 +4734,7 @@ def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select(nw.min_horizontal("a", "b")) - We can then pass either pandas, polars or pyarrow to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(pd.DataFrame(data)) a @@ -4311,7 +4797,7 @@ def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select(nw.max_horizontal("a", "b")) - We can then pass either pandas, polars or pyarrow to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(pd.DataFrame(data)) a @@ -4382,9 +4868,11 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) + >>> df_pa = pa.table({"a": [1, 2, 3], "b": [5, 10, 15]}) We define a dataframe-agnostic function: @@ -4394,7 +4882,7 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") ... ) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b a_when @@ -4412,6 +4900,15 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: │ 2 ┆ 10 ┆ 5 │ │ 3 ┆ 15 ┆ 6 │ └─────┴─────┴────────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + a_when: int64 + ---- + a: [[1,2,3]] + b: [[5,10,15]] + a_when: [[5,5,6]] """ return When(*predicates) @@ -4429,6 +4926,7 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> data = { ... "a": [False, False, True, True, False, None], @@ -4436,6 +4934,7 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... 
} >>> df_pl = pl.DataFrame(data) >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) We define a dataframe-agnostic function: @@ -4443,7 +4942,7 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select("a", "b", all=nw.all_horizontal("a", "b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b all @@ -4468,6 +4967,16 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: │ false ┆ null ┆ false │ │ null ┆ null ┆ null │ └───────┴───────┴───────┘ + + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + all: bool + ---- + a: [[false,false,true,true,false,null]] + b: [[false,true,true,null,null,null]] + all: [[false,false,true,null,false,null]] """ if not exprs: msg = "At least one expression must be passed to `all_horizontal`" @@ -4490,9 +4999,11 @@ def lit(value: Any, dtype: DType | None = None) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> df_pl = pl.DataFrame({"a": [1, 2]}) >>> df_pd = pd.DataFrame({"a": [1, 2]}) + >>> df_pa = pa.table({"a": [1, 2]}) We define a dataframe-agnostic function: @@ -4500,7 +5011,7 @@ def lit(value: Any, dtype: DType | None = None) -> Expr: ... def func(df): ... return df.with_columns(nw.lit(3).alias("b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -4516,7 +5027,13 @@ def lit(value: Any, dtype: DType | None = None) -> Expr: │ 1 ┆ 3 │ │ 2 ┆ 3 │ └─────┴─────┘ - + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2]] + b: [[3,3]] """ if is_numpy_array(value): msg = ( @@ -4545,6 +5062,7 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> data = { ... "a": [False, False, True, True, False, None], @@ -4552,6 +5070,7 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... } >>> df_pl = pl.DataFrame(data) >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) We define a dataframe-agnostic function: @@ -4559,7 +5078,7 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select("a", "b", any=nw.any_horizontal("a", "b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b any @@ -4584,6 +5103,16 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: │ false ┆ null ┆ null │ │ null ┆ null ┆ null │ └───────┴───────┴───────┘ + + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + any: bool + ---- + a: [[false,false,true,true,false,null]] + b: [[false,true,true,null,null,null]] + any: [[false,true,true,true,null,null]] """ if not exprs: msg = "At least one expression must be passed to `any_horizontal`" @@ -4606,6 +5135,7 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> data = { ... "a": [1, 8, 3], @@ -4614,6 +5144,7 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... 
} >>> df_pl = pl.DataFrame(data) >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) We define a dataframe-agnostic function that computes the horizontal mean of "a" and "b" columns: @@ -4622,13 +5153,14 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select(nw.mean_horizontal("a", "b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a 0 2.5 1 6.5 2 3.0 + >>> func(df_pl) shape: (3, 1) ┌─────┐ @@ -4640,6 +5172,12 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: │ 6.5 │ │ 3.0 │ └─────┘ + + >>> func(df_pa) + pyarrow.Table + a: double + ---- + a: [[2.5,6.5,3]] """ if not exprs: msg = "At least one expression must be passed to `mean_horizontal`" @@ -4698,7 +5236,7 @@ def concat_str( ... ).alias("full_sentence") ... ) - We can then pass either pandas, Polars or PyArrow to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(pd.DataFrame(data)) full_sentence diff --git a/narwhals/functions.py b/narwhals/functions.py index b84dcb174..b8dfffbeb 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -6,6 +6,7 @@ from typing import Any from typing import Iterable from typing import Literal +from typing import Protocol from typing import TypeVar from typing import Union @@ -21,6 +22,7 @@ # The rest of the annotations seem to work fine with this anyway FrameT = TypeVar("FrameT", bound=Union[DataFrame, LazyFrame]) # type: ignore[type-arg] + if TYPE_CHECKING: from types import ModuleType @@ -29,6 +31,11 @@ from narwhals.series import Series from narwhals.typing import DTypes + class ArrowStreamExportable(Protocol): + def __arrow_c_stream__( + self, requested_schema: object | None = None + ) -> object: ... + def concat( items: Iterable[FrameT], @@ -287,6 +294,7 @@ def from_dict( Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} @@ -298,7 +306,7 @@ def from_dict( ... native_namespace = nw.get_native_namespace(df) ... return nw.from_dict(new_data, native_namespace=native_namespace) - Let's see what happens when passing pandas / Polars input: + Let's see what happens when passing Pandas, Polars or PyArrow input: >>> func(pd.DataFrame(data)) c d @@ -314,6 +322,13 @@ def from_dict( │ 5 ┆ 1 │ │ 2 ┆ 4 │ └─────┴─────┘ + >>> func(pa.table(data)) + pyarrow.Table + c: int64 + d: int64 + ---- + c: [[5,2]] + d: [[1,4]] """ from narwhals import dtypes @@ -406,6 +421,100 @@ def _from_dict_impl( return from_native(native_frame, eager_only=True) +def from_arrow( + native_frame: ArrowStreamExportable, *, native_namespace: ModuleType +) -> DataFrame[Any]: + """ + Construct a DataFrame from an object which supports the PyCapsule Interface. + + Arguments: + native_frame: Object which implements `__arrow_c_stream__`. + native_namespace: The native library to use for DataFrame creation. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + + Let's define a dataframe-agnostic function which creates a PyArrow + Table. + + >>> @nw.narwhalify + ... def func(df): + ... 
return nw.from_arrow(df, native_namespace=pa) + + Let's see what happens when passing pandas / Polars input: + + >>> func(pd.DataFrame(data)) # doctest: +SKIP + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + >>> func(pl.DataFrame(data)) # doctest: +SKIP + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + """ + if not hasattr(native_frame, "__arrow_c_stream__"): + msg = f"Given object of type {type(native_frame)} does not support the PyCapsule Interface" + raise TypeError(msg) + implementation = Implementation.from_native_namespace(native_namespace) + + if implementation is Implementation.POLARS and parse_version( + native_namespace.__version__ + ) >= (1, 3): + native_frame = native_namespace.DataFrame(native_frame) + elif implementation in { + Implementation.PANDAS, + Implementation.MODIN, + Implementation.CUDF, + Implementation.POLARS, + }: + # These don't (yet?) support the PyCapsule Interface for import + # so we go via PyArrow + try: + import pyarrow as pa # ignore-banned-import + except ModuleNotFoundError as exc: # pragma: no cover + msg = f"PyArrow>=14.0.0 is required for `from_arrow` for object of type {native_namespace}" + raise ModuleNotFoundError(msg) from exc + if parse_version(pa.__version__) < (14, 0): # pragma: no cover + msg = f"PyArrow>=14.0.0 is required for `from_arrow` for object of type {native_namespace}" + raise ModuleNotFoundError(msg) from None + + tbl = pa.table(native_frame) + if implementation is Implementation.PANDAS: + native_frame = tbl.to_pandas() + elif implementation is Implementation.MODIN: # pragma: no cover + from modin.pandas.utils import from_arrow + + native_frame = from_arrow(tbl) + elif implementation is Implementation.CUDF: # pragma: no cover + native_frame = native_namespace.DataFrame.from_arrow(tbl) + elif implementation is Implementation.POLARS: # pragma: no cover + native_frame = native_namespace.from_arrow(tbl) + else: # pragma: no cover + msg = "congratulations, you entered unreachable code - please report a bug" + raise AssertionError(msg) + elif implementation is Implementation.PYARROW: + native_frame = native_namespace.table(native_frame) + else: # pragma: no cover + try: + # implementation is UNKNOWN, Narwhals extension using this feature should + # implement PyCapsule support + native_frame = native_namespace.DataFrame(native_frame) + except AttributeError as e: + msg = "Unknown namespace is expected to implement a `DataFrame` class which accepts an object which supports the PyCapsule Interface."
+ raise AttributeError(msg) from e + return from_native(native_frame, eager_only=True) + + def _get_sys_info() -> dict[str, str]: """System information diff --git a/narwhals/group_by.py b/narwhals/group_by.py index 797442e3c..9ec14c4d7 100644 --- a/narwhals/group_by.py +++ b/narwhals/group_by.py @@ -20,10 +20,12 @@ class GroupBy(Generic[DataFrameT]): - def __init__(self, df: DataFrameT, *keys: str) -> None: + def __init__(self, df: DataFrameT, *keys: str, drop_null_keys: bool) -> None: self._df = cast(DataFrame[Any], df) self._keys = keys - self._grouped = self._df._compliant_frame.group_by(*self._keys) + self._grouped = self._df._compliant_frame.group_by( + *self._keys, drop_null_keys=drop_null_keys + ) def agg( self, *aggs: IntoExpr | Iterable[IntoExpr], **named_aggs: IntoExpr @@ -119,10 +121,12 @@ def __iter__(self) -> Iterator[tuple[Any, DataFrameT]]: class LazyGroupBy(Generic[LazyFrameT]): - def __init__(self, df: LazyFrameT, *keys: str) -> None: + def __init__(self, df: LazyFrameT, *keys: str, drop_null_keys: bool) -> None: self._df = cast(LazyFrame[Any], df) self._keys = keys - self._grouped = self._df._compliant_frame.group_by(*self._keys) + self._grouped = self._df._compliant_frame.group_by( + *self._keys, drop_null_keys=drop_null_keys + ) def agg( self, *aggs: IntoExpr | Iterable[IntoExpr], **named_aggs: IntoExpr diff --git a/narwhals/series.py b/narwhals/series.py index 36ecf50ff..6f5223202 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -3,9 +3,11 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable +from typing import Generic from typing import Iterator from typing import Literal from typing import Sequence +from typing import TypeVar from typing import overload from narwhals.utils import parse_version @@ -40,7 +42,7 @@ def _dataframe(self) -> type[DataFrame[Any]]: return DataFrame def __init__( - self, + self: Self, series: Any, *, level: Literal["full", "interchange"], @@ -52,16 +54,16 @@ def __init__( msg = f"Expected Polars Series or an object which implements `__narwhals_series__`, got: {type(series)}." raise AssertionError(msg) - def __array__(self, dtype: Any = None, copy: bool | None = None) -> np.ndarray: + def __array__(self: Self, dtype: Any = None, copy: bool | None = None) -> np.ndarray: return self._compliant_series.__array__(dtype=dtype, copy=copy) @overload - def __getitem__(self, idx: int) -> Any: ... + def __getitem__(self: Self, idx: int) -> Any: ... @overload - def __getitem__(self, idx: slice | Sequence[int]) -> Self: ... + def __getitem__(self: Self, idx: slice | Sequence[int]) -> Self: ... - def __getitem__(self, idx: int | slice | Sequence[int]) -> Any | Self: + def __getitem__(self: Self, idx: int | slice | Sequence[int]) -> Any | Self: if isinstance(idx, int): return self._compliant_series[idx] return self._from_compliant_series(self._compliant_series[idx]) @@ -1201,6 +1203,25 @@ def alias(self, name: str) -> Self: """ Rename the Series. + Notes: + This method is very cheap, but does not guarantee that data + will be copied. For example: + + ```python + s1: nw.Series + s2 = s1.alias("foo") + arr = s2.to_numpy() + arr[0] = 999 + ``` + + may (depending on the backend, and on the version) result in + `s1`'s data being modified. We recommend: + + - if you need to alias an object and don't need the original + one around any more, just use `alias` without worrying about it. + - if you were expecting `alias` to copy data, then explicitly call + `.clone` before calling `alias`. + Arguments: name: The new name. 
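The new `alias` note above recommends cloning when the original buffer must stay untouched. A short sketch of that recommendation, taking the note's `.clone` advice at face value (pandas backend shown; data and names are illustrative):

```python
import pandas as pd

import narwhals as nw

s1 = nw.from_native(pd.Series([1, 2, 3], name="a"), series_only=True)

# Cheap, but may share the underlying buffer with s1:
s2 = s1.alias("foo")

# If s1 must not be affected by later in-place edits, clone first, as the note suggests:
s3 = s1.clone().alias("foo")
```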
@@ -1253,6 +1274,25 @@ def rename(self, name: str) -> Self: Alias for `Series.alias()`. + Notes: + This method is very cheap, but does not guarantee that data + will be copied. For example: + + ```python + s1: nw.Series + s2 = s1.rename("foo") + arr = s2.to_numpy() + arr[0] = 999 + ``` + + may (depending on the backend, and on the version) result in + `s1`'s data being modified. We recommend: + + - if you need to rename an object and don't need the original + one around any more, just use `rename` without worrying about it. + - if you were expecting `rename` to copy data, then explicitly call + `.clone` before calling `rename`. + Arguments: name: The new name. @@ -2058,10 +2098,8 @@ def quantile( pandas and Polars may have implementation differences for a given interpolation method. Arguments: - quantile : float - Quantile between 0.0 and 1.0. - interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear'} - Interpolation method. + quantile: Quantile between 0.0 and 1.0. + interpolation: Interpolation method. Examples: >>> import narwhals as nw @@ -2491,23 +2529,26 @@ def __iter__(self: Self) -> Iterator[Any]: yield from self._compliant_series.__iter__() @property - def str(self) -> SeriesStringNamespace: + def str(self: Self) -> SeriesStringNamespace[Self]: return SeriesStringNamespace(self) @property - def dt(self) -> SeriesDateTimeNamespace: + def dt(self: Self) -> SeriesDateTimeNamespace[Self]: return SeriesDateTimeNamespace(self) @property - def cat(self) -> SeriesCatNamespace: + def cat(self: Self) -> SeriesCatNamespace[Self]: return SeriesCatNamespace(self) -class SeriesCatNamespace: - def __init__(self, series: Series) -> None: +T = TypeVar("T", bound=Series) + + +class SeriesCatNamespace(Generic[T]): + def __init__(self: Self, series: T) -> None: self._narwhals_series = series - def get_categories(self) -> Series: + def get_categories(self: Self) -> T: """ Get unique categories from column. @@ -2547,11 +2588,11 @@ def get_categories(self) -> Series: ) -class SeriesStringNamespace: - def __init__(self, series: Series) -> None: +class SeriesStringNamespace(Generic[T]): + def __init__(self: Self, series: T) -> None: self._narwhals_series = series - def len_chars(self) -> Series: + def len_chars(self: Self) -> T: r""" Return the length of each string as the number of characters. @@ -2595,8 +2636,8 @@ def len_chars(self) -> Series: ) def replace( - self, pattern: str, value: str, *, literal: bool = False, n: int = 1 - ) -> Series: + self: Self, pattern: str, value: str, *, literal: bool = False, n: int = 1 + ) -> T: r""" Replace first matching regex/literal substring with a new string value. @@ -2635,7 +2676,7 @@ def replace( ) ) - def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> Series: + def replace_all(self: Self, pattern: str, value: str, *, literal: bool = False) -> T: r""" Replace all matching regex/literal substring with a new string value. @@ -2673,7 +2714,7 @@ def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> Ser ) ) - def strip_chars(self, characters: str | None = None) -> Series: + def strip_chars(self: Self, characters: str | None = None) -> T: r""" Remove leading and trailing characters. @@ -2707,7 +2748,7 @@ def strip_chars(self, characters: str | None = None) -> Series: self._narwhals_series._compliant_series.str.strip_chars(characters) ) - def starts_with(self, prefix: str) -> Series: + def starts_with(self: Self, prefix: str) -> T: r""" Check if string values start with a substring. 
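As the two docstrings above spell out, `str.replace` touches only the first match per string (up to `n`), while `str.replace_all` rewrites every match. A quick comparison sketch (pandas backend; the data is illustrative):

```python
import pandas as pd

import narwhals as nw

s = nw.from_native(pd.Series(["abc abc", "abc456"]), series_only=True)

print(s.str.replace("abc", "").to_list())      # [' abc', '456']  (first match only)
print(s.str.replace_all("abc", "").to_list())  # [' ', '456']     (every match)
```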
@@ -2749,7 +2790,7 @@ def starts_with(self, prefix: str) -> Series: self._narwhals_series._compliant_series.str.starts_with(prefix) ) - def ends_with(self, suffix: str) -> Series: + def ends_with(self: Self, suffix: str) -> T: r""" Check if string values end with a substring. @@ -2791,7 +2832,7 @@ def ends_with(self, suffix: str) -> Series: self._narwhals_series._compliant_series.str.ends_with(suffix) ) - def contains(self, pattern: str, *, literal: bool = False) -> Series: + def contains(self: Self, pattern: str, *, literal: bool = False) -> T: r""" Check if string contains a substring that matches a pattern. @@ -2839,7 +2880,7 @@ def contains(self, pattern: str, *, literal: bool = False) -> Series: self._narwhals_series._compliant_series.str.contains(pattern, literal=literal) ) - def slice(self, offset: int, length: int | None = None) -> Series: + def slice(self: Self, offset: int, length: int | None = None) -> T: r""" Create subslices of the string values of a Series. @@ -2910,7 +2951,7 @@ def slice(self, offset: int, length: int | None = None) -> Series: ) ) - def head(self, n: int = 5) -> Series: + def head(self: Self, n: int = 5) -> T: r""" Take the first n elements of each string. @@ -2958,7 +2999,7 @@ def head(self, n: int = 5) -> Series: self._narwhals_series._compliant_series.str.slice(0, n) ) - def tail(self, n: int = 5) -> Series: + def tail(self: Self, n: int = 5) -> T: r""" Take the last n elements of each string. @@ -3006,7 +3047,7 @@ def tail(self, n: int = 5) -> Series: self._narwhals_series._compliant_series.str.slice(-n) ) - def to_uppercase(self) -> Series: + def to_uppercase(self) -> T: r""" Transform string to uppercase variant. @@ -3054,7 +3095,7 @@ def to_uppercase(self) -> Series: self._narwhals_series._compliant_series.str.to_uppercase() ) - def to_lowercase(self) -> Series: + def to_lowercase(self) -> T: r""" Transform string to lowercase variant. @@ -3097,7 +3138,7 @@ def to_lowercase(self) -> Series: self._narwhals_series._compliant_series.str.to_lowercase() ) - def to_datetime(self: Self, format: str | None = None) -> Series: # noqa: A002 + def to_datetime(self: Self, format: str | None = None) -> T: # noqa: A002 """ Parse Series with strings to a Series with Datetime dtype. @@ -3158,11 +3199,11 @@ def to_datetime(self: Self, format: str | None = None) -> Series: # noqa: A002 ) -class SeriesDateTimeNamespace: - def __init__(self, series: Series) -> None: +class SeriesDateTimeNamespace(Generic[T]): + def __init__(self: Self, series: T) -> None: self._narwhals_series = series - def date(self) -> Series: + def date(self: Self) -> T: """ Get the date in a datetime series. @@ -3203,7 +3244,7 @@ def date(self) -> Series: self._narwhals_series._compliant_series.dt.date() ) - def year(self) -> Series: + def year(self: Self) -> T: """ Get the year in a datetime series. @@ -3240,7 +3281,7 @@ def year(self) -> Series: self._narwhals_series._compliant_series.dt.year() ) - def month(self) -> Series: + def month(self: Self) -> T: """ Gets the month in a datetime series. @@ -3277,7 +3318,7 @@ def month(self) -> Series: self._narwhals_series._compliant_series.dt.month() ) - def day(self) -> Series: + def day(self: Self) -> T: """ Extracts the day in a datetime series. @@ -3314,7 +3355,7 @@ def day(self) -> Series: self._narwhals_series._compliant_series.dt.day() ) - def hour(self) -> Series: + def hour(self: Self) -> T: """ Extracts the hour in a datetime series. 
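As the hunks above show, `str.head` and `str.tail` are thin wrappers over `str.slice` (`slice(0, n)` and `slice(-n)` respectively), so the following pairs agree (pandas backend; illustrative data):

```python
import pandas as pd

import narwhals as nw

s = nw.from_native(pd.Series(["narwhals", "polars"]), series_only=True)

assert s.str.head(3).to_list() == s.str.slice(0, 3).to_list()  # ['nar', 'pol']
assert s.str.tail(3).to_list() == s.str.slice(-3).to_list()    # ['als', 'ars']
```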
@@ -3351,7 +3392,7 @@ def hour(self) -> Series: self._narwhals_series._compliant_series.dt.hour() ) - def minute(self) -> Series: + def minute(self: Self) -> T: """ Extracts the minute in a datetime series. @@ -3388,7 +3429,7 @@ def minute(self) -> Series: self._narwhals_series._compliant_series.dt.minute() ) - def second(self) -> Series: + def second(self: Self) -> T: """ Extracts the second(s) in a datetime series. @@ -3425,7 +3466,7 @@ def second(self) -> Series: self._narwhals_series._compliant_series.dt.second() ) - def millisecond(self) -> Series: + def millisecond(self: Self) -> T: """ Extracts the milliseconds in a datetime series. @@ -3475,7 +3516,7 @@ def millisecond(self) -> Series: self._narwhals_series._compliant_series.dt.millisecond() ) - def microsecond(self) -> Series: + def microsecond(self: Self) -> T: """ Extracts the microseconds in a datetime series. @@ -3525,7 +3566,7 @@ def microsecond(self) -> Series: self._narwhals_series._compliant_series.dt.microsecond() ) - def nanosecond(self) -> Series: + def nanosecond(self: Self) -> T: """ Extracts the nanosecond(s) in a date series. @@ -3565,7 +3606,7 @@ def nanosecond(self) -> Series: self._narwhals_series._compliant_series.dt.nanosecond() ) - def ordinal_day(self) -> Series: + def ordinal_day(self: Self) -> T: """ Get ordinal day. @@ -3602,7 +3643,7 @@ def ordinal_day(self) -> Series: self._narwhals_series._compliant_series.dt.ordinal_day() ) - def total_minutes(self) -> Series: + def total_minutes(self: Self) -> T: """ Get total minutes. @@ -3644,7 +3685,7 @@ def total_minutes(self) -> Series: self._narwhals_series._compliant_series.dt.total_minutes() ) - def total_seconds(self) -> Series: + def total_seconds(self: Self) -> T: """ Get total seconds. @@ -3686,7 +3727,7 @@ def total_seconds(self) -> Series: self._narwhals_series._compliant_series.dt.total_seconds() ) - def total_milliseconds(self) -> Series: + def total_milliseconds(self: Self) -> T: """ Get total milliseconds. @@ -3731,7 +3772,7 @@ def total_milliseconds(self) -> Series: self._narwhals_series._compliant_series.dt.total_milliseconds() ) - def total_microseconds(self) -> Series: + def total_microseconds(self: Self) -> T: """ Get total microseconds. @@ -3776,7 +3817,7 @@ def total_microseconds(self) -> Series: self._narwhals_series._compliant_series.dt.total_microseconds() ) - def total_nanoseconds(self) -> Series: + def total_nanoseconds(self: Self) -> T: """ Get total nanoseconds. @@ -3818,7 +3859,7 @@ def total_nanoseconds(self) -> Series: self._narwhals_series._compliant_series.dt.total_nanoseconds() ) - def to_string(self, format: str) -> Series: # noqa: A002 + def to_string(self: Self, format: str) -> T: # noqa: A002 """ Convert a Date/Time/Datetime series into a String series with the given format. @@ -3893,7 +3934,7 @@ def to_string(self, format: str) -> Series: # noqa: A002 self._narwhals_series._compliant_series.dt.to_string(format) ) - def replace_time_zone(self, time_zone: str | None) -> Series: + def replace_time_zone(self: Self, time_zone: str | None) -> T: """ Replace time zone. @@ -3946,7 +3987,7 @@ def replace_time_zone(self, time_zone: str | None) -> Series: self._narwhals_series._compliant_series.dt.replace_time_zone(time_zone) ) - def convert_time_zone(self, time_zone: str) -> Series: + def convert_time_zone(self: Self, time_zone: str) -> T: """ Convert time zone. 
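The last two methods above are easy to mix up: `convert_time_zone` keeps the instant and changes how it is displayed, while `replace_time_zone` keeps the wall-clock time and so changes the instant. A small sketch of the difference (pandas backend, tz-aware input assumed; the timestamp and target zone are illustrative):

```python
from datetime import datetime, timezone

import pandas as pd

import narwhals as nw

s = nw.from_native(
    pd.Series([datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc)]), series_only=True
)

converted = s.dt.convert_time_zone("Asia/Kathmandu")
replaced = s.dt.replace_time_zone("Asia/Kathmandu")

print(converted.to_native())  # 2024-01-01 17:45:00+05:45, same instant as the input
print(replaced.to_native())   # 2024-01-01 12:00:00+05:45, same wall clock, new instant
```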
@@ -4001,3 +4042,63 @@ def convert_time_zone(self, time_zone: str) -> Series: return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.convert_time_zone(time_zone) ) + + def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> T: + """ + Return a timestamp in the given time unit. + + Arguments: + time_unit: {'ns', 'us', 'ms'} + Time unit. + + Examples: + >>> from datetime import date + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> data = [date(2001, 1, 1), None, date(2001, 1, 3)] + >>> s_pd = pd.Series(data, dtype="datetime64[ns]") + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a dataframe-agnostic function: + + >>> @nw.narwhalify + ... def func(s): + ... return s.dt.timestamp("ms") + + We can then pass pandas / PyArrow / Polars / any other supported library: + + >>> func(s_pd) + 0 9.783072e+11 + 1 NaN + 2 9.784800e+11 + dtype: float64 + >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 978307200000 + null + 978480000000 + ] + >>> func(s_pa) + + [ + [ + 978307200000, + null, + 978480000000 + ] + ] + """ + if time_unit not in {"ns", "us", "ms"}: + msg = ( + "invalid `time_unit`" + f"\n\nExpected one of {{'ns', 'us', 'ms'}}, got {time_unit!r}." + ) + raise ValueError(msg) + return self._narwhals_series._from_compliant_series( + self._narwhals_series._compliant_series.dt.timestamp(time_unit) + ) diff --git a/narwhals/stable/__init__.py b/narwhals/stable/__init__.py index 572034fe7..60bc872a5 100644 --- a/narwhals/stable/__init__.py +++ b/narwhals/stable/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from narwhals.stable import v1 __all__ = ["v1"] diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 86ddd1def..33531480c 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -21,6 +21,7 @@ from narwhals.expr import when as nw_when from narwhals.functions import _from_dict_impl from narwhals.functions import _new_series_impl +from narwhals.functions import from_arrow as nw_from_arrow from narwhals.functions import show_versions from narwhals.schema import Schema as NwSchema from narwhals.series import Series as NwSeries @@ -31,6 +32,7 @@ from narwhals.stable.v1.dtypes import Datetime from narwhals.stable.v1.dtypes import Duration from narwhals.stable.v1.dtypes import Enum +from narwhals.stable.v1.dtypes import Field from narwhals.stable.v1.dtypes import Float32 from narwhals.stable.v1.dtypes import Float64 from narwhals.stable.v1.dtypes import Int8 @@ -49,8 +51,13 @@ from narwhals.translate import _from_native_impl from narwhals.translate import get_native_namespace as nw_get_native_namespace from narwhals.translate import to_native +from narwhals.translate import to_py_scalar as nw_to_py_scalar from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoFrameT +from narwhals.typing import IntoSeriesT +from narwhals.utils import ( + generate_temporary_column_name as nw_generate_temporary_column_name, +) from narwhals.utils import is_ordered_categorical as nw_is_ordered_categorical from narwhals.utils import maybe_align_index as nw_maybe_align_index from narwhals.utils import maybe_convert_dtypes as nw_maybe_convert_dtypes @@ -64,6 +71,7 @@ from typing_extensions import Self from narwhals.dtypes import DType + from narwhals.functions import ArrowStreamExportable from narwhals.typing import IntoExpr T = 
TypeVar("T") @@ -570,26 +578,26 @@ def _stableify( @overload def from_native( - native_dataframe: Any, + native_dataframe: IntoDataFrameT | IntoSeriesT, *, strict: Literal[False], eager_only: None = ..., eager_or_interchange_only: Literal[True], series_only: None = ..., allow_series: Literal[True], -) -> Any: ... +) -> DataFrame[IntoFrameT] | Series: ... @overload def from_native( - native_dataframe: Any, + native_dataframe: IntoDataFrameT | IntoSeriesT, *, strict: Literal[False], eager_only: Literal[True], eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: Literal[True], -) -> Any: ... +) -> DataFrame[IntoDataFrameT] | Series: ... @overload @@ -642,26 +650,26 @@ def from_native( @overload def from_native( - native_dataframe: Any, + native_dataframe: IntoFrameT | IntoSeriesT, *, strict: Literal[False], eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: Literal[True], -) -> Any: ... +) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series: ... @overload def from_native( - native_dataframe: Any, + native_dataframe: IntoSeriesT, *, strict: Literal[False], eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: Literal[True], allow_series: None = ..., -) -> Any: ... +) -> Series: ... @overload @@ -722,7 +730,7 @@ def from_native( @overload def from_native( - native_dataframe: Any, + native_dataframe: IntoFrameT | IntoSeriesT, *, strict: Literal[True] = ..., eager_only: None = ..., @@ -738,7 +746,7 @@ def from_native( @overload def from_native( - native_dataframe: Any, + native_dataframe: IntoSeriesT | Any, # remain `Any` for downstream compatibility *, strict: Literal[True] = ..., eager_only: None = ..., @@ -948,6 +956,28 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return decorator(func) +def to_py_scalar(scalar: Any) -> Any: + """If a scalar is not Python native, converts it to Python native. + + Raises: + ValueError: If the object is not convertible to a scalar. + + Examples: + >>> import narwhals.stable.v1 as nw + >>> import pandas as pd + >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]})) + >>> nw.to_py_scalar(df["a"].item(0)) + 1 + >>> import pyarrow as pa + >>> df = nw.from_native(pa.table({"a": [1, 2, 3]})) + >>> nw.to_py_scalar(df["a"].item(0)) + 1 + >>> nw.to_py_scalar(1) + 1 + """ + return _stableify(nw_to_py_scalar(scalar)) + + def all() -> Expr: """ Instantiate an expression representing all columns. @@ -955,9 +985,11 @@ def all() -> Expr: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> df_pa = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]}) Let's define a dataframe-agnostic function: @@ -965,7 +997,7 @@ def all() -> Expr: ... def func(df): ... 
return df.select(nw.all() * 2) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -983,6 +1015,13 @@ def all() -> Expr: │ 4 ┆ 10 │ │ 6 ┆ 12 │ └─────┴─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[2,4,6]] + b: [[8,10,12]] """ return _stableify(nw.all()) @@ -997,9 +1036,11 @@ def col(*names: str | Iterable[str]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [3, 4]}) >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [3, 4]}) We define a dataframe-agnostic function: @@ -1007,7 +1048,7 @@ def col(*names: str | Iterable[str]) -> Expr: ... def func(df): ... return df.select(nw.col("a") * nw.col("b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1023,6 +1064,11 @@ def col(*names: str | Iterable[str]) -> Expr: │ 3 │ │ 8 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[3,8]] """ return _stableify(nw.col(*names)) @@ -1053,7 +1099,7 @@ def nth(*indices: int | Sequence[int]) -> Expr: ... def func(df): ... return df.select(nw.nth(0) * 2) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1085,9 +1131,11 @@ def len() -> Expr: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]}) Let's define a dataframe-agnostic function: @@ -1095,7 +1143,7 @@ def len() -> Expr: ... def func(df): ... return df.select(nw.len()) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) len @@ -1109,6 +1157,11 @@ def len() -> Expr: ╞═════╡ │ 2 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + len: int64 + ---- + len: [[2]] """ return _stableify(nw.len()) @@ -1124,9 +1177,11 @@ def lit(value: Any, dtype: DType | None = None) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> df_pl = pl.DataFrame({"a": [1, 2]}) >>> df_pd = pd.DataFrame({"a": [1, 2]}) + >>> df_pa = pa.table({"a": [1, 2]}) We define a dataframe-agnostic function: @@ -1134,7 +1189,7 @@ def lit(value: Any, dtype: DType | None = None) -> Expr: ... def func(df): ... 
return df.with_columns(nw.lit(3).alias("b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b @@ -1150,7 +1205,13 @@ def lit(value: Any, dtype: DType | None = None) -> Expr: │ 1 ┆ 3 │ │ 2 ┆ 3 │ └─────┴─────┘ - + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2]] + b: [[3,3]] """ return _stableify(nw.lit(value, dtype)) @@ -1168,9 +1229,11 @@ def min(*columns: str) -> Expr: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]}) Let's define a dataframe-agnostic function: @@ -1178,7 +1241,7 @@ def min(*columns: str) -> Expr: ... def func(df): ... return df.select(nw.min("b")) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) b @@ -1192,6 +1255,11 @@ def min(*columns: str) -> Expr: ╞═════╡ │ 5 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + b: int64 + ---- + b: [[5]] """ return _stableify(nw.min(*columns)) @@ -1209,9 +1277,11 @@ def max(*columns: str) -> Expr: Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) >>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]}) + >>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]}) Let's define a dataframe-agnostic function: @@ -1219,7 +1289,7 @@ def max(*columns: str) -> Expr: ... def func(df): ... return df.select(nw.max("a")) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1233,6 +1303,11 @@ def max(*columns: str) -> Expr: ╞═════╡ │ 2 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[2]] """ return _stableify(nw.max(*columns)) @@ -1250,9 +1325,11 @@ def mean(*columns: str) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> df_pl = pl.DataFrame({"a": [1, 8, 3]}) >>> df_pd = pd.DataFrame({"a": [1, 8, 3]}) + >>> df_pa = pa.table({"a": [1, 8, 3]}) We define a dataframe agnostic function: @@ -1260,7 +1337,7 @@ def mean(*columns: str) -> Expr: ... def func(df): ... return df.select(nw.mean("a")) - We can then pass either pandas or Polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1274,6 +1351,11 @@ def mean(*columns: str) -> Expr: ╞═════╡ │ 4.0 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: double + ---- + a: [[4]] """ return _stableify(nw.mean(*columns)) @@ -1291,9 +1373,11 @@ def sum(*columns: str) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> df_pl = pl.DataFrame({"a": [1, 2]}) >>> df_pd = pd.DataFrame({"a": [1, 2]}) + >>> df_pa = pa.table({"a": [1, 2]}) We define a dataframe-agnostic function: @@ -1301,7 +1385,7 @@ def sum(*columns: str) -> Expr: ... def func(df): ... 
return df.select(nw.sum("a")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1315,6 +1399,11 @@ def sum(*columns: str) -> Expr: ╞═════╡ │ 3 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[3]] """ return _stableify(nw.sum(*columns)) @@ -1333,10 +1422,12 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> data = {"a": [1, 2, 3], "b": [5, 10, None]} >>> df_pl = pl.DataFrame(data) >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) We define a dataframe-agnostic function: @@ -1344,7 +1435,7 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select(nw.sum_horizontal("a", "b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a @@ -1362,6 +1453,11 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: │ 12 │ │ 3 │ └─────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + ---- + a: [[6,12,3]] """ return _stableify(nw.sum_horizontal(*exprs)) @@ -1379,6 +1475,7 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> data = { ... "a": [False, False, True, True, False, None], @@ -1386,6 +1483,7 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... } >>> df_pl = pl.DataFrame(data) >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) We define a dataframe-agnostic function: @@ -1393,7 +1491,7 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select("a", "b", all=nw.all_horizontal("a", "b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b all @@ -1418,6 +1516,16 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: │ false ┆ null ┆ false │ │ null ┆ null ┆ null │ └───────┴───────┴───────┘ + + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + all: bool + ---- + a: [[false,false,true,true,false,null]] + b: [[false,true,true,null,null,null]] + all: [[false,false,true,null,false,null]] """ return _stableify(nw.all_horizontal(*exprs)) @@ -1435,6 +1543,7 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> data = { ... "a": [False, False, True, True, False, None], @@ -1442,6 +1551,7 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... } >>> df_pl = pl.DataFrame(data) >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) We define a dataframe-agnostic function: @@ -1449,7 +1559,7 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... 
return df.select("a", "b", any=nw.any_horizontal("a", "b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b any @@ -1474,6 +1584,16 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: │ false ┆ null ┆ null │ │ null ┆ null ┆ null │ └───────┴───────┴───────┘ + + >>> func(df_pa) + pyarrow.Table + a: bool + b: bool + any: bool + ---- + a: [[false,false,true,true,false,null]] + b: [[false,true,true,null,null,null]] + any: [[false,true,true,true,null,null]] """ return _stableify(nw.any_horizontal(*exprs)) @@ -1489,6 +1609,7 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> data = { ... "a": [1, 8, 3], @@ -1497,6 +1618,7 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... } >>> df_pl = pl.DataFrame(data) >>> df_pd = pd.DataFrame(data) + >>> df_pa = pa.table(data) We define a dataframe-agnostic function that computes the horizontal mean of "a" and "b" columns: @@ -1505,13 +1627,14 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select(nw.mean_horizontal("a", "b")) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a 0 2.5 1 6.5 2 3.0 + >>> func(df_pl) shape: (3, 1) ┌─────┐ @@ -1523,6 +1646,12 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: │ 6.5 │ │ 3.0 │ └─────┘ + + >>> func(df_pa) + pyarrow.Table + a: double + ---- + a: [[2.5,6.5,3]] """ return _stableify(nw.mean_horizontal(*exprs)) @@ -1556,7 +1685,7 @@ def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select(nw.min_horizontal("a", "b")) - We can then pass either pandas, polars or pyarrow to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(pd.DataFrame(data)) a @@ -1612,7 +1741,7 @@ def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ... def func(df): ... return df.select(nw.max_horizontal("a", "b")) - We can then pass either pandas, polars or pyarrow to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(pd.DataFrame(data)) a @@ -1808,7 +1937,7 @@ def concat_str( ... ).alias("full_sentence") ... ) - We can then pass either pandas, Polars or PyArrow to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(pd.DataFrame(data)) full_sentence @@ -2023,6 +2152,32 @@ def maybe_reset_index(obj: T) -> T: return nw_maybe_reset_index(obj) +def generate_temporary_column_name(n_bytes: int, columns: list[str]) -> str: + """Generates a unique token of specified `n_bytes` that is not present in the given + list of columns. + + It relies on [python secrets token_hex](https://docs.python.org/3/library/secrets.html#secrets.token_hex) + function to return a string nbytes random bytes. + + Arguments: + n_bytes: The number of bytes to generate for the token. + columns: The list of columns to check for uniqueness. + + Returns: + A unique token that is not present in the given list of columns. + + Raises: + AssertionError: If a unique token cannot be generated after 100 attempts. 
+ + Examples: + >>> import narwhals.stable.v1 as nw + >>> columns = ["abc", "xyz"] + >>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns + True + """ + return nw_generate_temporary_column_name(n_bytes=n_bytes, columns=columns) + + def get_native_namespace(obj: Any) -> Any: """ Get native namespace from object. @@ -2087,9 +2242,11 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]}) + >>> df_pa = pa.table({"a": [1, 2, 3], "b": [5, 10, 15]}) We define a dataframe-agnostic function: @@ -2099,7 +2256,7 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") ... ) - We can then pass either pandas or polars to `func`: + We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: >>> func(df_pd) a b a_when @@ -2117,6 +2274,15 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: │ 2 ┆ 10 ┆ 5 │ │ 3 ┆ 15 ┆ 6 │ └─────┴─────┴────────┘ + >>> func(df_pa) + pyarrow.Table + a: int64 + b: int64 + a_when: int64 + ---- + a: [[1,2,3]] + b: [[5,10,15]] + a_when: [[5,5,6]] """ return When.from_when(nw_when(*predicates)) @@ -2181,6 +2347,52 @@ def new_series( ) +def from_arrow( + native_frame: ArrowStreamExportable, *, native_namespace: ModuleType +) -> DataFrame[Any]: + """ + Construct a DataFrame from an object which supports the PyCapsule Interface. + + Arguments: + native_frame: Object which implements `__arrow_c_stream__`. + native_namespace: The native library to use for DataFrame creation. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals.stable.v1 as nw + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + + Let's define a dataframe-agnostic function which creates a PyArrow + Table. + + >>> @nw.narwhalify + ... def func(df): + ... return nw.from_arrow(df, native_namespace=pa) + + Let's see what happens when passing pandas / Polars input: + + >>> func(pd.DataFrame(data)) # doctest: +SKIP + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + >>> func(pl.DataFrame(data)) # doctest: +SKIP + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + """ + return _stableify( # type: ignore[no-any-return] + nw_from_arrow(native_frame, native_namespace=native_namespace) + ) + + def from_dict( data: dict[str, Any], schema: dict[str, DType] | Schema | None = None, @@ -2203,6 +2415,7 @@ def from_dict( Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals.stable.v1 as nw >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} @@ -2214,7 +2427,7 @@ def from_dict( ... native_namespace = nw.get_native_namespace(df) ... 
return nw.from_dict(new_data, native_namespace=native_namespace) - Let's see what happens when passing pandas / Polars input: + Let's see what happens when passing Pandas, Polars or PyArrow input: >>> func(pd.DataFrame(data)) c d @@ -2230,6 +2443,13 @@ def from_dict( │ 5 ┆ 1 │ │ 2 ┆ 4 │ └─────┴─────┘ + >>> func(pa.table(data)) + pyarrow.Table + c: int64 + d: int64 + ---- + c: [[5,2]] + d: [[1,4]] """ from narwhals.stable.v1 import dtypes @@ -2249,12 +2469,14 @@ def from_dict( "dependencies", "to_native", "from_native", + "to_py_scalar", "is_ordered_categorical", "maybe_align_index", "maybe_convert_dtypes", "maybe_get_index", "maybe_reset_index", "maybe_set_index", + "generate_temporary_column_name", "get_native_namespace", "get_level", "all", @@ -2296,6 +2518,7 @@ def from_dict( "String", "Datetime", "Duration", + "Field", "Struct", "Array", "List", @@ -2304,5 +2527,6 @@ def from_dict( "show_versions", "Schema", "from_dict", + "from_arrow", "new_series", ] diff --git a/narwhals/stable/v1/_dtypes.py b/narwhals/stable/v1/_dtypes.py index 13dd3237d..459441d66 100644 --- a/narwhals/stable/v1/_dtypes.py +++ b/narwhals/stable/v1/_dtypes.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from narwhals.dtypes import Array from narwhals.dtypes import Boolean from narwhals.dtypes import Categorical @@ -6,6 +8,7 @@ from narwhals.dtypes import DType from narwhals.dtypes import Duration as NwDuration from narwhals.dtypes import Enum +from narwhals.dtypes import Field from narwhals.dtypes import Float32 from narwhals.dtypes import Float64 from narwhals.dtypes import Int8 @@ -77,6 +80,7 @@ def __hash__(self) -> int: "NumericType", "Object", "String", + "Field", "Struct", "UInt8", "UInt16", diff --git a/narwhals/stable/v1/dtypes.py b/narwhals/stable/v1/dtypes.py index f36da9725..37c3af0e8 100644 --- a/narwhals/stable/v1/dtypes.py +++ b/narwhals/stable/v1/dtypes.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from narwhals.stable.v1._dtypes import Array from narwhals.stable.v1._dtypes import Boolean from narwhals.stable.v1._dtypes import Categorical @@ -6,6 +8,7 @@ from narwhals.stable.v1._dtypes import DType from narwhals.stable.v1._dtypes import Duration from narwhals.stable.v1._dtypes import Enum +from narwhals.stable.v1._dtypes import Field from narwhals.stable.v1._dtypes import Float32 from narwhals.stable.v1._dtypes import Float64 from narwhals.stable.v1._dtypes import Int8 @@ -34,6 +37,7 @@ "Enum", "Float32", "Float64", + "Field", "Int8", "Int16", "Int32", diff --git a/narwhals/stable/v1/typing.py b/narwhals/stable/v1/typing.py index e8ab9e1ae..79adf5063 100644 --- a/narwhals/stable/v1/typing.py +++ b/narwhals/stable/v1/typing.py @@ -29,6 +29,9 @@ def columns(self) -> Any: ... def join(self, *args: Any, **kwargs: Any) -> Any: ... + class NativeSeries(Protocol): + def __len__(self) -> int: ... + class DataFrameLike(Protocol): def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... @@ -47,11 +50,15 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... 
Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] """Narwhals DataFrame or Narwhals LazyFrame""" +IntoSeries: TypeAlias = Union["Series", "NativeSeries"] +"""Anything which can be converted to a Narwhals Series.""" + # TypeVars for some of the above IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame") FrameT = TypeVar("FrameT", bound="Frame") DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]") +IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") class DTypes: @@ -73,6 +80,7 @@ class DTypes: Datetime: type[dtypes.Datetime] Duration: type[dtypes.Duration] Date: type[dtypes.Date] + Field: type[dtypes.Field] Struct: type[dtypes.Struct] List: type[dtypes.List] Array: type[dtypes.Array] @@ -88,4 +96,6 @@ class DTypes: "Frame", "FrameT", "DataFrameT", + "IntoSeries", + "IntoSeriesT", ] diff --git a/narwhals/this.py b/narwhals/this.py index 541ee7704..8ba7aa261 100644 --- a/narwhals/this.py +++ b/narwhals/this.py @@ -6,12 +6,12 @@ ⣿⣿⣿⣿⣿⡇⡼⡘⠛⠿⠿⠿⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ A good API is an honest one ⣿⣿⣿⡿⣫⡄⠾⣣⠹⣿⣿⣿⣶⣮⣙⠻⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ Yes, that needs documenting ⣿⣿⢋⣴⣿⣷⣬⣭⣾⣿⣿⣿⣿⣿⣿⣿⣦⡙⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ People learn better from examples -⣿⢃⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⡌⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ than from explanations⠀ -⡏⠀⢰⠄⢻⣿⣿⣿⣿⡿⠋⢉⠻⣿⣿⣿⣿⣿⣿⡜⣿⣿⡿⢁⢻⣿⣿⣿⣿⣿ If in doubt, say 'no'⠀ -⡇⣌⣀⣠⣾⣿⣿⣿⣿⣇⠶⠉⢁⣿⣿⣿⣿⣿⣿⣧⡹⣿⡇⣿⣧⠻⠿⠿⠿⠿ you can always reconsider⠀ +⣿⢃⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⡌⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ than from explanations⠀ +⡏⠀⢰⠄⢻⣿⣿⣿⣿⡿⠋⢉⠻⣿⣿⣿⣿⣿⣿⡜⣿⣿⡿⢁⢻⣿⣿⣿⣿⣿ If in doubt, better to say 'no' +⡇⣌⣀⣠⣾⣿⣿⣿⣿⣇⠶⠉⢁⣿⣿⣿⣿⣿⣿⣧⡹⣿⡇⣿⣧⠻⠿⠿⠿⠿ than to risk causing a commotion⠀ ⡧⢹⣿⣿⣿⣜⣟⣸⣿⣿⣷⣶⣿⡿⣿⣿⣝⢿⣿⣿⣷⣬⣥⣿⣿⣿⣿⣿⡟⣰ Yes, we need a test for that ⢡⣆⢻⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣧⡙⣿⣿⡇⣿⣿⣿⣿⠟⣋⣭⣛⠻⣋⣴⣿ If you want users -⣶⣤⣤⣙⠻⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣦⣍⣡⣿⡿⢋⣴⣿⣿⣿⣿⣿⣿⣿⣿ you need good docs⠀ +⣶⣤⣤⣙⠻⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣦⣍⣡⣿⡿⢋⣴⣿⣿⣿⣿⣿⣿⣿⣿ you need good docs⠀ ⣿⣿⣿⣿⣿⣶⣬⣙⣛⠻⠿⠿⠿⠿⠿⠟⣛⣩⣥⣶⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ Our code is not irreplaceable""" print(ZEN) diff --git a/narwhals/translate.py b/narwhals/translate.py index 4c23f6d91..a1b0e2323 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -1,5 +1,8 @@ from __future__ import annotations +import numbers +from datetime import datetime +from datetime import timedelta from functools import wraps from typing import TYPE_CHECKING from typing import Any @@ -9,9 +12,11 @@ from typing import overload from narwhals.dependencies import get_cudf +from narwhals.dependencies import get_cupy from narwhals.dependencies import get_dask from narwhals.dependencies import get_dask_expr from narwhals.dependencies import get_modin +from narwhals.dependencies import get_numpy from narwhals.dependencies import get_pandas from narwhals.dependencies import get_polars from narwhals.dependencies import get_pyarrow @@ -37,6 +42,7 @@ from narwhals.typing import DTypes from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoFrameT + from narwhals.typing import IntoSeriesT T = TypeVar("T") @@ -86,26 +92,26 @@ def to_native( @overload def from_native( - native_object: Any, + native_object: IntoDataFrameT | IntoSeriesT, *, strict: Literal[False], eager_only: None = ..., eager_or_interchange_only: Literal[True], series_only: None = ..., allow_series: Literal[True], -) -> Any: ... +) -> DataFrame[IntoDataFrameT]: ... @overload def from_native( - native_object: Any, + native_object: IntoDataFrameT | IntoSeriesT, *, strict: Literal[False], eager_only: Literal[True], eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: Literal[True], -) -> Any: ... +) -> DataFrame[IntoDataFrameT] | Series: ... 
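These `from_native` overloads previously accepted and returned `Any`. They now take `IntoDataFrameT | IntoSeriesT` (and friends) and return concrete `DataFrame[...]` / `LazyFrame[...]` / `Series` types, so static checkers keep a useful type at the call site. A minimal sketch of the intended usage, relying on the non-strict pass-through that the `_from_native_impl` hunks further down add:

```python
import pandas as pd
import polars as pl
import narwhals as nw

# series_only=True narrows the result to a Narwhals Series for the type checker.
s = nw.from_native(pd.Series([1, 2, 3]), series_only=True)

# With strict=False, an input that fails the requested constraint is returned
# unchanged instead of raising (per the `_from_native_impl` changes below).
lf = pl.LazyFrame({"a": [1, 2, 3]})
out = nw.from_native(lf, strict=False, eager_only=True)
assert out is lf
```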
@overload @@ -158,26 +164,26 @@ def from_native( @overload def from_native( - native_object: Any, + native_object: IntoFrameT | IntoSeriesT, *, strict: Literal[False], eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: Literal[True], -) -> Any: ... +) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series: ... @overload def from_native( - native_object: Any, + native_object: IntoSeriesT, *, strict: Literal[False], eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: Literal[True], allow_series: None = ..., -) -> Any: ... +) -> Series: ... @overload @@ -238,7 +244,7 @@ def from_native( @overload def from_native( - native_object: Any, + native_object: IntoFrameT | IntoSeriesT, *, strict: Literal[True] = ..., eager_only: None = ..., @@ -254,7 +260,7 @@ def from_native( @overload def from_native( - native_object: Any, + native_object: IntoSeriesT, *, strict: Literal[True] = ..., eager_only: None = ..., @@ -389,27 +395,35 @@ def _from_native_impl( # noqa: PLR0915 # Extensions if hasattr(native_object, "__narwhals_dataframe__"): if series_only: - msg = "Cannot only use `series_only` with dataframe" - raise TypeError(msg) + if strict: + msg = "Cannot only use `series_only` with dataframe" + raise TypeError(msg) + return native_object return DataFrame( native_object.__narwhals_dataframe__(), level="full", ) elif hasattr(native_object, "__narwhals_lazyframe__"): if series_only: - msg = "Cannot only use `series_only` with lazyframe" - raise TypeError(msg) + if strict: + msg = "Cannot only use `series_only` with lazyframe" + raise TypeError(msg) + return native_object if eager_only or eager_or_interchange_only: - msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with lazyframe" - raise TypeError(msg) + if strict: + msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with lazyframe" + raise TypeError(msg) + return native_object return LazyFrame( native_object.__narwhals_lazyframe__(), level="full", ) elif hasattr(native_object, "__narwhals_series__"): if not allow_series: - msg = "Please set `allow_series=True`" - raise TypeError(msg) + if strict: + msg = "Please set `allow_series=True`" + raise TypeError(msg) + return native_object return Series( native_object.__narwhals_series__(), level="full", @@ -418,8 +432,10 @@ def _from_native_impl( # noqa: PLR0915 # Polars elif is_polars_dataframe(native_object): if series_only: - msg = "Cannot only use `series_only` with polars.DataFrame" - raise TypeError(msg) + if strict: + msg = "Cannot only use `series_only` with polars.DataFrame" + raise TypeError(msg) + return native_object pl = get_polars() return DataFrame( PolarsDataFrame( @@ -431,11 +447,15 @@ def _from_native_impl( # noqa: PLR0915 ) elif is_polars_lazyframe(native_object): if series_only: - msg = "Cannot only use `series_only` with polars.LazyFrame" - raise TypeError(msg) + if strict: + msg = "Cannot only use `series_only` with polars.LazyFrame" + raise TypeError(msg) + return native_object if eager_only or eager_or_interchange_only: - msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with polars.LazyFrame" - raise TypeError(msg) + if strict: + msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with polars.LazyFrame" + raise TypeError(msg) + return native_object pl = get_polars() return LazyFrame( PolarsLazyFrame( @@ -448,8 +468,10 @@ def _from_native_impl( # noqa: PLR0915 elif is_polars_series(native_object): pl = get_polars() if not allow_series: - msg = 
"Please set `allow_series=True`" - raise TypeError(msg) + if strict: + msg = "Please set `allow_series=True`" + raise TypeError(msg) + return native_object return Series( PolarsSeries( native_object, @@ -462,8 +484,10 @@ def _from_native_impl( # noqa: PLR0915 # pandas elif is_pandas_dataframe(native_object): if series_only: - msg = "Cannot only use `series_only` with dataframe" - raise TypeError(msg) + if strict: + msg = "Cannot only use `series_only` with dataframe" + raise TypeError(msg) + return native_object pd = get_pandas() return DataFrame( PandasLikeDataFrame( @@ -476,8 +500,10 @@ def _from_native_impl( # noqa: PLR0915 ) elif is_pandas_series(native_object): if not allow_series: - msg = "Please set `allow_series=True`" - raise TypeError(msg) + if strict: + msg = "Please set `allow_series=True`" + raise TypeError(msg) + return native_object pd = get_pandas() return Series( PandasLikeSeries( @@ -493,8 +519,10 @@ def _from_native_impl( # noqa: PLR0915 elif is_modin_dataframe(native_object): # pragma: no cover mpd = get_modin() if series_only: - msg = "Cannot only use `series_only` with modin.DataFrame" - raise TypeError(msg) + if strict: + msg = "Cannot only use `series_only` with modin.DataFrame" + raise TypeError(msg) + return native_object return DataFrame( PandasLikeDataFrame( native_object, @@ -507,8 +535,10 @@ def _from_native_impl( # noqa: PLR0915 elif is_modin_series(native_object): # pragma: no cover mpd = get_modin() if not allow_series: - msg = "Please set `allow_series=True`" - raise TypeError(msg) + if strict: + msg = "Please set `allow_series=True`" + raise TypeError(msg) + return native_object return Series( PandasLikeSeries( native_object, @@ -523,8 +553,10 @@ def _from_native_impl( # noqa: PLR0915 elif is_cudf_dataframe(native_object): # pragma: no cover cudf = get_cudf() if series_only: - msg = "Cannot only use `series_only` with cudf.DataFrame" - raise TypeError(msg) + if strict: + msg = "Cannot only use `series_only` with cudf.DataFrame" + raise TypeError(msg) + return native_object return DataFrame( PandasLikeDataFrame( native_object, @@ -537,8 +569,10 @@ def _from_native_impl( # noqa: PLR0915 elif is_cudf_series(native_object): # pragma: no cover cudf = get_cudf() if not allow_series: - msg = "Please set `allow_series=True`" - raise TypeError(msg) + if strict: + msg = "Please set `allow_series=True`" + raise TypeError(msg) + return native_object return Series( PandasLikeSeries( native_object, @@ -553,8 +587,10 @@ def _from_native_impl( # noqa: PLR0915 elif is_pyarrow_table(native_object): pa = get_pyarrow() if series_only: - msg = "Cannot only use `series_only` with arrow table" - raise TypeError(msg) + if strict: + msg = "Cannot only use `series_only` with arrow table" + raise TypeError(msg) + return native_object return DataFrame( ArrowDataFrame( native_object, @@ -566,8 +602,10 @@ def _from_native_impl( # noqa: PLR0915 elif is_pyarrow_chunked_array(native_object): pa = get_pyarrow() if not allow_series: - msg = "Please set `allow_series=True`" - raise TypeError(msg) + if strict: + msg = "Please set `allow_series=True`" + raise TypeError(msg) + return native_object return Series( ArrowSeries( native_object, @@ -581,11 +619,15 @@ def _from_native_impl( # noqa: PLR0915 # Dask elif is_dask_dataframe(native_object): if series_only: - msg = "Cannot only use `series_only` with dask DataFrame" - raise TypeError(msg) + if strict: + msg = "Cannot only use `series_only` with dask DataFrame" + raise TypeError(msg) + return native_object if eager_only or 
eager_or_interchange_only: - msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with dask DataFrame" - raise TypeError(msg) + if strict: + msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with dask DataFrame" + raise TypeError(msg) + return native_object if get_dask_expr() is None: # pragma: no cover msg = "Please install dask-expr" raise ImportError(msg) @@ -601,10 +643,13 @@ def _from_native_impl( # noqa: PLR0915 # DuckDB elif is_duckdb_relation(native_object): if eager_only or series_only: # pragma: no cover - msg = ( - "Cannot only use `series_only=True` or `eager_only=False` " - "with DuckDB Relation" - ) + if strict: + msg = ( + "Cannot only use `series_only=True` or `eager_only=False` " + "with DuckDB Relation" + ) + else: + return native_object raise TypeError(msg) return DataFrame( DuckDBInterchangeFrame(native_object, dtypes=dtypes), @@ -614,11 +659,13 @@ def _from_native_impl( # noqa: PLR0915 # Ibis elif is_ibis_table(native_object): # pragma: no cover if eager_only or series_only: - msg = ( - "Cannot only use `series_only=True` or `eager_only=False` " - "with Ibis table" - ) - raise TypeError(msg) + if strict: + msg = ( + "Cannot only use `series_only=True` or `eager_only=False` " + "with Ibis table" + ) + raise TypeError(msg) + return native_object return DataFrame( IbisInterchangeFrame(native_object, dtypes=dtypes), level="interchange", @@ -627,11 +674,13 @@ def _from_native_impl( # noqa: PLR0915 # Interchange protocol elif hasattr(native_object, "__dataframe__"): if eager_only or series_only: - msg = ( - "Cannot only use `series_only=True` or `eager_only=False` " - "with object which only implements __dataframe__" - ) - raise TypeError(msg) + if strict: + msg = ( + "Cannot only use `series_only=True` or `eager_only=False` " + "with object which only implements __dataframe__" + ) + raise TypeError(msg) + return native_object return DataFrame( InterchangeFrame(native_object, dtypes=dtypes), level="interchange", @@ -775,8 +824,70 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return decorator(func) +def to_py_scalar(scalar_like: Any) -> Any: + """If a scalar is not Python native, converts it to Python native. + + Raises: + ValueError: If the object is not convertible to a scalar. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]})) + >>> nw.to_py_scalar(df["a"].item(0)) + 1 + >>> import pyarrow as pa + >>> df = nw.from_native(pa.table({"a": [1, 2, 3]})) + >>> nw.to_py_scalar(df["a"].item(0)) + 1 + >>> nw.to_py_scalar(1) + 1 + """ + + pa = get_pyarrow() + if pa and isinstance(scalar_like, pa.Scalar): + return scalar_like.as_py() + + cupy = get_cupy() + if ( # pragma: no cover + cupy and isinstance(scalar_like, cupy.ndarray) and scalar_like.size == 1 + ): + return scalar_like.item() + + np = get_numpy() + if np and np.isscalar(scalar_like) and hasattr(scalar_like, "item"): + return scalar_like.item() + + pd = get_pandas() + if pd and isinstance(scalar_like, pd.Timestamp): + return scalar_like.to_pydatetime() + if pd and isinstance(scalar_like, pd.Timedelta): + return scalar_like.to_pytimedelta() + + all_scalar_types = ( + int, + float, + complex, + bool, + bytes, + str, + datetime, + timedelta, + numbers.Number, + ) + if isinstance(scalar_like, all_scalar_types): + return scalar_like + + msg = ( + f"Expected object convertible to a scalar, found {type(scalar_like)}. 
" + "Please report a bug to https://github.com/narwhals-dev/narwhals/issues" + ) + raise ValueError(msg) + + __all__ = [ "get_native_namespace", "to_native", "narwhalify", + "to_py_scalar", ] diff --git a/narwhals/typing.py b/narwhals/typing.py index 30de0a097..044962ac3 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -29,6 +29,9 @@ def columns(self) -> Any: ... def join(self, *args: Any, **kwargs: Any) -> Any: ... + class NativeSeries(Protocol): + def __len__(self) -> int: ... + class DataFrameLike(Protocol): def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... @@ -47,11 +50,15 @@ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] """Narwhals DataFrame or Narwhals LazyFrame""" +IntoSeries: TypeAlias = Union["Series", "NativeSeries"] +"""Anything which can be converted to a Narwhals Series.""" + # TypeVars for some of the above IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame") FrameT = TypeVar("FrameT", bound="Frame") DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]") +IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") class DTypes: @@ -73,6 +80,7 @@ class DTypes: Datetime: type[dtypes.Datetime] Duration: type[dtypes.Duration] Date: type[dtypes.Date] + Field: type[dtypes.Field] Struct: type[dtypes.Struct] List: type[dtypes.List] Array: type[dtypes.Array] @@ -88,4 +96,6 @@ class DTypes: "Frame", "FrameT", "DataFrameT", + "IntoSeries", + "IntoSeriesT", ] diff --git a/narwhals/utils.py b/narwhals/utils.py index 37cce17d3..66c2badee 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -10,6 +10,7 @@ from typing import Sequence from typing import TypeVar from typing import cast +from warnings import warn from narwhals._exceptions import ColumnNotFoundError from narwhals.dependencies import get_cudf @@ -31,6 +32,7 @@ if TYPE_CHECKING: from types import ModuleType + import pandas as pd from typing_extensions import Self from typing_extensions import TypeGuard @@ -335,10 +337,16 @@ def maybe_reset_index(obj: T) -> T: obj_any = cast(Any, obj) native_obj = to_native(obj_any) if is_pandas_like_dataframe(native_obj): + native_namespace = obj_any.__native_namespace__() + if _has_default_index(native_obj, native_namespace): + return obj_any # type: ignore[no-any-return] return obj_any._from_compliant_dataframe( # type: ignore[no-any-return] obj_any._compliant_frame._from_native_frame(native_obj.reset_index(drop=True)) ) if is_pandas_like_series(native_obj): + native_namespace = obj_any.__native_namespace__() + if _has_default_index(native_obj, native_namespace): + return obj_any # type: ignore[no-any-return] return obj_any._from_compliant_series( # type: ignore[no-any-return] obj_any._compliant_series._from_native_series( native_obj.reset_index(drop=True) @@ -347,6 +355,18 @@ def maybe_reset_index(obj: T) -> T: return obj_any # type: ignore[no-any-return] +def _has_default_index( + native_frame_or_series: pd.Series | pd.DataFrame, native_namespace: Any +) -> bool: + index = native_frame_or_series.index + return ( + isinstance(index, native_namespace.RangeIndex) + and index.start == 0 + and index.stop == len(index) + and index.step == 1 + ) + + def maybe_convert_dtypes(obj: T, *args: bool, **kwargs: bool | str) -> T: """ Convert columns or series to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like. 
@@ -462,17 +482,37 @@ def is_ordered_categorical(series: Series) -> bool: def generate_unique_token(n_bytes: int, columns: list[str]) -> str: # pragma: no cover - """Generates a unique token of specified n_bytes that is not present in the given list of columns. + warn( + "Use `generate_temporary_column_name` instead. `generate_unique_token` is " + "deprecated and it will be removed in future versions", + DeprecationWarning, + stacklevel=2, + ) + return generate_temporary_column_name(n_bytes=n_bytes, columns=columns) + + +def generate_temporary_column_name(n_bytes: int, columns: list[str]) -> str: + """Generates a unique token of specified `n_bytes` that is not present in the given + list of columns. + + It relies on [python secrets token_hex](https://docs.python.org/3/library/secrets.html#secrets.token_hex) + function to return a string nbytes random bytes. Arguments: - n_bytes : The number of bytes to generate for the token. - columns : The list of columns to check for uniqueness. + n_bytes: The number of bytes to generate for the token. + columns: The list of columns to check for uniqueness. Returns: A unique token that is not present in the given list of columns. Raises: AssertionError: If a unique token cannot be generated after 100 attempts. + + Examples: + >>> import narwhals as nw + >>> columns = ["abc", "xyz"] + >>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns + True """ counter = 0 while True: @@ -483,8 +523,8 @@ def generate_unique_token(n_bytes: int, columns: list[str]) -> str: # pragma: n counter += 1 if counter > 100: msg = ( - "Internal Error: Narwhals was not able to generate a column name to perform given " - "join operation" + "Internal Error: Narwhals was not able to generate a column name with " + f"{n_bytes=} and not in {columns}" ) raise AssertionError(msg) diff --git a/noxfile.py b/noxfile.py index 1dc37b29d..aec70add4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import nox -from nox.sessions import Session + +if TYPE_CHECKING: + from nox.sessions import Session nox.options.default_venv_backend = "uv" nox.options.reuse_venv = True @@ -21,7 +27,9 @@ def run_common(session: Session, coverage_threshold: float) -> None: f"--cov-fail-under={coverage_threshold}", "--runslow", ) - session.run("pytest", "narwhals", "--doctest-modules") + + if session.python == "3.12": + session.run("pytest", "narwhals", "--doctest-modules") @nox.session(python=PYTHON_VERSIONS) # type: ignore[misc] diff --git a/pyproject.toml b/pyproject.toml index c4974d8c0..6bc92b6dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "narwhals" -version = "1.9.3" +version = "1.11.1" authors = [ { name="Marco Gorelli", email="33491632+MarcoGorelli@users.noreply.github.com" }, ] @@ -94,6 +94,7 @@ convention = "google" [tool.ruff.lint.isort] force-single-line = true +required-imports = ["from __future__ import annotations"] [tool.ruff.format] docstring-code-format = true diff --git a/tests/conftest.py b/tests/conftest.py index 85c296daf..d40d1027e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,7 @@ +from __future__ import annotations + import contextlib +from typing import TYPE_CHECKING from typing import Any from typing import Callable @@ -10,10 +13,12 @@ from narwhals.dependencies import get_cudf from narwhals.dependencies import get_dask_dataframe from narwhals.dependencies import get_modin -from 
narwhals.typing import IntoDataFrame -from narwhals.typing import IntoFrame -from narwhals.utils import parse_version -from tests.utils import Constructor +from tests.utils import PANDAS_VERSION + +if TYPE_CHECKING: + from narwhals.typing import IntoDataFrame + from narwhals.typing import IntoFrame + from tests.utils import Constructor with contextlib.suppress(ImportError): import modin.pandas # noqa: F401 @@ -87,7 +92,7 @@ def pyarrow_table_constructor(obj: Any) -> IntoDataFrame: return pa.table(obj) # type: ignore[no-any-return] -if parse_version(pd.__version__) >= parse_version("2.0.0"): +if PANDAS_VERSION >= (2, 0, 0): eager_constructors = [ pandas_constructor, pandas_nullable_constructor, @@ -104,11 +109,15 @@ def pyarrow_table_constructor(obj: Any) -> IntoDataFrame: if get_cudf() is not None: eager_constructors.append(cudf_constructor) # pragma: no cover if get_dask_dataframe() is not None: # pragma: no cover - lazy_constructors.extend([dask_lazy_p1_constructor, dask_lazy_p2_constructor]) # type: ignore # noqa: PGH003 + # TODO(unassigned): reinstate both dask constructors once if/when we have a dask use-case + # lazy_constructors.extend([dask_lazy_p1_constructor, dask_lazy_p2_constructor]) # noqa: ERA001 + lazy_constructors.append(dask_lazy_p2_constructor) # type: ignore # noqa: PGH003 @pytest.fixture(params=eager_constructors) -def constructor_eager(request: pytest.FixtureRequest) -> Callable[[Any], IntoDataFrame]: +def constructor_eager( + request: pytest.FixtureRequest, +) -> Callable[[Any], IntoDataFrame]: return request.param # type: ignore[no-any-return] diff --git a/tests/dependencies/is_into_series_test.py b/tests/dependencies/is_into_series_test.py new file mode 100644 index 000000000..a4d4a827f --- /dev/null +++ b/tests/dependencies/is_into_series_test.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Any + +import numpy as np +import pandas as pd +import polars as pl +import pyarrow as pa + +import narwhals as nw +from narwhals.dependencies import is_into_series + +if TYPE_CHECKING: + from typing_extensions import Self + + +class ListBackedSeries: + def __init__(self, name: str, data: list[Any]) -> None: + self._data = data + self._name = name + + def __len__(self) -> int: # pragma: no cover + return len(self._data) + + def __narwhals_series__(self) -> Self: # pragma: no cover + return self + + +def test_is_into_series() -> None: + assert is_into_series(pa.chunked_array([["a", "b"]])) + assert is_into_series(pl.Series([1, 2, 3])) + assert is_into_series(pd.Series([1, 2, 3])) + assert is_into_series(nw.from_native(pd.Series([1, 2, 3]), series_only=True)) + assert is_into_series(ListBackedSeries("a", [1, 4, 2])) + assert not is_into_series(np.array([1, 2, 3])) + assert not is_into_series([1, 2, 3]) diff --git a/tests/dependencies/is_pandas_dataframe_test.py b/tests/dependencies/is_pandas_dataframe_test.py index a8ffaa739..96b874952 100644 --- a/tests/dependencies/is_pandas_dataframe_test.py +++ b/tests/dependencies/is_pandas_dataframe_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import pandas as pd import polars as pl diff --git a/tests/dependencies/is_pandas_index_test.py b/tests/dependencies/is_pandas_index_test.py new file mode 100644 index 000000000..1d97dd824 --- /dev/null +++ b/tests/dependencies/is_pandas_index_test.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +import pandas as pd + +from narwhals.dependencies import is_pandas_index + + +def test_is_pandas_index() -> None: + 
data = [1, 2] + s_pd = pd.Series(data) + assert is_pandas_index(s_pd.index) + assert not is_pandas_index(data) diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py index c35507873..0d6363aee 100644 --- a/tests/dtypes_test.py +++ b/tests/dtypes_test.py @@ -12,7 +12,8 @@ import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION +from tests.utils import POLARS_VERSION @pytest.mark.parametrize("time_unit", ["us", "ns", "ms"]) @@ -87,8 +88,45 @@ def test_array_valid() -> None: dtype = nw.Array(nw.Int64) +def test_struct_valid() -> None: + dtype = nw.Struct([nw.Field("a", nw.Int64)]) + assert dtype == nw.Struct([nw.Field("a", nw.Int64)]) + assert dtype == nw.Struct + assert dtype != nw.Struct([nw.Field("a", nw.Float32)]) + assert dtype != nw.Duration + assert repr(dtype) == "Struct({'a': })" + + dtype = nw.Struct({"a": nw.Int64, "b": nw.String}) + assert dtype == nw.Struct({"a": nw.Int64, "b": nw.String}) + assert dtype.to_schema() == nw.Struct({"a": nw.Int64, "b": nw.String}).to_schema() + assert dtype == nw.Struct + assert dtype != nw.Struct({"a": nw.Int32, "b": nw.String}) + assert dtype in {nw.Struct({"a": nw.Int64, "b": nw.String})} + + +def test_struct_reverse() -> None: + dtype1 = nw.Struct({"a": nw.Int64, "b": nw.String}) + dtype1_reversed = nw.Struct([nw.Field(*field) for field in reversed(dtype1)]) + dtype2 = nw.Struct({"b": nw.String, "a": nw.Int64}) + assert dtype1_reversed == dtype2 + + +def test_field_repr() -> None: + dtype = nw.Field("a", nw.Int32) + assert repr(dtype) == "Field('a', )" + + +def test_struct_hashes() -> None: + dtypes = ( + nw.Struct, + nw.Struct([nw.Field("a", nw.Int64)]), + nw.Struct([nw.Field("a", nw.Int64), nw.Field("b", nw.List(nw.Int64))]), + ) + assert len({hash(tp) for tp in (dtypes)}) == 3 + + @pytest.mark.skipif( - parse_version(pl.__version__) < (1,) or parse_version(pd.__version__) < (2, 2), + POLARS_VERSION < (1,) or PANDAS_VERSION < (2, 2), reason="`shape` is only available after 1.0", ) def test_polars_2d_array() -> None: @@ -107,7 +145,7 @@ def test_polars_2d_array() -> None: def test_second_time_unit() -> None: s = pd.Series(np.array([np.datetime64("2020-01-01", "s")])) result = nw.from_native(s, series_only=True) - if parse_version(pd.__version__) < (2,): # pragma: no cover + if PANDAS_VERSION < (2,): # pragma: no cover assert result.dtype == nw.Datetime("ns") else: assert result.dtype == nw.Datetime("s") @@ -116,10 +154,25 @@ def test_second_time_unit() -> None: assert result.dtype == nw.Datetime("s") s = pd.Series(np.array([np.timedelta64(1, "s")])) result = nw.from_native(s, series_only=True) - if parse_version(pd.__version__) < (2,): # pragma: no cover + if PANDAS_VERSION < (2,): # pragma: no cover assert result.dtype == nw.Duration("ns") else: assert result.dtype == nw.Duration("s") s = pa.chunked_array([pa.array([timedelta(1)], type=pa.duration("s"))]) result = nw.from_native(s, series_only=True) assert result.dtype == nw.Duration("s") + + +@pytest.mark.filterwarnings("ignore:Setting an item of incompatible") +def test_pandas_inplace_modification_1267(request: pytest.FixtureRequest) -> None: + if PANDAS_VERSION >= (3,): + # pandas 3.0+ won't allow this kind of inplace modification + request.applymarker(pytest.mark.xfail) + if PANDAS_VERSION < (1, 4): + # pandas pre 1.4 wouldn't change the type? 
+ request.applymarker(pytest.mark.xfail) + s = pd.Series([1, 2, 3]) + snw = nw.from_native(s, series_only=True) + assert snw.dtype == nw.Int64 + s[0] = 999.5 + assert snw.dtype == nw.Float64 diff --git a/tests/expr_and_series/abs_test.py b/tests/expr_and_series/abs_test.py index 286bcca19..098f0e894 100644 --- a/tests/expr_and_series/abs_test.py +++ b/tests/expr_and_series/abs_test.py @@ -1,19 +1,20 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data def test_abs(constructor: Constructor) -> None: df = nw.from_native(constructor({"a": [1, 2, 3, -4, 5]})) result = df.select(b=nw.col("a").abs()) expected = {"b": [1, 2, 3, 4, 5]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_abs_series(constructor_eager: Any) -> None: +def test_abs_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager({"a": [1, 2, 3, -4, 5]}), eager_only=True) result = {"b": df["a"].abs()} expected = {"b": [1, 2, 3, 4, 5]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/all_horizontal_test.py b/tests/expr_and_series/all_horizontal_test.py index 01d53fe63..706c42baf 100644 --- a/tests/expr_and_series/all_horizontal_test.py +++ b/tests/expr_and_series/all_horizontal_test.py @@ -1,12 +1,14 @@ +from __future__ import annotations + from typing import Any -import polars as pl import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import POLARS_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data @pytest.mark.parametrize("expr1", ["a", nw.col("a")]) @@ -20,10 +22,10 @@ def test_allh(constructor: Constructor, expr1: Any, expr2: Any) -> None: result = df.select(all=nw.all_horizontal(expr1, expr2)) expected = {"all": [False, False, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_allh_series(constructor_eager: Any) -> None: +def test_allh_series(constructor_eager: ConstructorEager) -> None: data = { "a": [False, False, True], "b": [False, True, True], @@ -32,7 +34,7 @@ def test_allh_series(constructor_eager: Any) -> None: result = df.select(all=nw.all_horizontal(df["a"], df["b"])) expected = {"all": [False, False, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_allh_all(constructor: Constructor) -> None: @@ -43,14 +45,17 @@ def test_allh_all(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(all=nw.all_horizontal(nw.all())) expected = {"all": [False, False, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select(nw.all_horizontal(nw.all())) expected = {"a": [False, False, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_allh_nth(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if "polars" in str(constructor) and parse_version(pl.__version__) < (1, 0): +def test_allh_nth( + constructor: Constructor, + request: pytest.FixtureRequest, +) -> None: + if "polars" in str(constructor) and POLARS_VERSION < (1, 0): request.applymarker(pytest.mark.xfail) data = { "a": [False, False, True], @@ -59,13 +64,13 @@ def 
test_allh_nth(constructor: Constructor, request: pytest.FixtureRequest) -> N df = nw.from_native(constructor(data)) result = df.select(nw.all_horizontal(nw.nth(0, 1))) expected = {"a": [False, False, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select(nw.all_horizontal(nw.col("a"), nw.nth(0))) expected = {"a": [False, False, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_horizontal_expressions_emtpy(constructor: Constructor) -> None: +def test_horizontal_expressions_empty(constructor: Constructor) -> None: data = { "a": [False, False, True], "b": [False, True, True], diff --git a/tests/expr_and_series/any_all_test.py b/tests/expr_and_series/any_all_test.py index 834a91202..c5f22ad9a 100644 --- a/tests/expr_and_series/any_all_test.py +++ b/tests/expr_and_series/any_all_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data def test_any_all(constructor: Constructor) -> None: @@ -17,13 +18,13 @@ def test_any_all(constructor: Constructor) -> None: ) result = df.select(nw.col("a", "b", "c").all()) expected = {"a": [False], "b": [True], "c": [False]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select(nw.all().any()) expected = {"a": [True], "b": [True], "c": [False]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_any_all_series(constructor_eager: Any) -> None: +def test_any_all_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native( constructor_eager( { @@ -36,7 +37,7 @@ def test_any_all_series(constructor_eager: Any) -> None: ) result = {"a": [df["a"].all()], "b": [df["b"].all()], "c": [df["c"].all()]} expected = {"a": [False], "b": [True], "c": [False]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = {"a": [df["a"].any()], "b": [df["b"].any()], "c": [df["c"].any()]} expected = {"a": [True], "b": [True], "c": [False]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/any_horizontal_test.py b/tests/expr_and_series/any_horizontal_test.py index cd360bf66..4eb082b51 100644 --- a/tests/expr_and_series/any_horizontal_test.py +++ b/tests/expr_and_series/any_horizontal_test.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from typing import Any import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data @pytest.mark.parametrize("expr1", ["a", nw.col("a")]) @@ -18,7 +20,7 @@ def test_anyh(constructor: Constructor, expr1: Any, expr2: Any) -> None: result = df.select(any=nw.any_horizontal(expr1, expr2)) expected = {"any": [False, True, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_anyh_all(constructor: Constructor) -> None: @@ -29,7 +31,7 @@ def test_anyh_all(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(any=nw.any_horizontal(nw.all())) expected = {"any": [False, True, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select(nw.any_horizontal(nw.all())) expected = {"a": [False, True, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git 
a/tests/expr_and_series/arg_true_test.py b/tests/expr_and_series/arg_true_test.py index 7e1262aa8..7dfeaa46a 100644 --- a/tests/expr_and_series/arg_true_test.py +++ b/tests/expr_and_series/arg_true_test.py @@ -1,10 +1,11 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data def test_arg_true(constructor: Constructor, request: pytest.FixtureRequest) -> None: @@ -13,12 +14,12 @@ def test_arg_true(constructor: Constructor, request: pytest.FixtureRequest) -> N df = nw.from_native(constructor({"a": [1, None, None, 3]})) result = df.select(nw.col("a").is_null().arg_true()) expected = {"a": [1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_arg_true_series(constructor_eager: Any) -> None: +def test_arg_true_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager({"a": [1, None, None, 3]}), eager_only=True) result = df.select(df["a"].is_null().arg_true()) expected = {"a": [1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) assert "a" in df # cheeky test to hit `__contains__` method diff --git a/tests/expr_and_series/arithmetic_test.py b/tests/expr_and_series/arithmetic_test.py index e431aebbe..95172bd2c 100644 --- a/tests/expr_and_series/arithmetic_test.py +++ b/tests/expr_and_series/arithmetic_test.py @@ -11,9 +11,10 @@ from hypothesis import given import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data @pytest.mark.parametrize( @@ -44,7 +45,7 @@ def test_arithmetic_expr( data = {"a": [1.0, 2, 3]} df = nw.from_native(constructor(data)) result = df.select(getattr(nw.col("a"), attr)(rhs)) - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) @pytest.mark.parametrize( @@ -74,7 +75,7 @@ def test_right_arithmetic_expr( data = {"a": [1, 2, 3]} df = nw.from_native(constructor(data)) result = df.select(a=getattr(nw.col("a"), attr)(rhs)) - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) @pytest.mark.parametrize( @@ -94,7 +95,7 @@ def test_arithmetic_series( attr: str, rhs: Any, expected: list[Any], - constructor_eager: Any, + constructor_eager: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__mod__" and any( @@ -105,7 +106,7 @@ def test_arithmetic_series( data = {"a": [1, 2, 3]} df = nw.from_native(constructor_eager(data), eager_only=True) result = df.select(getattr(df["a"], attr)(rhs)) - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) @pytest.mark.parametrize( @@ -124,7 +125,7 @@ def test_right_arithmetic_series( attr: str, rhs: Any, expected: list[Any], - constructor_eager: Any, + constructor_eager: ConstructorEager, request: pytest.FixtureRequest, ) -> None: if attr == "__rmod__" and any( @@ -135,11 +136,11 @@ def test_right_arithmetic_series( data = {"a": [1, 2, 3]} df = nw.from_native(constructor_eager(data), eager_only=True) result = df.select(a=getattr(df["a"], attr)(rhs)) - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) def test_truediv_same_dims( - constructor_eager: Any, request: pytest.FixtureRequest + 
constructor_eager: ConstructorEager, request: pytest.FixtureRequest ) -> None: if "polars" in str(constructor_eager): # https://github.com/pola-rs/polars/issues/17760 @@ -147,9 +148,9 @@ def test_truediv_same_dims( s_left = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] s_right = nw.from_native(constructor_eager({"a": [2, 2, 1]}), eager_only=True)["a"] result = s_left / s_right - compare_dicts({"a": result}, {"a": [0.5, 1.0, 3.0]}) + assert_equal_data({"a": result}, {"a": [0.5, 1.0, 3.0]}) result = s_left.__rtruediv__(s_right) - compare_dicts({"a": result}, {"a": [2, 1, 1 / 3]}) + assert_equal_data({"a": result}, {"a": [2, 1, 1 / 3]}) @pytest.mark.slow @@ -157,9 +158,7 @@ def test_truediv_same_dims( left=st.integers(-100, 100), right=st.integers(-100, 100), ) -@pytest.mark.skipif( - parse_version(pd.__version__) < (2, 0), reason="convert_dtypes not available" -) +@pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="convert_dtypes not available") def test_floordiv(left: int, right: int) -> None: # hypothesis complains if we add `constructor` as an argument, so this # test is a bit manual unfortunately @@ -168,8 +167,8 @@ def test_floordiv(left: int, right: int) -> None: result = nw.from_native(pd.DataFrame({"a": [left]}), eager_only=True).select( nw.col("a") // right ) - compare_dicts(result, expected) - if parse_version(pd.__version__) < (2, 2): # pragma: no cover + assert_equal_data(result, expected) + if PANDAS_VERSION < (2, 2): # pragma: no cover # Bug in old version of pandas pass else: @@ -177,19 +176,19 @@ def test_floordiv(left: int, right: int) -> None: pd.DataFrame({"a": [left]}).convert_dtypes(dtype_backend="pyarrow"), eager_only=True, ).select(nw.col("a") // right) - compare_dicts(result, expected) + assert_equal_data(result, expected) result = nw.from_native( pd.DataFrame({"a": [left]}).convert_dtypes(), eager_only=True ).select(nw.col("a") // right) - compare_dicts(result, expected) + assert_equal_data(result, expected) result = nw.from_native(pl.DataFrame({"a": [left]}), eager_only=True).select( nw.col("a") // right ) - compare_dicts(result, expected) + assert_equal_data(result, expected) result = nw.from_native(pa.table({"a": [left]}), eager_only=True).select( nw.col("a") // right ) - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.slow @@ -197,9 +196,7 @@ def test_floordiv(left: int, right: int) -> None: left=st.integers(-100, 100), right=st.integers(-100, 100), ) -@pytest.mark.skipif( - parse_version(pd.__version__) < (2, 0), reason="convert_dtypes not available" -) +@pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="convert_dtypes not available") def test_mod(left: int, right: int) -> None: # hypothesis complains if we add `constructor` as an argument, so this # test is a bit manual unfortunately @@ -208,16 +205,16 @@ def test_mod(left: int, right: int) -> None: result = nw.from_native(pd.DataFrame({"a": [left]}), eager_only=True).select( nw.col("a") % right ) - compare_dicts(result, expected) + assert_equal_data(result, expected) result = nw.from_native( pd.DataFrame({"a": [left]}).convert_dtypes(), eager_only=True ).select(nw.col("a") % right) - compare_dicts(result, expected) + assert_equal_data(result, expected) result = nw.from_native(pl.DataFrame({"a": [left]}), eager_only=True).select( nw.col("a") % right ) - compare_dicts(result, expected) + assert_equal_data(result, expected) result = nw.from_native(pa.table({"a": [left]}), eager_only=True).select( nw.col("a") % right ) - compare_dicts(result, 
expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/binary_test.py b/tests/expr_and_series/binary_test.py index 1ce76d9d2..3693ccebd 100644 --- a/tests/expr_and_series/binary_test.py +++ b/tests/expr_and_series/binary_test.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_expr_binary(constructor: Constructor) -> None: @@ -41,4 +43,4 @@ def test_expr_binary(constructor: Constructor) -> None: "l": [0, 1, 1], "m": [1, 9, 4], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index 2229c8abb..11c20d0a7 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -5,13 +5,13 @@ from datetime import timezone import pandas as pd -import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION +from tests.utils import PYARROW_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data from tests.utils import is_windows data = { @@ -53,10 +53,13 @@ @pytest.mark.filterwarnings("ignore:casting period[M] values to int64:FutureWarning") -def test_cast(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if "pyarrow_table_constructor" in str(constructor) and parse_version( - pa.__version__ - ) <= (15,): # pragma: no cover +def test_cast( + constructor: Constructor, + request: pytest.FixtureRequest, +) -> None: + if "pyarrow_table_constructor" in str(constructor) and PYARROW_VERSION <= ( + 15, + ): # pragma: no cover request.applymarker(pytest.mark.xfail) if "modin" in str(constructor): # TODO(unassigned): in modin, we end up with `' None: assert dict(result.collect_schema()) == expected -def test_cast_series(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if "pyarrow_table_constructor" in str(constructor) and parse_version( - pa.__version__ - ) <= (15,): # pragma: no cover +def test_cast_series( + constructor: Constructor, + request: pytest.FixtureRequest, +) -> None: + if "pyarrow_table_constructor" in str(constructor) and PYARROW_VERSION <= ( + 15, + ): # pragma: no cover request.applymarker(pytest.mark.xfail) if "modin" in str(constructor): # TODO(unassigned): in modin, we end up with `' None: s_pd = pd.Series([1, 2]).convert_dtypes() s = nw.from_native(s_pd, series_only=True) @@ -170,11 +173,12 @@ def test_cast_string() -> None: def test_cast_raises_for_unknown_dtype( - constructor: Constructor, request: pytest.FixtureRequest + constructor: Constructor, + request: pytest.FixtureRequest, ) -> None: - if "pyarrow_table_constructor" in str(constructor) and parse_version( - pa.__version__ - ) <= (15,): # pragma: no cover + if "pyarrow_table_constructor" in str(constructor) and PYARROW_VERSION <= ( + 15, + ): # pragma: no cover request.applymarker(pytest.mark.xfail) if "polars" in str(constructor): request.applymarker(pytest.mark.xfail) @@ -217,4 +221,4 @@ def test_cast_datetime_tz_aware( .cast(nw.String()) .str.slice(offset=0, length=19) ) - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/cat/get_categories_test.py b/tests/expr_and_series/cat/get_categories_test.py index 122f3c83e..9be209ab2 100644 --- 
a/tests/expr_and_series/cat/get_categories_test.py +++ b/tests/expr_and_series/cat/get_categories_test.py @@ -1,21 +1,21 @@ from __future__ import annotations -from typing import Any - import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version -from tests.utils import compare_dicts +from tests.utils import PYARROW_VERSION +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": ["one", "two", "two"]} -def test_get_categories(request: pytest.FixtureRequest, constructor_eager: Any) -> None: - if "pyarrow_table" in str(constructor_eager) and parse_version( - pa.__version__ - ) < parse_version("15.0.0"): +def test_get_categories( + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, +) -> None: + if "pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < (15, 0, 0): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor_eager(data), eager_only=True) @@ -23,10 +23,10 @@ def test_get_categories(request: pytest.FixtureRequest, constructor_eager: Any) expected = {"a": ["one", "two"]} result_expr = df.select(nw.col("a").cat.get_categories()) - compare_dicts(result_expr, expected) + assert_equal_data(result_expr, expected) result_series = df["a"].cat.get_categories() - compare_dicts({"a": result_series}, expected) + assert_equal_data({"a": result_series}, expected) def test_get_categories_pyarrow() -> None: @@ -39,7 +39,7 @@ def test_get_categories_pyarrow() -> None: expected = {"a": ["a", "b", "d"]} result_expr = df.select(nw.col("a").cat.get_categories()) - compare_dicts(result_expr, expected) + assert_equal_data(result_expr, expected) result_series = df["a"].cat.get_categories() - compare_dicts({"a": result_series}, expected) + assert_equal_data({"a": result_series}, expected) diff --git a/tests/expr_and_series/clip_test.py b/tests/expr_and_series/clip_test.py index d3f90633c..86fe7dadb 100644 --- a/tests/expr_and_series/clip_test.py +++ b/tests/expr_and_series/clip_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data def test_clip(constructor: Constructor) -> None: @@ -17,10 +18,10 @@ def test_clip(constructor: Constructor) -> None: "upper_only": [1, 2, 3, -4, 4], "both": [3, 3, 3, 3, 4], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_clip_series(constructor_eager: Any) -> None: +def test_clip_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager({"a": [1, 2, 3, -4, 5]}), eager_only=True) result = { "lower_only": df["a"].clip(lower_bound=3), @@ -33,4 +34,4 @@ def test_clip_series(constructor_eager: Any) -> None: "upper_only": [1, 2, 3, -4, 4], "both": [3, 3, 3, 3, 4], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/concat_str_test.py b/tests/expr_and_series/concat_str_test.py index 5a28085a8..26366d2f2 100644 --- a/tests/expr_and_series/concat_str_test.py +++ b/tests/expr_and_series/concat_str_test.py @@ -4,7 +4,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = { "a": [1, 2, 3], @@ -40,7 +40,7 @@ def test_concat_str( .sort("a") .select("full_sentence") ) - compare_dicts(result, {"full_sentence": 
expected}) + assert_equal_data(result, {"full_sentence": expected}) result = ( df.select( "a", @@ -55,4 +55,4 @@ def test_concat_str( .sort("a") .select("full_sentence") ) - compare_dicts(result, {"full_sentence": expected}) + assert_equal_data(result, {"full_sentence": expected}) diff --git a/tests/expr_and_series/convert_time_zone_test.py b/tests/expr_and_series/convert_time_zone_test.py index ee4ccaec4..fbe33f9a2 100644 --- a/tests/expr_and_series/convert_time_zone_test.py +++ b/tests/expr_and_series/convert_time_zone_test.py @@ -1,24 +1,31 @@ +from __future__ import annotations + from datetime import datetime from datetime import timezone -from typing import Any +from typing import TYPE_CHECKING -import pandas as pd -import polars as pl -import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION +from tests.utils import POLARS_VERSION +from tests.utils import PYARROW_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data from tests.utils import is_windows +if TYPE_CHECKING: + from tests.utils import ConstructorEager + def test_convert_time_zone( - constructor: Constructor, request: pytest.FixtureRequest + constructor: Constructor, + request: pytest.FixtureRequest, ) -> None: - if (any(x in str(constructor) for x in ("pyarrow", "modin")) and is_windows()) or ( - "pandas_pyarrow" in str(constructor) and parse_version(pd.__version__) < (2, 1) + if ( + (any(x in str(constructor) for x in ("pyarrow", "modin")) and is_windows()) + or ("pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1)) + or ("cudf" in str(constructor)) ): request.applymarker(pytest.mark.xfail) data = { @@ -34,17 +41,17 @@ def test_convert_time_zone( assert result_dtype.time_zone == "Asia/Kathmandu" # type: ignore[attr-defined] result_str = result.select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M%z")) expected = {"a": ["2020-01-01T05:45+0545", "2020-01-02T05:45+0545"]} - compare_dicts(result_str, expected) + assert_equal_data(result_str, expected) def test_convert_time_zone_series( - constructor_eager: Any, request: pytest.FixtureRequest + constructor_eager: ConstructorEager, + request: pytest.FixtureRequest, ) -> None: if ( - any(x in str(constructor_eager) for x in ("pyarrow", "modin")) and is_windows() - ) or ( - "pandas_pyarrow" in str(constructor_eager) - and parse_version(pd.__version__) < (2, 1) + (any(x in str(constructor_eager) for x in ("pyarrow", "modin")) and is_windows()) + or ("pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2, 1)) + or ("cudf" in str(constructor_eager)) ): request.applymarker(pytest.mark.xfail) data = { @@ -60,7 +67,7 @@ def test_convert_time_zone_series( assert result_dtype.time_zone == "Asia/Kathmandu" # type: ignore[attr-defined] result_str = result.select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M%z")) expected = {"a": ["2020-01-01T05:45+0545", "2020-01-02T05:45+0545"]} - compare_dicts(result_str, expected) + assert_equal_data(result_str, expected) def test_convert_time_zone_from_none( @@ -68,14 +75,12 @@ def test_convert_time_zone_from_none( ) -> None: if ( (any(x in str(constructor) for x in ("pyarrow", "modin")) and is_windows()) - or ( - "pandas_pyarrow" in str(constructor) - and parse_version(pd.__version__) < (2, 1) - ) - or ("pyarrow_table" in str(constructor) and parse_version(pa.__version__) < (12,)) + or ("pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1)) + or ("pyarrow_table" in 
str(constructor) and PYARROW_VERSION < (12,)) + or ("cudf" in str(constructor)) ): request.applymarker(pytest.mark.xfail) - if "polars" in str(constructor) and parse_version(pl.__version__) < (0, 20, 7): + if "polars" in str(constructor) and POLARS_VERSION < (0, 20, 7): # polars used to disallow this request.applymarker(pytest.mark.xfail) data = { @@ -93,7 +98,7 @@ def test_convert_time_zone_from_none( assert result_dtype.time_zone == "Asia/Kathmandu" # type: ignore[attr-defined] result_str = result.select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M%z")) expected = {"a": ["2020-01-01T05:45+0545", "2020-01-02T05:45+0545"]} - compare_dicts(result_str, expected) + assert_equal_data(result_str, expected) def test_convert_time_zone_to_none(constructor: Constructor) -> None: @@ -108,7 +113,7 @@ def test_convert_time_zone_to_none(constructor: Constructor) -> None: df.select(nw.col("a").dt.convert_time_zone(None)) # type: ignore[arg-type] -def test_convert_time_zone_to_none_series(constructor_eager: Any) -> None: +def test_convert_time_zone_to_none_series(constructor_eager: ConstructorEager) -> None: data = { "a": [ datetime(2020, 1, 1, tzinfo=timezone.utc), diff --git a/tests/expr_and_series/count_test.py b/tests/expr_and_series/count_test.py index 580bd202b..d2048db33 100644 --- a/tests/expr_and_series/count_test.py +++ b/tests/expr_and_series/count_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data def test_count(constructor: Constructor) -> None: @@ -10,12 +11,12 @@ def test_count(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(nw.col("a", "b", "z").count()) expected = {"a": [3], "b": [2], "z": [1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_count_series(constructor_eager: Any) -> None: +def test_count_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 3, 2], "b": [4, None, 6], "z": [7.0, None, None]} df = nw.from_native(constructor_eager(data), eager_only=True) result = {"a": [df["a"].count()], "b": [df["b"].count()], "z": [df["z"].count()]} expected = {"a": [3], "b": [2], "z": [1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/cum_sum_test.py b/tests/expr_and_series/cum_sum_test.py index 94897a850..b60e36065 100644 --- a/tests/expr_and_series/cum_sum_test.py +++ b/tests/expr_and_series/cum_sum_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [0, 1, 2, 3, 4], @@ -19,10 +20,10 @@ def test_cum_sum_simple(constructor: Constructor) -> None: "b": [1, 3, 6, 11, 14], "c": [5, 9, 12, 14, 15], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_cum_sum_simple_series(constructor_eager: Any) -> None: +def test_cum_sum_simple_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) expected = { "a": [0, 1, 3, 6, 10], @@ -34,4 +35,4 @@ def test_cum_sum_simple_series(constructor_eager: Any) -> None: df["b"].cum_sum(), df["c"].cum_sum(), ) - compare_dicts(result, expected) + assert_equal_data(result, 
expected) diff --git a/tests/expr_and_series/diff_test.py b/tests/expr_and_series/diff_test.py index 33445f763..da433f7ad 100644 --- a/tests/expr_and_series/diff_test.py +++ b/tests/expr_and_series/diff_test.py @@ -1,12 +1,12 @@ -from typing import Any +from __future__ import annotations -import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PYARROW_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "i": [0, 1, 2, 3, 4], @@ -15,10 +15,11 @@ } -def test_diff(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if "pyarrow_table_constructor" in str(constructor) and parse_version( - pa.__version__ - ) < (13,): +def test_diff( + constructor: Constructor, + request: pytest.FixtureRequest, +) -> None: + if "pyarrow_table_constructor" in str(constructor) and PYARROW_VERSION < (13,): # pc.pairwisediff is available since pyarrow 13.0.0 request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) @@ -29,13 +30,14 @@ def test_diff(constructor: Constructor, request: pytest.FixtureRequest) -> None: "c": [4, 3, 2, 1], "c_diff": [-1, -1, -1, -1], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_diff_series(constructor_eager: Any, request: pytest.FixtureRequest) -> None: - if "pyarrow_table_constructor" in str(constructor_eager) and parse_version( - pa.__version__ - ) < (13,): +def test_diff_series( + constructor_eager: ConstructorEager, + request: pytest.FixtureRequest, +) -> None: + if "pyarrow_table_constructor" in str(constructor_eager) and PYARROW_VERSION < (13,): # pc.pairwisediff is available since pyarrow 13.0.0 request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor_eager(data), eager_only=True) @@ -46,4 +48,4 @@ def test_diff_series(constructor_eager: Any, request: pytest.FixtureRequest) -> "c_diff": [-1, -1, -1, -1], } result = df.with_columns(c_diff=df["c"].diff())[1:] - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/double_selected_test.py b/tests/expr_and_series/double_selected_test.py index 88826fb40..9eb918924 100644 --- a/tests/expr_and_series/double_selected_test.py +++ b/tests/expr_and_series/double_selected_test.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_double_selected(constructor: Constructor) -> None: @@ -9,12 +11,12 @@ def test_double_selected(constructor: Constructor) -> None: result = df.select(nw.col("a", "b") * 2) expected = {"a": [2, 6, 4], "b": [8, 8, 12]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select("z", nw.col("a", "b") * 2) expected = {"z": [7, 8, 9], "a": [2, 6, 4], "b": [8, 8, 12]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select("a").select(nw.col("a") + nw.all()) expected = {"a": [2, 6, 4]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/double_test.py b/tests/expr_and_series/double_test.py index 8f19e0202..321defad2 100644 --- a/tests/expr_and_series/double_test.py +++ b/tests/expr_and_series/double_test.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import narwhals.stable.v1 as nw from tests.utils 
import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_double(constructor: Constructor) -> None: @@ -8,7 +10,7 @@ def test_double(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.with_columns(nw.all() * 2) expected = {"a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_double_alias(constructor: Constructor) -> None: @@ -21,4 +23,4 @@ def test_double_alias(constructor: Constructor) -> None: "b": [8, 8, 12], "z": [14.0, 16.0, 18.0], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/drop_nulls_test.py b/tests/expr_and_series/drop_nulls_test.py index bc06eec3a..70baf1f86 100644 --- a/tests/expr_and_series/drop_nulls_test.py +++ b/tests/expr_and_series/drop_nulls_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data def test_drop_nulls(constructor: Constructor, request: pytest.FixtureRequest) -> None: @@ -30,13 +29,13 @@ def test_drop_nulls(constructor: Constructor, request: pytest.FixtureRequest) -> expected_c = {"C": []} # type: ignore[var-annotated] expected_d = {"D": [9, 10, 11, 12]} - compare_dicts(result_a, expected_a) - compare_dicts(result_b, expected_b) - compare_dicts(result_c, expected_c) - compare_dicts(result_d, expected_d) + assert_equal_data(result_a, expected_a) + assert_equal_data(result_b, expected_b) + assert_equal_data(result_c, expected_c) + assert_equal_data(result_d, expected_d) -def test_drop_nulls_series(constructor_eager: Any) -> None: +def test_drop_nulls_series(constructor_eager: ConstructorEager) -> None: data = { "A": [1, 2, None, 4], "B": [5, 6, 7, 8], @@ -55,7 +54,7 @@ def test_drop_nulls_series(constructor_eager: Any) -> None: expected_c = {"C": []} # type: ignore[var-annotated] expected_d = {"D": [9, 10, 11, 12]} - compare_dicts(result_a, expected_a) - compare_dicts(result_b, expected_b) - compare_dicts(result_c, expected_c) - compare_dicts(result_d, expected_d) + assert_equal_data(result_a, expected_a) + assert_equal_data(result_b, expected_b) + assert_equal_data(result_c, expected_c) + assert_equal_data(result_d, expected_d) diff --git a/tests/expr_and_series/dt/datetime_attributes_test.py b/tests/expr_and_series/dt/datetime_attributes_test.py index 5b9519f57..0e4c7c992 100644 --- a/tests/expr_and_series/dt/datetime_attributes_test.py +++ b/tests/expr_and_series/dt/datetime_attributes_test.py @@ -2,13 +2,13 @@ from datetime import date from datetime import datetime -from typing import Any import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [ @@ -51,7 +51,7 @@ def test_datetime_attributes( df = nw.from_native(constructor(data)) result = df.select(getattr(nw.col("a").dt, attribute)()) - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) @pytest.mark.parametrize( @@ -72,7 +72,7 @@ def test_datetime_attributes( ) def test_datetime_attributes_series( request: pytest.FixtureRequest, - constructor_eager: Any, + constructor_eager: ConstructorEager, attribute: str, expected: list[int], ) -> None: @@ 
-87,11 +87,11 @@ def test_datetime_attributes_series( df = nw.from_native(constructor_eager(data), eager_only=True) result = df.select(getattr(df["a"].dt, attribute)()) - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) def test_datetime_chained_attributes( - request: pytest.FixtureRequest, constructor_eager: Any + request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: if "pandas" in str(constructor_eager) and "pyarrow" not in str(constructor_eager): request.applymarker(pytest.mark.xfail) @@ -100,7 +100,22 @@ def test_datetime_chained_attributes( df = nw.from_native(constructor_eager(data), eager_only=True) result = df.select(df["a"].dt.date().dt.year()) - compare_dicts(result, {"a": [2021, 2020]}) + assert_equal_data(result, {"a": [2021, 2020]}) result = df.select(nw.col("a").dt.date().dt.year()) - compare_dicts(result, {"a": [2021, 2020]}) + assert_equal_data(result, {"a": [2021, 2020]}) + + +def test_to_date(request: pytest.FixtureRequest, constructor: Constructor) -> None: + if any( + x in str(constructor) + for x in ("pandas_constructor", "pandas_nullable_constructor", "cudf") + ): + request.applymarker(pytest.mark.xfail) + dates = {"a": [datetime(2001, 1, 1), None, datetime(2001, 1, 3)]} + if "dask" in str(constructor): + df = nw.from_native(constructor(dates).astype({"a": "timestamp[ns][pyarrow]"})) # type: ignore[union-attr] + else: + df = nw.from_native(constructor(dates)) + result = df.select(nw.col("a").dt.date()) + assert result.collect_schema() == {"a": nw.Date} diff --git a/tests/expr_and_series/dt/datetime_duration_test.py b/tests/expr_and_series/dt/datetime_duration_test.py index da5ff325b..09f227c79 100644 --- a/tests/expr_and_series/dt/datetime_duration_test.py +++ b/tests/expr_and_series/dt/datetime_duration_test.py @@ -1,18 +1,17 @@ from __future__ import annotations from datetime import timedelta -from typing import Any import numpy as np -import pandas as pd import pyarrow as pa import pyarrow.compute as pc import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [ @@ -45,21 +44,19 @@ def test_duration_attributes( expected_b: list[int], expected_c: list[int], ) -> None: - if parse_version(pd.__version__) < (2, 2) and "pandas_pyarrow" in str(constructor): - request.applymarker(pytest.mark.xfail) - if "cudf" in str(constructor): + if PANDAS_VERSION < (2, 2) and "pandas_pyarrow" in str(constructor): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result_a = df.select(getattr(nw.col("a").dt, attribute)().fill_null(0)) - compare_dicts(result_a, {"a": expected_a}) + assert_equal_data(result_a, {"a": expected_a}) result_b = df.select(getattr(nw.col("b").dt, attribute)().fill_null(0)) - compare_dicts(result_b, {"b": expected_b}) + assert_equal_data(result_b, {"b": expected_b}) result_c = df.select(getattr(nw.col("c").dt, attribute)().fill_null(0)) - compare_dicts(result_c, {"c": expected_c}) + assert_equal_data(result_c, {"c": expected_c}) @pytest.mark.parametrize( @@ -74,29 +71,25 @@ def test_duration_attributes( ) def test_duration_attributes_series( request: pytest.FixtureRequest, - constructor_eager: Any, + constructor_eager: ConstructorEager, attribute: str, expected_a: list[int], expected_b: list[int], expected_c: list[int], ) -> None: - if 
parse_version(pd.__version__) < (2, 2) and "pandas_pyarrow" in str( - constructor_eager - ): - request.applymarker(pytest.mark.xfail) - if "cudf" in str(constructor_eager): + if PANDAS_VERSION < (2, 2) and "pandas_pyarrow" in str(constructor_eager): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor_eager(data), eager_only=True) result_a = df.select(getattr(df["a"].dt, attribute)().fill_null(0)) - compare_dicts(result_a, {"a": expected_a}) + assert_equal_data(result_a, {"a": expected_a}) result_b = df.select(getattr(df["b"].dt, attribute)().fill_null(0)) - compare_dicts(result_b, {"b": expected_b}) + assert_equal_data(result_b, {"b": expected_b}) result_c = df.select(getattr(df["c"].dt, attribute)().fill_null(0)) - compare_dicts(result_c, {"c": expected_c}) + assert_equal_data(result_c, {"c": expected_c}) @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) @@ -116,7 +109,7 @@ def test_pyarrow_units(unit: str, attribute: str, expected: int) -> None: df = nw.from_native(pa.table({"a": arr}), eager_only=True) result_expr = df.select(getattr(nw.col("a").dt, attribute)().fill_null(0)) - compare_dicts(result_expr, {"a": [0, expected]}) + assert_equal_data(result_expr, {"a": [0, expected]}) result_series = df.select(getattr(df["a"].dt, attribute)().fill_null(0)) - compare_dicts(result_series, {"a": [0, expected]}) + assert_equal_data(result_series, {"a": [0, expected]}) diff --git a/tests/expr_and_series/dt/ordinal_day_test.py b/tests/expr_and_series/dt/ordinal_day_test.py index 2681188df..82e30d8a1 100644 --- a/tests/expr_and_series/dt/ordinal_day_test.py +++ b/tests/expr_and_series/dt/ordinal_day_test.py @@ -9,12 +9,12 @@ from hypothesis import given import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION @given(dates=st.datetimes(min_value=datetime(1960, 1, 1), max_value=datetime(1980, 1, 1))) # type: ignore[misc] @pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.0.0"), + PANDAS_VERSION < (2, 0, 0), reason="pyarrow dtype not available", ) @pytest.mark.slow diff --git a/tests/expr_and_series/dt/timestamp_test.py b/tests/expr_and_series/dt/timestamp_test.py new file mode 100644 index 000000000..212926628 --- /dev/null +++ b/tests/expr_and_series/dt/timestamp_test.py @@ -0,0 +1,221 @@ +from __future__ import annotations + +from datetime import datetime +from typing import Literal + +import hypothesis.strategies as st +import pandas as pd +import pyarrow as pa +import pytest +from hypothesis import given + +import narwhals.stable.v1 as nw +from tests.utils import PANDAS_VERSION +from tests.utils import POLARS_VERSION +from tests.utils import PYARROW_VERSION +from tests.utils import Constructor +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data +from tests.utils import is_windows + +data = { + "a": [ + datetime(2021, 3, 1, 12, 34, 56, 49000), + datetime(2020, 1, 2, 2, 4, 14, 715000), + ], +} + + +@pytest.mark.parametrize( + ("original_time_unit", "time_unit", "expected"), + [ + ("ns", "ns", [978307200000000000, None, 978480000000000000]), + ("ns", "us", [978307200000000, None, 978480000000000]), + ("ns", "ms", [978307200000, None, 978480000000]), + ("us", "ns", [978307200000000000, None, 978480000000000000]), + ("us", "us", [978307200000000, None, 978480000000000]), + ("us", "ms", [978307200000, None, 978480000000]), + ("ms", "ns", [978307200000000000, None, 978480000000000000]), + ("ms", "us", [978307200000000, None, 978480000000000]), + ("ms", "ms", 
[978307200000, None, 978480000000]), + ("s", "ns", [978307200000000000, None, 978480000000000000]), + ("s", "us", [978307200000000, None, 978480000000000]), + ("s", "ms", [978307200000, None, 978480000000]), + ], +) +def test_timestamp_datetimes( + request: pytest.FixtureRequest, + constructor: Constructor, + original_time_unit: Literal["us", "ns", "ms", "s"], + time_unit: Literal["ns", "us", "ms"], + expected: list[int | None], +) -> None: + if original_time_unit == "s" and "polars" in str(constructor): + request.applymarker(pytest.mark.xfail) + if "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < ( + 2, + 2, + ): # pragma: no cover + # pyarrow-backed timestamps were too inconsistent and unreliable before 2.2 + request.applymarker(pytest.mark.xfail(strict=False)) + datetimes = {"a": [datetime(2001, 1, 1), None, datetime(2001, 1, 3)]} + df = nw.from_native(constructor(datetimes)) + result = df.select( + nw.col("a").cast(nw.Datetime(original_time_unit)).dt.timestamp(time_unit) + ) + assert_equal_data(result, {"a": expected}) + + +@pytest.mark.parametrize( + ("original_time_unit", "time_unit", "expected"), + [ + ("ns", "ns", [978307200000000000, None, 978480000000000000]), + ("ns", "us", [978307200000000, None, 978480000000000]), + ("ns", "ms", [978307200000, None, 978480000000]), + ("us", "ns", [978307200000000000, None, 978480000000000000]), + ("us", "us", [978307200000000, None, 978480000000000]), + ("us", "ms", [978307200000, None, 978480000000]), + ("ms", "ns", [978307200000000000, None, 978480000000000000]), + ("ms", "us", [978307200000000, None, 978480000000000]), + ("ms", "ms", [978307200000, None, 978480000000]), + ("s", "ns", [978307200000000000, None, 978480000000000000]), + ("s", "us", [978307200000000, None, 978480000000000]), + ("s", "ms", [978307200000, None, 978480000000]), + ], +) +def test_timestamp_datetimes_tz_aware( + request: pytest.FixtureRequest, + constructor: Constructor, + original_time_unit: Literal["us", "ns", "ms", "s"], + time_unit: Literal["ns", "us", "ms"], + expected: list[int | None], +) -> None: + if ( + (any(x in str(constructor) for x in ("pyarrow",)) and is_windows()) + or ("pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2,)) + or ("pyarrow_table" in str(constructor) and PYARROW_VERSION < (12,)) + ): + request.applymarker(pytest.mark.xfail) + if "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < ( + 2, + 2, + ): # pragma: no cover + # pyarrow-backed timestamps were too inconsistent and unreliable before 2.2 + request.applymarker(pytest.mark.xfail(strict=False)) + if "dask" in str(constructor) and PANDAS_VERSION < ( + 2, + 1, + ): # pragma: no cover + request.applymarker(pytest.mark.xfail) + + if original_time_unit == "s" and "polars" in str(constructor): + request.applymarker(pytest.mark.xfail) + datetimes = {"a": [datetime(2001, 1, 1), None, datetime(2001, 1, 3)]} + df = nw.from_native(constructor(datetimes)) + result = df.select( + nw.col("a") + .cast(nw.Datetime(original_time_unit)) + .dt.replace_time_zone("UTC") + .dt.convert_time_zone("Asia/Kathmandu") + .dt.timestamp(time_unit) + ) + assert_equal_data(result, {"a": expected}) + + +@pytest.mark.parametrize( + ("time_unit", "expected"), + [ + ("ns", [978307200000000000, None, 978480000000000000]), + ("us", [978307200000000, None, 978480000000000]), + ("ms", [978307200000, None, 978480000000]), + ], +) +def test_timestamp_dates( + request: pytest.FixtureRequest, + constructor: Constructor, + time_unit: Literal["ns", "us", "ms"], + expected: list[int | None], +) -> None: + if 
any( + x in str(constructor) + for x in ("pandas_constructor", "pandas_nullable_constructor", "cudf") + ): + request.applymarker(pytest.mark.xfail) + + dates = {"a": [datetime(2001, 1, 1), None, datetime(2001, 1, 3)]} + if "dask" in str(constructor): + df = nw.from_native( + constructor(dates).astype({"a": "timestamp[ns][pyarrow]"}) # type: ignore[union-attr] + ) + else: + df = nw.from_native(constructor(dates)) + result = df.select(nw.col("a").dt.date().dt.timestamp(time_unit)) + assert_equal_data(result, {"a": expected}) + + +def test_timestamp_invalid_date( + request: pytest.FixtureRequest, constructor: Constructor +) -> None: + if "polars" in str(constructor): + request.applymarker(pytest.mark.xfail) + data_str = {"a": ["x", "y", None]} + data_num = {"a": [1, 2, None]} + df_str = nw.from_native(constructor(data_str)) + df_num = nw.from_native(constructor(data_num)) + msg = "Input should be either of Date or Datetime type" + with pytest.raises(TypeError, match=msg): + df_str.select(nw.col("a").dt.timestamp()) + with pytest.raises(TypeError, match=msg): + df_num.select(nw.col("a").dt.timestamp()) + + +def test_timestamp_invalid_unit_expr(constructor: Constructor) -> None: + time_unit_invalid = "i" + msg = ( + "invalid `time_unit`" + f"\n\nExpected one of {{'ns', 'us', 'ms'}}, got {time_unit_invalid!r}." + ) + with pytest.raises(ValueError, match=msg): + nw.from_native(constructor(data)).select( + nw.col("a").dt.timestamp(time_unit_invalid) # type: ignore[arg-type] + ) + + +def test_timestamp_invalid_unit_series(constructor_eager: ConstructorEager) -> None: + time_unit_invalid = "i" + msg = ( + "invalid `time_unit`" + f"\n\nExpected one of {{'ns', 'us', 'ms'}}, got {time_unit_invalid!r}." + ) + with pytest.raises(ValueError, match=msg): + nw.from_native(constructor_eager(data))["a"].dt.timestamp(time_unit_invalid) # type: ignore[arg-type] + + +@given( # type: ignore[misc] + inputs=st.datetimes(min_value=datetime(1960, 1, 1), max_value=datetime(1980, 1, 1)), + time_unit=st.sampled_from(["ms", "us", "ns"]), + # We keep 'ms' out for now due to an upstream bug: https://github.com/pola-rs/polars/issues/19309 + starting_time_unit=st.sampled_from(["us", "ns"]), +) +@pytest.mark.skipif(PANDAS_VERSION < (2, 2), reason="bug in old pandas") +@pytest.mark.skipif(POLARS_VERSION < (0, 20, 7), reason="bug in old Polars") +def test_timestamp_hypothesis( + inputs: datetime, + time_unit: Literal["ms", "us", "ns"], + starting_time_unit: Literal["ms", "us", "ns"], +) -> None: + import polars as pl + + @nw.narwhalify + def func(s: nw.Series) -> nw.Series: + return s.dt.timestamp(time_unit) + + result_pl = func(pl.Series([inputs], dtype=pl.Datetime(starting_time_unit))) + result_pd = func(pd.Series([inputs], dtype=f"datetime64[{starting_time_unit}]")) + result_pdpa = func( + pd.Series([inputs], dtype=f"timestamp[{starting_time_unit}][pyarrow]") + ) + result_pa = func(pa.chunked_array([[inputs]], type=pa.timestamp(starting_time_unit))) + assert result_pl[0] == result_pd[0] + assert result_pl[0] == result_pdpa[0] + assert result_pl[0] == result_pa[0].as_py() diff --git a/tests/expr_and_series/dt/to_string_test.py b/tests/expr_and_series/dt/to_string_test.py index 6017c33d2..629b39806 100644 --- a/tests/expr_and_series/dt/to_string_test.py +++ b/tests/expr_and_series/dt/to_string_test.py @@ -7,7 +7,8 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data from tests.utils 
import is_windows data = { @@ -29,7 +30,7 @@ ], ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") -def test_dt_to_string_series(constructor_eager: Any, fmt: str) -> None: +def test_dt_to_string_series(constructor_eager: ConstructorEager, fmt: str) -> None: input_frame = nw.from_native(constructor_eager(data), eager_only=True) input_series = input_frame["a"] @@ -44,7 +45,7 @@ def test_dt_to_string_series(constructor_eager: Any, fmt: str) -> None: # the fraction of a second. result = {"a": input_series.dt.to_string(fmt).str.replace(r"\.\d+$", "")} - compare_dicts(result, {"a": expected_col}) + assert_equal_data(result, {"a": expected_col}) @pytest.mark.parametrize( @@ -70,7 +71,7 @@ def test_dt_to_string_expr(constructor: Constructor, fmt: str) -> None: result = input_frame.select( nw.col("a").dt.to_string(fmt).str.replace(r"\.\d+$", "").alias("b") ) - compare_dicts(result, {"b": expected_col}) + assert_equal_data(result, {"b": expected_col}) def _clean_string(result: str) -> str: @@ -100,7 +101,7 @@ def _clean_string_expr(e: Any) -> Any: ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") def test_dt_to_string_iso_local_datetime_series( - constructor_eager: Any, data: datetime, expected: str + constructor_eager: ConstructorEager, data: datetime, expected: str ) -> None: df = constructor_eager({"a": [data]}) result = ( @@ -138,12 +139,12 @@ def test_dt_to_string_iso_local_datetime_expr( result = nw.from_native(df).with_columns( _clean_string_expr(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S.%f")).alias("b") ) - compare_dicts(result, {"a": [data], "b": [_clean_string(expected)]}) + assert_equal_data(result, {"a": [data], "b": [_clean_string(expected)]}) result = nw.from_native(df).with_columns( _clean_string_expr(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S%.f")).alias("b") ) - compare_dicts(result, {"a": [data], "b": [_clean_string(expected)]}) + assert_equal_data(result, {"a": [data], "b": [_clean_string(expected)]}) @pytest.mark.parametrize( @@ -152,7 +153,7 @@ def test_dt_to_string_iso_local_datetime_expr( ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") def test_dt_to_string_iso_local_date_series( - constructor_eager: Any, data: datetime, expected: str + constructor_eager: ConstructorEager, data: datetime, expected: str ) -> None: df = constructor_eager({"a": [data]}) result = nw.from_native(df, eager_only=True)["a"].dt.to_string("%Y-%m-%d").item(0) @@ -171,4 +172,4 @@ def test_dt_to_string_iso_local_date_expr( result = nw.from_native(df).with_columns( nw.col("a").dt.to_string("%Y-%m-%d").alias("b") ) - compare_dicts(result, {"a": [data], "b": [expected]}) + assert_equal_data(result, {"a": [data], "b": [expected]}) diff --git a/tests/expr_and_series/dt/total_minutes_test.py b/tests/expr_and_series/dt/total_minutes_test.py index bcd664442..094c51cbf 100644 --- a/tests/expr_and_series/dt/total_minutes_test.py +++ b/tests/expr_and_series/dt/total_minutes_test.py @@ -9,7 +9,7 @@ from hypothesis import given import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION @given( @@ -19,7 +19,7 @@ ) ) # type: ignore[misc] @pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.2.0"), + PANDAS_VERSION < (2, 2, 0), reason="pyarrow dtype not available", ) @pytest.mark.slow diff --git a/tests/expr_and_series/fill_null_test.py b/tests/expr_and_series/fill_null_test.py index 6efde5ac0..471172698 100644 --- a/tests/expr_and_series/fill_null_test.py +++ 
b/tests/expr_and_series/fill_null_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [0.0, None, 2, 3, 4], @@ -20,10 +21,10 @@ def test_fill_null(constructor: Constructor) -> None: "b": [1.0, 99, 99, 5, 3], "c": [5.0, 99, 3, 2, 1], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_fill_null_series(constructor_eager: Any) -> None: +def test_fill_null_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) expected = { @@ -36,4 +37,4 @@ def test_fill_null_series(constructor_eager: Any) -> None: b=df["b"].fill_null(99), c=df["c"].fill_null(99), ) - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/filter_test.py b/tests/expr_and_series/filter_test.py index 80267d1d0..b13370c85 100644 --- a/tests/expr_and_series/filter_test.py +++ b/tests/expr_and_series/filter_test.py @@ -1,10 +1,11 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "i": [0, 1, 2, 3, 4], @@ -20,14 +21,14 @@ def test_filter(constructor: Constructor, request: pytest.FixtureRequest) -> Non df = nw.from_native(constructor(data)) result = df.select(nw.col("a").filter(nw.col("i") < 2, nw.col("c") == 5)) expected = {"a": [0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_filter_series(constructor_eager: Any) -> None: +def test_filter_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result = df.select(df["a"].filter((df["i"] < 2) & (df["c"] == 5))) expected = {"a": [0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result_s = df["a"].filter([True, False, False, False, False]) expected = {"a": [0]} - compare_dicts({"a": result_s}, expected) + assert_equal_data({"a": result_s}, expected) diff --git a/tests/expr_and_series/gather_every_test.py b/tests/expr_and_series/gather_every_test.py index e01294ef9..7ec7a62cf 100644 --- a/tests/expr_and_series/gather_every_test.py +++ b/tests/expr_and_series/gather_every_test.py @@ -1,10 +1,11 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": list(range(10))} @@ -21,15 +22,17 @@ def test_gather_every_expr( result = df.select(nw.col("a").gather_every(n=n, offset=offset)) expected = {"a": data["a"][offset::n]} - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize("n", [1, 2, 3]) @pytest.mark.parametrize("offset", [1, 2, 3]) -def test_gather_every_series(constructor_eager: Any, n: int, offset: int) -> None: +def test_gather_every_series( + constructor_eager: ConstructorEager, n: int, offset: int +) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = series.gather_every(n=n, offset=offset) expected = data["a"][offset::n] - compare_dicts({"a": result}, {"a": expected}) 
+ assert_equal_data({"a": result}, {"a": expected}) diff --git a/tests/expr_and_series/head_test.py b/tests/expr_and_series/head_test.py index 2a6326921..499114f0e 100644 --- a/tests/expr_and_series/head_test.py +++ b/tests/expr_and_series/head_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data @pytest.mark.parametrize("n", [2, -1]) @@ -18,12 +17,12 @@ def test_head(constructor: Constructor, n: int, request: pytest.FixtureRequest) df = nw.from_native(constructor({"a": [1, 2, 3]})) result = df.select(nw.col("a").head(n)) expected = {"a": [1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize("n", [2, -1]) -def test_head_series(constructor_eager: Any, n: int) -> None: +def test_head_series(constructor_eager: ConstructorEager, n: int) -> None: df = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True) result = df.select(df["a"].head(n)) expected = {"a": [1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/is_between_test.py b/tests/expr_and_series/is_between_test.py index 0a9e578ea..8d08c6fac 100644 --- a/tests/expr_and_series/is_between_test.py +++ b/tests/expr_and_series/is_between_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [1, 4, 2, 5], @@ -26,7 +25,7 @@ def test_is_between(constructor: Constructor, closed: str, expected: list[bool]) df = nw.from_native(constructor(data)) result = df.select(nw.col("a").is_between(1, 5, closed=closed)) expected_dict = {"a": expected} - compare_dicts(result, expected_dict) + assert_equal_data(result, expected_dict) @pytest.mark.parametrize( @@ -39,9 +38,9 @@ def test_is_between(constructor: Constructor, closed: str, expected: list[bool]) ], ) def test_is_between_series( - constructor_eager: Any, closed: str, expected: list[bool] + constructor_eager: ConstructorEager, closed: str, expected: list[bool] ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result = df.with_columns(a=df["a"].is_between(1, 5, closed=closed)) expected_dict = {"a": expected} - compare_dicts(result, expected_dict) + assert_equal_data(result, expected_dict) diff --git a/tests/expr_and_series/is_duplicated_test.py b/tests/expr_and_series/is_duplicated_test.py index 7859aed02..2f5a8e32e 100644 --- a/tests/expr_and_series/is_duplicated_test.py +++ b/tests/expr_and_series/is_duplicated_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [1, 1, 2], "b": [1, 2, 3], "index": [0, 1, 2]} @@ -11,11 +12,11 @@ def test_is_duplicated_expr(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(nw.col("a", "b").is_duplicated(), "index").sort("index") expected = {"a": [True, True, False], "b": [False, False, False], "index": [0, 1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, 
expected) -def test_is_duplicated_series(constructor_eager: Any) -> None: +def test_is_duplicated_series(constructor_eager: ConstructorEager) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = series.is_duplicated() expected = {"a": [True, True, False]} - compare_dicts({"a": result}, expected) + assert_equal_data({"a": result}, expected) diff --git a/tests/expr_and_series/is_first_distinct_test.py b/tests/expr_and_series/is_first_distinct_test.py index 93ffc5d37..7084fb3fb 100644 --- a/tests/expr_and_series/is_first_distinct_test.py +++ b/tests/expr_and_series/is_first_distinct_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [1, 1, 2, 3, 2], @@ -17,13 +18,13 @@ def test_is_first_distinct_expr(constructor: Constructor) -> None: "a": [True, False, True, True, False], "b": [True, True, True, False, False], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_is_first_distinct_series(constructor_eager: Any) -> None: +def test_is_first_distinct_series(constructor_eager: ConstructorEager) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = series.is_first_distinct() expected = { "a": [True, False, True, True, False], } - compare_dicts({"a": result}, expected) + assert_equal_data({"a": result}, expected) diff --git a/tests/expr_and_series/is_in_test.py b/tests/expr_and_series/is_in_test.py index 085b1efbe..ee0080af9 100644 --- a/tests/expr_and_series/is_in_test.py +++ b/tests/expr_and_series/is_in_test.py @@ -1,10 +1,11 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [1, 4, 2, 5]} @@ -14,15 +15,15 @@ def test_expr_is_in(constructor: Constructor) -> None: result = df.select(nw.col("a").is_in([4, 5])) expected = {"a": [False, True, False, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_ser_is_in(constructor_eager: Any) -> None: +def test_ser_is_in(constructor_eager: ConstructorEager) -> None: ser = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = {"a": ser.is_in([4, 5])} expected = {"a": [False, True, False, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_is_in_other(constructor: Constructor) -> None: diff --git a/tests/expr_and_series/is_last_distinct_test.py b/tests/expr_and_series/is_last_distinct_test.py index 00db7f735..b91c171d3 100644 --- a/tests/expr_and_series/is_last_distinct_test.py +++ b/tests/expr_and_series/is_last_distinct_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [1, 1, 2, 3, 2], @@ -17,13 +18,13 @@ def test_is_last_distinct_expr(constructor: Constructor) -> None: "a": [False, True, False, True, True], "b": [False, False, True, True, True], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_is_last_distinct_series(constructor_eager: 
Any) -> None: +def test_is_last_distinct_series(constructor_eager: ConstructorEager) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = series.is_last_distinct() expected = { "a": [False, True, False, True, True], } - compare_dicts({"a": result}, expected) + assert_equal_data({"a": result}, expected) diff --git a/tests/expr_and_series/is_null_test.py b/tests/expr_and_series/is_null_test.py index 85ba55dc4..5d5250da9 100644 --- a/tests/expr_and_series/is_null_test.py +++ b/tests/expr_and_series/is_null_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data def test_null(constructor: Constructor) -> None: @@ -11,13 +12,13 @@ def test_null(constructor: Constructor) -> None: df = nw.from_native(constructor(data_na)) result = df.select(nw.col("a").is_null(), ~nw.col("z").is_null()) - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_null_series(constructor_eager: Any) -> None: +def test_null_series(constructor_eager: ConstructorEager) -> None: data_na = {"a": [None, 3, 2], "z": [7.0, None, None]} expected = {"a": [True, False, False], "z": [True, False, False]} df = nw.from_native(constructor_eager(data_na), eager_only=True) result = {"a": df["a"].is_null(), "z": ~df["z"].is_null()} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/is_unique_test.py b/tests/expr_and_series/is_unique_test.py index b10f7a68f..f5716c3fd 100644 --- a/tests/expr_and_series/is_unique_test.py +++ b/tests/expr_and_series/is_unique_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [1, 1, 2], @@ -19,13 +20,13 @@ def test_is_unique_expr(constructor: Constructor) -> None: "b": [True, True, True], "index": [0, 1, 2], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_is_unique_series(constructor_eager: Any) -> None: +def test_is_unique_series(constructor_eager: ConstructorEager) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = series.is_unique() expected = { "a": [False, False, True], } - compare_dicts({"a": result}, expected) + assert_equal_data({"a": result}, expected) diff --git a/tests/expr_and_series/len_test.py b/tests/expr_and_series/len_test.py index b1e1674bf..55a46f641 100644 --- a/tests/expr_and_series/len_test.py +++ b/tests/expr_and_series/len_test.py @@ -1,10 +1,11 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data def test_len_no_filter(constructor: Constructor) -> None: @@ -15,7 +16,7 @@ def test_len_no_filter(constructor: Constructor) -> None: (nw.col("a").len() * 2).alias("l2"), ) - compare_dicts(df, expected) + assert_equal_data(df, expected) def test_len_chaining(constructor: Constructor, request: pytest.FixtureRequest) -> None: @@ -28,7 +29,7 @@ def test_len_chaining(constructor: Constructor, request: pytest.FixtureRequest) 
nw.col("a").filter(nw.col("b") == 2).len().alias("a2"), ) - compare_dicts(df, expected) + assert_equal_data(df, expected) def test_namespace_len(constructor: Constructor) -> None: @@ -36,17 +37,17 @@ def test_namespace_len(constructor: Constructor) -> None: nw.len(), a=nw.len() ) expected = {"len": [3], "a": [3]} - compare_dicts(df, expected) + assert_equal_data(df, expected) df = ( nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]})) .select() .select(nw.len(), a=nw.len()) ) expected = {"len": [0], "a": [0]} - compare_dicts(df, expected) + assert_equal_data(df, expected) -def test_len_series(constructor_eager: Any) -> None: +def test_len_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 2, 1]} s = nw.from_native(constructor_eager(data), eager_only=True)["a"] diff --git a/tests/expr_and_series/max_horizontal_test.py b/tests/expr_and_series/max_horizontal_test.py index 711ce4e0d..a489f9cb3 100644 --- a/tests/expr_and_series/max_horizontal_test.py +++ b/tests/expr_and_series/max_horizontal_test.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from typing import Any import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"a": [1, 3, None, None], "b": [4, None, 6, None], "z": [3, 1, None, None]} expected_values = [4, 3, 6, float("nan")] @@ -15,11 +17,11 @@ def test_maxh(constructor: Constructor, col_expr: Any) -> None: df = nw.from_native(constructor(data)) result = df.select(horizontal_max=nw.max_horizontal(col_expr, nw.col("b"), "z")) expected = {"horizontal_max": expected_values} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_maxh_all(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(nw.max_horizontal(nw.all()), c=nw.max_horizontal(nw.all())) expected = {"a": expected_values, "c": expected_values} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/max_test.py b/tests/expr_and_series/max_test.py index 1ea32531e..09483cb7d 100644 --- a/tests/expr_and_series/max_test.py +++ b/tests/expr_and_series/max_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} @@ -16,11 +15,13 @@ def test_expr_max_expr(constructor: Constructor, expr: nw.Expr) -> None: df = nw.from_native(constructor(data)) result = df.select(expr) expected = {"a": [3], "b": [6], "z": [9.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize(("col", "expected"), [("a", 3), ("b", 6), ("z", 9.0)]) -def test_expr_max_series(constructor_eager: Any, col: str, expected: float) -> None: +def test_expr_max_series( + constructor_eager: ConstructorEager, col: str, expected: float +) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)[col] result = series.max() - compare_dicts({col: [result]}, {col: [expected]}) + assert_equal_data({col: [result]}, {col: [expected]}) diff --git a/tests/expr_and_series/mean_horizontal_test.py b/tests/expr_and_series/mean_horizontal_test.py index ce9ac8fe0..31b4b2109 100644 --- a/tests/expr_and_series/mean_horizontal_test.py +++ 
b/tests/expr_and_series/mean_horizontal_test.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from typing import Any import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data @pytest.mark.parametrize("col_expr", [nw.col("a"), "a"]) @@ -13,7 +15,7 @@ def test_meanh(constructor: Constructor, col_expr: Any) -> None: df = nw.from_native(constructor(data)) result = df.select(horizontal_mean=nw.mean_horizontal(col_expr, nw.col("b"))) expected = {"horizontal_mean": [2.5, 3.0, 6.0, float("nan")]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_meanh_all(constructor: Constructor) -> None: @@ -23,9 +25,9 @@ def test_meanh_all(constructor: Constructor) -> None: expected = { "a": [6, 12, 18], } - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select(c=nw.mean_horizontal(nw.all())) expected = { "c": [6, 12, 18], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/mean_test.py b/tests/expr_and_series/mean_test.py index 50e6fd862..bab1fe821 100644 --- a/tests/expr_and_series/mean_test.py +++ b/tests/expr_and_series/mean_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [1, 3, 2], "b": [4, 4, 7], "z": [7.0, 8, 9]} @@ -16,11 +15,13 @@ def test_expr_mean_expr(constructor: Constructor, expr: nw.Expr) -> None: df = nw.from_native(constructor(data)) result = df.select(expr) expected = {"a": [2.0], "b": [5.0], "z": [8.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize(("col", "expected"), [("a", 2.0), ("b", 5.0), ("z", 8.0)]) -def test_expr_mean_series(constructor_eager: Any, col: str, expected: float) -> None: +def test_expr_mean_series( + constructor_eager: ConstructorEager, col: str, expected: float +) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)[col] result = series.mean() - compare_dicts({col: [result]}, {col: [expected]}) + assert_equal_data({col: [result]}, {col: [expected]}) diff --git a/tests/expr_and_series/min_horizontal_test.py b/tests/expr_and_series/min_horizontal_test.py index ca34d440d..263b76e45 100644 --- a/tests/expr_and_series/min_horizontal_test.py +++ b/tests/expr_and_series/min_horizontal_test.py @@ -1,25 +1,27 @@ +from __future__ import annotations + from typing import Any import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"a": [1, 3, None, None], "b": [4, None, 6, None], "z": [3, 1, None, None]} -expcted_values = [1, 1, 6, float("nan")] +expected_values = [1, 1, 6, float("nan")] @pytest.mark.parametrize("col_expr", [nw.col("a"), "a"]) def test_minh(constructor: Constructor, col_expr: Any) -> None: df = nw.from_native(constructor(data)) result = df.select(horizontal_min=nw.min_horizontal(col_expr, nw.col("b"), "z")) - expected = {"horizontal_min": expcted_values} - compare_dicts(result, expected) + expected = {"horizontal_min": expected_values} + assert_equal_data(result, expected) def test_minh_all(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = 
df.select(nw.min_horizontal(nw.all()), c=nw.min_horizontal(nw.all())) - expected = {"a": expcted_values, "c": expcted_values} - compare_dicts(result, expected) + expected = {"a": expected_values, "c": expected_values} + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/min_test.py b/tests/expr_and_series/min_test.py index f6e98e416..f50facb3e 100644 --- a/tests/expr_and_series/min_test.py +++ b/tests/expr_and_series/min_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} @@ -16,11 +15,13 @@ def test_expr_min_expr(constructor: Constructor, expr: nw.Expr) -> None: df = nw.from_native(constructor(data)) result = df.select(expr) expected = {"a": [1], "b": [4], "z": [7.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize(("col", "expected"), [("a", 1), ("b", 4), ("z", 7.0)]) -def test_expr_min_series(constructor_eager: Any, col: str, expected: float) -> None: +def test_expr_min_series( + constructor_eager: ConstructorEager, col: str, expected: float +) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)[col] result = series.min() - compare_dicts({col: [result]}, {col: [expected]}) + assert_equal_data({col: [result]}, {col: [expected]}) diff --git a/tests/expr_and_series/mode_test.py b/tests/expr_and_series/mode_test.py index 8e39405af..9b94ecafa 100644 --- a/tests/expr_and_series/mode_test.py +++ b/tests/expr_and_series/mode_test.py @@ -1,12 +1,12 @@ -from typing import Any +from __future__ import annotations -import polars as pl import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import POLARS_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [1, 1, 2, 2, 3], @@ -23,24 +23,25 @@ def test_mode_single_expr( df = nw.from_native(constructor(data)) result = df.select(nw.col("a").mode()).sort("a") expected = {"a": [1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_mode_multi_expr( - constructor: Constructor, request: pytest.FixtureRequest + constructor: Constructor, + request: pytest.FixtureRequest, ) -> None: if "dask" in str(constructor) or ( - "polars" in str(constructor) and parse_version(pl.__version__) >= (1, 7, 0) + "polars" in str(constructor) and POLARS_VERSION >= (1, 7, 0) ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result = df.select(nw.col("a", "b").mode()).sort("a", "b") expected = {"a": [1, 2], "b": [3, 3]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_mode_series(constructor_eager: Any) -> None: +def test_mode_series(constructor_eager: ConstructorEager) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = series.mode().sort() expected = {"a": [1, 2]} - compare_dicts({"a": result}, expected) + assert_equal_data({"a": result}, expected) diff --git a/tests/expr_and_series/n_unique_test.py b/tests/expr_and_series/n_unique_test.py index 3790bb1f3..90bffb04b 100644 --- a/tests/expr_and_series/n_unique_test.py +++ b/tests/expr_and_series/n_unique_test.py @@ -1,8 +1,9 @@ -from 
typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [1.0, None, None, 3.0], @@ -14,11 +15,11 @@ def test_n_unique(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(nw.all().n_unique()) expected = {"a": [3], "b": [4]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_n_unique_series(constructor_eager: Any) -> None: +def test_n_unique_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) expected = {"a": [3], "b": [4]} result_series = {"a": [df["a"].n_unique()], "b": [df["b"].n_unique()]} - compare_dicts(result_series, expected) + assert_equal_data(result_series, expected) diff --git a/tests/expr_and_series/name/keep_test.py b/tests/expr_and_series/name/keep_test.py index be112d716..6c89d09fc 100644 --- a/tests/expr_and_series/name/keep_test.py +++ b/tests/expr_and_series/name/keep_test.py @@ -7,7 +7,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"foo": [1, 2, 3], "BAR": [4, 5, 6]} @@ -16,14 +16,14 @@ def test_keep(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo", "BAR") * 2).name.keep()) expected = {k: [e * 2 for e in v] for k, v in data.items()} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_keep_after_alias(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo")).alias("alias_for_foo").name.keep()) expected = {"foo": data["foo"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_keep_raise_anonymous(constructor: Constructor) -> None: diff --git a/tests/expr_and_series/name/map_test.py b/tests/expr_and_series/name/map_test.py index 5fad9f930..5afda2ee8 100644 --- a/tests/expr_and_series/name/map_test.py +++ b/tests/expr_and_series/name/map_test.py @@ -7,7 +7,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"foo": [1, 2, 3], "BAR": [4, 5, 6]} @@ -20,14 +20,14 @@ def test_map(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo", "BAR") * 2).name.map(function=map_func)) expected = {map_func(k): [e * 2 for e in v] for k, v in data.items()} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_map_after_alias(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo")).alias("alias_for_foo").name.map(function=map_func)) expected = {map_func("foo"): data["foo"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_map_raise_anonymous(constructor: Constructor) -> None: diff --git a/tests/expr_and_series/name/prefix_test.py b/tests/expr_and_series/name/prefix_test.py index 95d72914f..6f3fb3c9b 100644 --- a/tests/expr_and_series/name/prefix_test.py +++ b/tests/expr_and_series/name/prefix_test.py @@ -7,7 +7,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"foo": [1, 2, 3], "BAR": [4, 5, 6]} prefix = 
"with_prefix_" @@ -17,14 +17,14 @@ def test_prefix(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo", "BAR") * 2).name.prefix(prefix)) expected = {prefix + str(k): [e * 2 for e in v] for k, v in data.items()} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_suffix_after_alias(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo")).alias("alias_for_foo").name.prefix(prefix)) expected = {prefix + "foo": data["foo"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_prefix_raise_anonymous(constructor: Constructor) -> None: diff --git a/tests/expr_and_series/name/suffix_test.py b/tests/expr_and_series/name/suffix_test.py index 1802f26f6..1c5816154 100644 --- a/tests/expr_and_series/name/suffix_test.py +++ b/tests/expr_and_series/name/suffix_test.py @@ -7,7 +7,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"foo": [1, 2, 3], "BAR": [4, 5, 6]} suffix = "_with_suffix" @@ -17,14 +17,14 @@ def test_suffix(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo", "BAR") * 2).name.suffix(suffix)) expected = {str(k) + suffix: [e * 2 for e in v] for k, v in data.items()} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_suffix_after_alias(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo")).alias("alias_for_foo").name.suffix(suffix)) expected = {"foo" + suffix: data["foo"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_suffix_raise_anonymous(constructor: Constructor) -> None: diff --git a/tests/expr_and_series/name/to_lowercase_test.py b/tests/expr_and_series/name/to_lowercase_test.py index fedac9cd3..882663f60 100644 --- a/tests/expr_and_series/name/to_lowercase_test.py +++ b/tests/expr_and_series/name/to_lowercase_test.py @@ -7,7 +7,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"foo": [1, 2, 3], "BAR": [4, 5, 6]} @@ -16,14 +16,14 @@ def test_to_lowercase(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo", "BAR") * 2).name.to_lowercase()) expected = {k.lower(): [e * 2 for e in v] for k, v in data.items()} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_to_lowercase_after_alias(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("BAR")).alias("ALIAS_FOR_BAR").name.to_lowercase()) expected = {"bar": data["BAR"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_to_lowercase_raise_anonymous(constructor: Constructor) -> None: diff --git a/tests/expr_and_series/name/to_uppercase_test.py b/tests/expr_and_series/name/to_uppercase_test.py index 29b70bd99..785da4957 100644 --- a/tests/expr_and_series/name/to_uppercase_test.py +++ b/tests/expr_and_series/name/to_uppercase_test.py @@ -7,7 +7,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"foo": [1, 2, 3], "BAR": [4, 5, 6]} @@ -16,14 +16,14 @@ def test_to_uppercase(constructor: Constructor) -> None: df = 
nw.from_native(constructor(data)) result = df.select((nw.col("foo", "BAR") * 2).name.to_uppercase()) expected = {k.upper(): [e * 2 for e in v] for k, v in data.items()} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_to_uppercase_after_alias(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select((nw.col("foo")).alias("alias_for_foo").name.to_uppercase()) expected = {"FOO": data["foo"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_to_uppercase_raise_anonymous(constructor: Constructor) -> None: diff --git a/tests/expr_and_series/nth_test.py b/tests/expr_and_series/nth_test.py index 00a8b5c9d..8179fb261 100644 --- a/tests/expr_and_series/nth_test.py +++ b/tests/expr_and_series/nth_test.py @@ -1,14 +1,12 @@ from __future__ import annotations -from typing import Any - import polars as pl import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import POLARS_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8, 9]} @@ -25,19 +23,17 @@ def test_nth( constructor: Constructor, idx: int | list[int], expected: dict[str, list[int]], - request: Any, + request: pytest.FixtureRequest, ) -> None: - if "polars" in str(constructor) and parse_version(pl.__version__) < parse_version( - "1.0.0" - ): + if "polars" in str(constructor) and POLARS_VERSION < (1, 0, 0): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result = df.select(nw.nth(idx)) - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.skipif( - parse_version(pl.__version__) >= parse_version("1.0.0"), + POLARS_VERSION >= (1, 0, 0), reason="1.0.0", ) def test_nth_not_supported() -> None: # pragma: no cover diff --git a/tests/expr_and_series/null_count_test.py b/tests/expr_and_series/null_count_test.py index 6be15ab32..0f2250713 100644 --- a/tests/expr_and_series/null_count_test.py +++ b/tests/expr_and_series/null_count_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [1.0, None, None, 3.0], @@ -17,10 +18,10 @@ def test_null_count_expr(constructor: Constructor) -> None: "a": [2], "b": [1], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_null_count_series(constructor_eager: Any) -> None: +def test_null_count_series(constructor_eager: ConstructorEager) -> None: data = [1, 2, None] series = nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"] result = series.null_count() diff --git a/tests/expr_and_series/operators_test.py b/tests/expr_and_series/operators_test.py index e3f39465c..5506e6a8d 100644 --- a/tests/expr_and_series/operators_test.py +++ b/tests/expr_and_series/operators_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data @pytest.mark.parametrize( @@ -26,7 +25,7 @@ def test_comparand_operators_scalar_expr( data = {"a": [0, 1, 2]} df = nw.from_native(constructor(data)) result = 
df.select(getattr(nw.col("a"), operator)(1)) - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) @pytest.mark.parametrize( @@ -46,7 +45,7 @@ def test_comparand_operators_expr( data = {"a": [0, 1, 1], "b": [0, 0, 2]} df = nw.from_native(constructor(data)) result = df.select(getattr(nw.col("a"), operator)(nw.col("b"))) - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) @pytest.mark.parametrize( @@ -63,7 +62,7 @@ def test_logic_operators_expr( df = nw.from_native(constructor(data)) result = df.select(getattr(nw.col("a"), operator)(nw.col("b"))) - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) @pytest.mark.parametrize( @@ -78,12 +77,12 @@ def test_logic_operators_expr( ], ) def test_comparand_operators_scalar_series( - constructor_eager: Any, operator: str, expected: list[bool] + constructor_eager: ConstructorEager, operator: str, expected: list[bool] ) -> None: data = {"a": [0, 1, 2]} s = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = {"a": (getattr(s, operator)(1))} - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) @pytest.mark.parametrize( @@ -98,13 +97,13 @@ def test_comparand_operators_scalar_series( ], ) def test_comparand_operators_series( - constructor_eager: Any, operator: str, expected: list[bool] + constructor_eager: ConstructorEager, operator: str, expected: list[bool] ) -> None: data = {"a": [0, 1, 1], "b": [0, 0, 2]} df = nw.from_native(constructor_eager(data), eager_only=True) series, other = df["a"], df["b"] result = {"a": getattr(series, operator)(other)} - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) @pytest.mark.parametrize( @@ -115,10 +114,10 @@ def test_comparand_operators_series( ], ) def test_logic_operators_series( - constructor_eager: Any, operator: str, expected: list[bool] + constructor_eager: ConstructorEager, operator: str, expected: list[bool] ) -> None: data = {"a": [True, True, False, False], "b": [True, False, True, False]} df = nw.from_native(constructor_eager(data), eager_only=True) series, other = df["a"], df["b"] result = {"a": getattr(series, operator)(other)} - compare_dicts(result, {"a": expected}) + assert_equal_data(result, {"a": expected}) diff --git a/tests/expr_and_series/over_test.py b/tests/expr_and_series/over_test.py index 2abc9a699..e4ab273c7 100644 --- a/tests/expr_and_series/over_test.py +++ b/tests/expr_and_series/over_test.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from contextlib import nullcontext as does_not_raise import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = { "a": ["a", "a", "b", "b", "b"], @@ -33,7 +35,7 @@ def test_over_single(constructor: Constructor) -> None: with context: result = df.with_columns(c_max=nw.col("c").max().over("a")) - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_over_multiple(constructor: Constructor) -> None: @@ -56,7 +58,7 @@ def test_over_multiple(constructor: Constructor) -> None: with context: result = df.with_columns(c_min=nw.col("c").min().over("a", "b")) - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_over_invalid(request: pytest.FixtureRequest, constructor: Constructor) -> None: diff --git a/tests/expr_and_series/pipe_test.py b/tests/expr_and_series/pipe_test.py index 2134a931b..0eef1cd6c 100644 --- 
a/tests/expr_and_series/pipe_test.py +++ b/tests/expr_and_series/pipe_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data input_list = {"a": [2, 4, 6, 8]} expected = [4, 16, 36, 64] @@ -11,12 +12,12 @@ def test_pipe_expr(constructor: Constructor) -> None: df = nw.from_native(constructor(input_list)) e = df.select(nw.col("a").pipe(lambda x: x**2)) - compare_dicts(e, {"a": expected}) + assert_equal_data(e, {"a": expected}) def test_pipe_series( - constructor_eager: Any, + constructor_eager: ConstructorEager, ) -> None: s = nw.from_native(constructor_eager(input_list), eager_only=True)["a"] result = s.pipe(lambda x: x**2) - compare_dicts({"a": result}, {"a": expected}) + assert_equal_data({"a": result}, {"a": expected}) diff --git a/tests/expr_and_series/quantile_test.py b/tests/expr_and_series/quantile_test.py index aae2b3647..ae707e739 100644 --- a/tests/expr_and_series/quantile_test.py +++ b/tests/expr_and_series/quantile_test.py @@ -1,14 +1,14 @@ from __future__ import annotations from contextlib import nullcontext as does_not_raise -from typing import Any from typing import Literal import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data @pytest.mark.parametrize( @@ -47,7 +47,7 @@ def test_quantile_expr( with context: result = df.select(nw.all().quantile(quantile=q, interpolation=interpolation)) - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize( @@ -62,7 +62,7 @@ def test_quantile_expr( ) @pytest.mark.filterwarnings("ignore:the `interpolation=` argument to percentile") def test_quantile_series( - constructor_eager: Any, + constructor_eager: ConstructorEager, interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], expected: float, ) -> None: @@ -72,4 +72,4 @@ def test_quantile_series( "a" ].alias("a") result = series.quantile(quantile=q, interpolation=interpolation) - compare_dicts({"a": [result]}, {"a": [expected]}) + assert_equal_data({"a": [result]}, {"a": [expected]}) diff --git a/tests/expr_and_series/reduction_test.py b/tests/expr_and_series/reduction_test.py index e22080e62..b1dcad232 100644 --- a/tests/expr_and_series/reduction_test.py +++ b/tests/expr_and_series/reduction_test.py @@ -6,7 +6,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data @pytest.mark.parametrize( @@ -32,7 +32,7 @@ def test_scalar_reduction_select( data = {"a": [1, 2, 3], "b": [4, 5, 6]} df = nw.from_native(constructor(data)) result = df.select(*expr) - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize( @@ -58,4 +58,4 @@ def test_scalar_reduction_with_columns( data = {"a": [1, 2, 3], "b": [4, 5, 6]} df = nw.from_native(constructor(data)) result = df.with_columns(*expr).select(*expected.keys()) - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/replace_time_zone_test.py b/tests/expr_and_series/replace_time_zone_test.py index 560fcfe84..76e786042 100644 --- a/tests/expr_and_series/replace_time_zone_test.py +++ b/tests/expr_and_series/replace_time_zone_test.py @@ -1,25 +1,30 @@ 
+from __future__ import annotations + from datetime import datetime from datetime import timezone -from typing import Any +from typing import TYPE_CHECKING -import pandas as pd -import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION +from tests.utils import PYARROW_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data from tests.utils import is_windows +if TYPE_CHECKING: + from tests.utils import ConstructorEager + def test_replace_time_zone( constructor: Constructor, request: pytest.FixtureRequest ) -> None: if ( (any(x in str(constructor) for x in ("pyarrow", "modin")) and is_windows()) - or ("pandas_pyarrow" in str(constructor) and parse_version(pd.__version__) < (2,)) - or ("pyarrow_table" in str(constructor) and parse_version(pa.__version__) < (12,)) + or ("pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2,)) + or ("pyarrow_table" in str(constructor) and PYARROW_VERSION < (12,)) + or ("cudf" in str(constructor)) ): request.applymarker(pytest.mark.xfail) data = { @@ -35,7 +40,7 @@ def test_replace_time_zone( assert result_dtype.time_zone == "Asia/Kathmandu" # type: ignore[attr-defined] result_str = result.select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M%z")) expected = {"a": ["2020-01-01T00:00+0545", "2020-01-02T00:00+0545"]} - compare_dicts(result_str, expected) + assert_equal_data(result_str, expected) def test_replace_time_zone_none( @@ -43,8 +48,8 @@ def test_replace_time_zone_none( ) -> None: if ( (any(x in str(constructor) for x in ("pyarrow", "modin")) and is_windows()) - or ("pandas_pyarrow" in str(constructor) and parse_version(pd.__version__) < (2,)) - or ("pyarrow_table" in str(constructor) and parse_version(pa.__version__) < (12,)) + or ("pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2,)) + or ("pyarrow_table" in str(constructor) and PYARROW_VERSION < (12,)) ): request.applymarker(pytest.mark.xfail) data = { @@ -60,22 +65,17 @@ def test_replace_time_zone_none( assert result_dtype.time_zone is None # type: ignore[attr-defined] result_str = result.select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M")) expected = {"a": ["2020-01-01T00:00", "2020-01-02T00:00"]} - compare_dicts(result_str, expected) + assert_equal_data(result_str, expected) def test_replace_time_zone_series( - constructor_eager: Any, request: pytest.FixtureRequest + constructor_eager: ConstructorEager, request: pytest.FixtureRequest ) -> None: if ( (any(x in str(constructor_eager) for x in ("pyarrow", "modin")) and is_windows()) - or ( - "pandas_pyarrow" in str(constructor_eager) - and parse_version(pd.__version__) < (2,) - ) - or ( - "pyarrow_table" in str(constructor_eager) - and parse_version(pa.__version__) < (12,) - ) + or ("pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2,)) + or ("pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < (12,)) + or ("cudf" in str(constructor_eager)) ): request.applymarker(pytest.mark.xfail) data = { @@ -91,22 +91,16 @@ def test_replace_time_zone_series( assert result_dtype.time_zone == "Asia/Kathmandu" # type: ignore[attr-defined] result_str = result.select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M%z")) expected = {"a": ["2020-01-01T00:00+0545", "2020-01-02T00:00+0545"]} - compare_dicts(result_str, expected) + assert_equal_data(result_str, expected) def test_replace_time_zone_none_series( - constructor_eager: Any, request: pytest.FixtureRequest + constructor_eager: 
ConstructorEager, request: pytest.FixtureRequest ) -> None: if ( (any(x in str(constructor_eager) for x in ("pyarrow", "modin")) and is_windows()) - or ( - "pandas_pyarrow" in str(constructor_eager) - and parse_version(pd.__version__) < (2,) - ) - or ( - "pyarrow_table" in str(constructor_eager) - and parse_version(pa.__version__) < (12,) - ) + or ("pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2,)) + or ("pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < (12,)) ): request.applymarker(pytest.mark.xfail) data = { @@ -122,4 +116,4 @@ def test_replace_time_zone_none_series( assert result_dtype.time_zone is None # type: ignore[attr-defined] result_str = result.select(df["a"].dt.to_string("%Y-%m-%dT%H:%M")) expected = {"a": ["2020-01-01T00:00", "2020-01-02T00:00"]} - compare_dicts(result_str, expected) + assert_equal_data(result_str, expected) diff --git a/tests/expr_and_series/round_test.py b/tests/expr_and_series/round_test.py index 37d6ce131..abae1d0e9 100644 --- a/tests/expr_and_series/round_test.py +++ b/tests/expr_and_series/round_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data @pytest.mark.parametrize("decimals", [0, 1, 2]) @@ -17,11 +16,11 @@ def test_round(constructor: Constructor, decimals: int) -> None: expected_data = {k: [round(e, decimals) for e in v] for k, v in data.items()} result_frame = df.select(nw.col("a").round(decimals)) - compare_dicts(result_frame, expected_data) + assert_equal_data(result_frame, expected_data) @pytest.mark.parametrize("decimals", [0, 1, 2]) -def test_round_series(constructor_eager: Any, decimals: int) -> None: +def test_round_series(constructor_eager: ConstructorEager, decimals: int) -> None: data = {"a": [1.12345, 2.56789, 3.901234]} df_raw = constructor_eager(data) df = nw.from_native(df_raw, eager_only=True) @@ -29,4 +28,4 @@ def test_round_series(constructor_eager: Any, decimals: int) -> None: expected_data = {k: [round(e, decimals) for e in v] for k, v in data.items()} result_series = df["a"].round(decimals) - compare_dicts({"a": result_series}, expected_data) + assert_equal_data({"a": result_series}, expected_data) diff --git a/tests/expr_and_series/sample_test.py b/tests/expr_and_series/sample_test.py index eb6d853ec..e8985e561 100644 --- a/tests/expr_and_series/sample_test.py +++ b/tests/expr_and_series/sample_test.py @@ -1,8 +1,10 @@ +from __future__ import annotations + import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_expr_sample(constructor: Constructor, request: pytest.FixtureRequest) -> None: @@ -57,13 +59,13 @@ def test_sample_with_seed( .collect() ) - compare_dicts(result, expected) + assert_equal_data(result, expected) series = df.collect()["a"] seed1 = series.sample(n=n, seed=123) seed2 = series.sample(n=n, seed=123) seed3 = series.sample(n=n, seed=42) - compare_dicts( + assert_equal_data( {"res1": [(seed1 == seed2).all()], "res2": [(seed1 == seed3).all()]}, expected ) diff --git a/tests/expr_and_series/shift_test.py b/tests/expr_and_series/shift_test.py index b165adf12..379f40986 100644 --- a/tests/expr_and_series/shift_test.py +++ b/tests/expr_and_series/shift_test.py @@ -1,10 +1,11 @@ -from typing import Any +from __future__ import 
annotations import pyarrow as pa import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "i": [0, 1, 2, 3, 4], @@ -23,10 +24,10 @@ def test_shift(constructor: Constructor) -> None: "b": [1, 2, 3], "c": [5, 4, 3], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_shift_series(constructor_eager: Any) -> None: +def test_shift_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result = df.with_columns( df["a"].shift(2), @@ -39,7 +40,7 @@ def test_shift_series(constructor_eager: Any) -> None: "b": [1, 2, 3], "c": [5, 4, 3], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_shift_multi_chunk_pyarrow() -> None: @@ -49,12 +50,12 @@ def test_shift_multi_chunk_pyarrow() -> None: result = df.select(nw.col("a").shift(1)) expected = {"a": [None, 1, 2, 3, 1, 2, 3, 1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select(nw.col("a").shift(-1)) expected = {"a": [2, 3, 1, 2, 3, 1, 2, 3, None]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select(nw.col("a").shift(0)) expected = {"a": [1, 2, 3, 1, 2, 3, 1, 2, 3]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/sort_test.py b/tests/expr_and_series/sort_test.py index f06e21f74..0d95722d8 100644 --- a/tests/expr_and_series/sort_test.py +++ b/tests/expr_and_series/sort_test.py @@ -1,8 +1,12 @@ +from __future__ import annotations + from typing import Any import pytest import narwhals.stable.v1 as nw +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [0, 0, 2, -1], "b": [1, 3, 2, None]} @@ -17,16 +21,14 @@ ], ) def test_sort_expr( - constructor_eager: Any, descending: Any, nulls_last: Any, expected: Any + constructor_eager: ConstructorEager, descending: Any, nulls_last: Any, expected: Any ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) - result = nw.to_native( - df.select( - "a", - nw.col("b").sort(descending=descending, nulls_last=nulls_last), - ) + result = df.select( + "a", + nw.col("b").sort(descending=descending, nulls_last=nulls_last), ) - assert result.equals(constructor_eager(expected)) + assert_equal_data(result, expected) @pytest.mark.parametrize( @@ -39,7 +41,7 @@ def test_sort_expr( ], ) def test_sort_series( - constructor_eager: Any, descending: Any, nulls_last: Any, expected: Any + constructor_eager: ConstructorEager, descending: Any, nulls_last: Any, expected: Any ) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)["b"] result = series.sort(descending=descending, nulls_last=nulls_last) diff --git a/tests/expr_and_series/std_test.py b/tests/expr_and_series/std_test.py index 400a6e0af..db51c6572 100644 --- a/tests/expr_and_series/std_test.py +++ b/tests/expr_and_series/std_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} @@ -23,10 +24,10 @@ def test_std(constructor: Constructor) -> None: "b_ddof_2": [1.632993], "z_ddof_0": [0.816497], } - compare_dicts(result, expected) 
+ assert_equal_data(result, expected) -def test_std_series(constructor_eager: Any) -> None: +def test_std_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result = { "a_ddof_default": [df["a"].std()], @@ -42,4 +43,4 @@ def test_std_series(constructor_eager: Any) -> None: "b_ddof_2": [1.632993], "z_ddof_0": [0.816497], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/str/contains_test.py b/tests/expr_and_series/str/contains_test.py index 6b9e74b69..866f50ce1 100644 --- a/tests/expr_and_series/str/contains_test.py +++ b/tests/expr_and_series/str/contains_test.py @@ -1,18 +1,14 @@ -from typing import Any +from __future__ import annotations -import pandas as pd -import polars as pl import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"pets": ["cat", "dog", "rabbit and parrot", "dove"]} -df_pandas = pd.DataFrame(data) -df_polars = pl.DataFrame(data) - def test_contains_case_insensitive( constructor: Constructor, request: pytest.FixtureRequest @@ -28,11 +24,11 @@ def test_contains_case_insensitive( "pets": ["cat", "dog", "rabbit and parrot", "dove"], "result": [False, False, True, True], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_contains_series_case_insensitive( - constructor_eager: Any, request: pytest.FixtureRequest + constructor_eager: ConstructorEager, request: pytest.FixtureRequest ) -> None: if "cudf" in str(constructor_eager): request.applymarker(pytest.mark.xfail) @@ -45,7 +41,7 @@ def test_contains_series_case_insensitive( "pets": ["cat", "dog", "rabbit and parrot", "dove"], "case_insensitive_match": [False, False, True, True], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_contains_case_sensitive(constructor: Constructor) -> None: @@ -55,14 +51,14 @@ def test_contains_case_sensitive(constructor: Constructor) -> None: "pets": ["cat", "dog", "rabbit and parrot", "dove"], "result": [False, False, True, False], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_contains_series_case_sensitive(constructor_eager: Any) -> None: +def test_contains_series_case_sensitive(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result = df.with_columns(case_sensitive_match=df["pets"].str.contains("parrot|Dove")) expected = { "pets": ["cat", "dog", "rabbit and parrot", "dove"], "case_sensitive_match": [False, False, True, False], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/str/head_test.py b/tests/expr_and_series/str/head_test.py index a4b3e7296..cf6cbd758 100644 --- a/tests/expr_and_series/str/head_test.py +++ b/tests/expr_and_series/str/head_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": ["foo", "bars"]} @@ -13,13 +14,13 @@ def test_str_head(constructor: Constructor) -> None: expected = { "a": ["foo", "bar"], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_str_head_series(constructor_eager: Any) -> None: +def 
test_str_head_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) expected = { "a": ["foo", "bar"], } result = df.select(df["a"].str.head(3)) - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/str/len_chars_test.py b/tests/expr_and_series/str/len_chars_test.py index ace145552..f9c63e01c 100644 --- a/tests/expr_and_series/str/len_chars_test.py +++ b/tests/expr_and_series/str/len_chars_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": ["foo", "foobar", "Café", "345", "東京"]} @@ -13,13 +14,13 @@ def test_str_len_chars(constructor: Constructor) -> None: expected = { "a": [3, 6, 4, 3, 2], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_str_len_chars_series(constructor_eager: Any) -> None: +def test_str_len_chars_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) expected = { "a": [3, 6, 4, 3, 2], } result = df.select(df["a"].str.len_chars()) - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/str/replace_test.py b/tests/expr_and_series/str/replace_test.py index b0cffb1b4..ffd8fce2e 100644 --- a/tests/expr_and_series/str/replace_test.py +++ b/tests/expr_and_series/str/replace_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data replace_data = [ ( @@ -54,7 +53,7 @@ replace_data, ) def test_str_replace_series( - constructor_eager: Any, + constructor_eager: ConstructorEager, data: dict[str, list[str]], pattern: str, value: str, @@ -67,7 +66,7 @@ def test_str_replace_series( result_series = df["a"].str.replace( pattern=pattern, value=value, n=n, literal=literal ) - compare_dicts({"a": result_series}, expected) + assert_equal_data({"a": result_series}, expected) @pytest.mark.parametrize( @@ -75,7 +74,7 @@ def test_str_replace_series( replace_all_data, ) def test_str_replace_all_series( - constructor_eager: Any, + constructor_eager: ConstructorEager, data: dict[str, list[str]], pattern: str, value: str, @@ -85,7 +84,7 @@ def test_str_replace_all_series( df = nw.from_native(constructor_eager(data), eager_only=True) result_series = df["a"].str.replace_all(pattern=pattern, value=value, literal=literal) - compare_dicts({"a": result_series}, expected) + assert_equal_data({"a": result_series}, expected) @pytest.mark.parametrize( @@ -106,7 +105,7 @@ def test_str_replace_expr( result_df = df.select( nw.col("a").str.replace(pattern=pattern, value=value, n=n, literal=literal) ) - compare_dicts(result_df, expected) + assert_equal_data(result_df, expected) @pytest.mark.parametrize( @@ -126,4 +125,4 @@ def test_str_replace_all_expr( result = df.select( nw.col("a").str.replace_all(pattern=pattern, value=value, literal=literal) ) - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/str/slice_test.py b/tests/expr_and_series/str/slice_test.py index e7fe0efa1..1e7115a8a 100644 --- a/tests/expr_and_series/str/slice_test.py +++ 
b/tests/expr_and_series/str/slice_test.py @@ -6,7 +6,8 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": ["fdas", "edfas"]} @@ -20,7 +21,7 @@ def test_str_slice( ) -> None: df = nw.from_native(constructor(data)) result_frame = df.select(nw.col("a").str.slice(offset, length)) - compare_dicts(result_frame, expected) + assert_equal_data(result_frame, expected) @pytest.mark.parametrize( @@ -28,9 +29,9 @@ def test_str_slice( [(1, 2, {"a": ["da", "df"]}), (-2, None, {"a": ["as", "as"]})], ) def test_str_slice_series( - constructor_eager: Any, offset: int, length: int | None, expected: Any + constructor_eager: ConstructorEager, offset: int, length: int | None, expected: Any ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result_series = df["a"].str.slice(offset, length) - compare_dicts({"a": result_series}, expected) + assert_equal_data({"a": result_series}, expected) diff --git a/tests/expr_and_series/str/starts_with_ends_with_test.py b/tests/expr_and_series/str/starts_with_ends_with_test.py index e8b0afaa9..0b11a7537 100644 --- a/tests/expr_and_series/str/starts_with_ends_with_test.py +++ b/tests/expr_and_series/str/starts_with_ends_with_test.py @@ -1,13 +1,12 @@ from __future__ import annotations -from typing import Any - import narwhals.stable.v1 as nw from tests.utils import Constructor +from tests.utils import ConstructorEager # Don't move this into typechecking block, for coverage # purposes -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"a": ["fdas", "edfas"]} @@ -18,16 +17,16 @@ def test_ends_with(constructor: Constructor) -> None: expected = { "a": [True, False], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_ends_with_series(constructor_eager: Any) -> None: +def test_ends_with_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result = df.select(df["a"].str.ends_with("das")) expected = { "a": [True, False], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_starts_with(constructor: Constructor) -> None: @@ -36,13 +35,13 @@ def test_starts_with(constructor: Constructor) -> None: expected = { "a": [True, False], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_starts_with_series(constructor_eager: Any) -> None: +def test_starts_with_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result = df.select(df["a"].str.starts_with("fda")) expected = { "a": [True, False], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/str/strip_chars_test.py b/tests/expr_and_series/str/strip_chars_test.py index 3d5b74456..d765e99e3 100644 --- a/tests/expr_and_series/str/strip_chars_test.py +++ b/tests/expr_and_series/str/strip_chars_test.py @@ -6,7 +6,8 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": ["foobar", "bar\n", " baz"]} @@ -23,7 +24,7 @@ def test_str_strip_chars( ) -> None: df = nw.from_native(constructor(data)) result_frame = df.select(nw.col("a").str.strip_chars(characters)) - compare_dicts(result_frame, expected) + 
assert_equal_data(result_frame, expected) @pytest.mark.parametrize( @@ -34,9 +35,9 @@ def test_str_strip_chars( ], ) def test_str_strip_chars_series( - constructor_eager: Any, characters: str | None, expected: Any + constructor_eager: ConstructorEager, characters: str | None, expected: Any ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result_series = df["a"].str.strip_chars(characters) - compare_dicts({"a": result_series}, expected) + assert_equal_data({"a": result_series}, expected) diff --git a/tests/expr_and_series/str/tail_test.py b/tests/expr_and_series/str/tail_test.py index 92d474262..e2543de0a 100644 --- a/tests/expr_and_series/str/tail_test.py +++ b/tests/expr_and_series/str/tail_test.py @@ -1,8 +1,9 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": ["foo", "bars"]} @@ -12,12 +13,12 @@ def test_str_tail(constructor: Constructor) -> None: expected = {"a": ["foo", "ars"]} result_frame = df.select(nw.col("a").str.tail(3)) - compare_dicts(result_frame, expected) + assert_equal_data(result_frame, expected) -def test_str_tail_series(constructor_eager: Any) -> None: +def test_str_tail_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) expected = {"a": ["foo", "ars"]} result_series = df["a"].str.tail(3) - compare_dicts({"a": result_series}, expected) + assert_equal_data({"a": result_series}, expected) diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index 8474357e0..62afda474 100644 --- a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -1,10 +1,14 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING import pytest import narwhals.stable.v1 as nw -from tests.utils import Constructor +if TYPE_CHECKING: + from tests.utils import Constructor + from tests.utils import ConstructorEager data = {"a": ["2020-01-01T12:34:56"]} @@ -24,7 +28,7 @@ def test_to_datetime(constructor: Constructor) -> None: assert str(result) == expected -def test_to_datetime_series(constructor_eager: Any) -> None: +def test_to_datetime_series(constructor_eager: ConstructorEager) -> None: if "cudf" in str(constructor_eager): # pragma: no cover expected = "2020-01-01T12:34:56.000000000" else: @@ -60,7 +64,7 @@ def test_to_datetime_infer_fmt( def test_to_datetime_series_infer_fmt( - request: pytest.FixtureRequest, constructor_eager: Any + request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) diff --git a/tests/expr_and_series/str/to_uppercase_to_lowercase_test.py b/tests/expr_and_series/str/to_uppercase_to_lowercase_test.py index 877409138..6ab26ac41 100644 --- a/tests/expr_and_series/str/to_uppercase_to_lowercase_test.py +++ b/tests/expr_and_series/str/to_uppercase_to_lowercase_test.py @@ -1,14 +1,12 @@ from __future__ import annotations -from typing import Any - -import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PYARROW_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import 
assert_equal_data @pytest.mark.parametrize( @@ -42,14 +40,14 @@ def test_str_to_uppercase( "pyarrow_table_constructor", "modin_constructor", ) - or ("dask" in str(constructor) and parse_version(pa.__version__) >= (12,)) + or ("dask" in str(constructor) and PYARROW_VERSION >= (12,)) ): # We are marking it xfail for these conditions above # since the pyarrow backend will convert # smaller cap 'ß' to upper cap 'ẞ' instead of 'SS' request.applymarker(pytest.mark.xfail) - compare_dicts(result_frame, expected) + assert_equal_data(result_frame, expected) @pytest.mark.parametrize( @@ -68,7 +66,7 @@ def test_str_to_uppercase( ], ) def test_str_to_uppercase_series( - constructor_eager: Any, + constructor_eager: ConstructorEager, data: dict[str, list[str]], expected: dict[str, list[str]], request: pytest.FixtureRequest, @@ -90,7 +88,7 @@ def test_str_to_uppercase_series( request.applymarker(pytest.mark.xfail) result_series = df["a"].str.to_uppercase() - compare_dicts({"a": result_series}, expected) + assert_equal_data({"a": result_series}, expected) @pytest.mark.parametrize( @@ -115,7 +113,7 @@ def test_str_to_lowercase( ) -> None: df = nw.from_native(constructor(data)) result_frame = df.select(nw.col("a").str.to_lowercase()) - compare_dicts(result_frame, expected) + assert_equal_data(result_frame, expected) @pytest.mark.parametrize( @@ -134,11 +132,11 @@ def test_str_to_lowercase( ], ) def test_str_to_lowercase_series( - constructor_eager: Any, + constructor_eager: ConstructorEager, data: dict[str, list[str]], expected: dict[str, list[str]], ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result_series = df["a"].str.to_lowercase() - compare_dicts({"a": result_series}, expected) + assert_equal_data({"a": result_series}, expected) diff --git a/tests/expr_and_series/sum_horizontal_test.py b/tests/expr_and_series/sum_horizontal_test.py index e9e1e4a3c..21bd138c2 100644 --- a/tests/expr_and_series/sum_horizontal_test.py +++ b/tests/expr_and_series/sum_horizontal_test.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from typing import Any import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data @pytest.mark.parametrize("col_expr", [nw.col("a"), "a"]) @@ -18,7 +20,7 @@ def test_sumh(constructor: Constructor, col_expr: Any) -> None: "z": [7.0, 8.0, 9.0], "horizontal_sum": [5, 7, 8], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_sumh_nullable(constructor: Constructor) -> None: @@ -27,7 +29,7 @@ def test_sumh_nullable(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(hsum=nw.sum_horizontal("a", "b")) - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_sumh_all(constructor: Constructor) -> None: @@ -37,9 +39,9 @@ def test_sumh_all(constructor: Constructor) -> None: expected = { "a": [11, 22, 33], } - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select(c=nw.sum_horizontal(nw.all())) expected = { "c": [11, 22, 33], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/sum_test.py b/tests/expr_and_series/sum_test.py index 8059a097d..f988e8991 100644 --- a/tests/expr_and_series/sum_test.py +++ b/tests/expr_and_series/sum_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw from tests.utils import 
Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} @@ -16,11 +15,13 @@ def test_expr_sum_expr(constructor: Constructor, expr: nw.Expr) -> None: df = nw.from_native(constructor(data)) result = df.select(expr) expected = {"a": [6], "b": [14], "z": [24.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize(("col", "expected"), [("a", 6), ("b", 14), ("z", 24.0)]) -def test_expr_sum_series(constructor_eager: Any, col: str, expected: float) -> None: +def test_expr_sum_series( + constructor_eager: ConstructorEager, col: str, expected: float +) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)[col] result = series.sum() - compare_dicts({col: [result]}, {col: [expected]}) + assert_equal_data({col: [result]}, {col: [expected]}) diff --git a/tests/expr_and_series/tail_test.py b/tests/expr_and_series/tail_test.py index fc3e6159a..a9ba5b20b 100644 --- a/tests/expr_and_series/tail_test.py +++ b/tests/expr_and_series/tail_test.py @@ -1,10 +1,11 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data @pytest.mark.parametrize("n", [2, -1]) @@ -16,12 +17,12 @@ def test_head(constructor: Constructor, n: int, request: pytest.FixtureRequest) df = nw.from_native(constructor({"a": [1, 2, 3]})) result = df.select(nw.col("a").tail(n)) expected = {"a": [2, 3]} - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize("n", [2, -1]) -def test_head_series(constructor_eager: Any, n: int) -> None: +def test_head_series(constructor_eager: ConstructorEager, n: int) -> None: df = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True) result = df.select(df["a"].tail(n)) expected = {"a": [2, 3]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/unary_test.py b/tests/expr_and_series/unary_test.py index 66afd22af..c165be8bd 100644 --- a/tests/expr_and_series/unary_test.py +++ b/tests/expr_and_series/unary_test.py @@ -1,23 +1,19 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data def test_unary(constructor: Constructor) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - result = ( - nw.from_native(constructor(data)) - .with_columns( - a_mean=nw.col("a").mean(), - a_sum=nw.col("a").sum(), - b_nunique=nw.col("b").n_unique(), - z_min=nw.col("z").min(), - z_max=nw.col("z").max(), - ) - .unique(["a_mean", "a_sum", "b_nunique", "z_min", "z_max"]) - .select(["a_mean", "a_sum", "b_nunique", "z_min", "z_max"]) + result = nw.from_native(constructor(data)).select( + a_mean=nw.col("a").mean(), + a_sum=nw.col("a").sum(), + b_nunique=nw.col("b").n_unique(), + z_min=nw.col("z").min(), + z_max=nw.col("z").max(), ) expected = { "a_mean": [2], @@ -26,10 +22,10 @@ def test_unary(constructor: Constructor) -> None: "z_min": [7], "z_max": [9], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_unary_series(constructor_eager: Any) -> None: +def test_unary_series(constructor_eager: 
ConstructorEager) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df = nw.from_native(constructor_eager(data), eager_only=True) result = { @@ -46,4 +42,4 @@ def test_unary_series(constructor_eager: Any) -> None: "z_min": [7], "z_max": [9], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/unique_test.py b/tests/expr_and_series/unique_test.py index 5639179ba..acef3f60a 100644 --- a/tests/expr_and_series/unique_test.py +++ b/tests/expr_and_series/unique_test.py @@ -1,10 +1,11 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [1, 1, 2]} @@ -15,11 +16,11 @@ def test_unique_expr(constructor: Constructor, request: pytest.FixtureRequest) - df = nw.from_native(constructor(data)) result = df.select(nw.col("a").unique()) expected = {"a": [1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_unique_series(constructor_eager: Any) -> None: +def test_unique_series(constructor_eager: ConstructorEager) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = series.unique() expected = {"a": [1, 2]} - compare_dicts({"a": result}, expected) + assert_equal_data({"a": result}, expected) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 6fabaa68b..3cef177fa 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -1,13 +1,12 @@ from __future__ import annotations -from typing import Any - import numpy as np import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [1, 2, 3], @@ -24,7 +23,7 @@ def test_when(constructor: Constructor) -> None: expected = { "a_when": [3, np.nan, np.nan], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_when_otherwise(constructor: Constructor) -> None: @@ -33,7 +32,7 @@ def test_when_otherwise(constructor: Constructor) -> None: expected = { "a_when": [3, 6, 6], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_multiple_conditions(constructor: Constructor) -> None: @@ -44,7 +43,7 @@ def test_multiple_conditions(constructor: Constructor) -> None: expected = { "a_when": [3, np.nan, np.nan], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_no_arg_when_fail(constructor: Constructor) -> None: @@ -68,10 +67,10 @@ def test_value_numpy_array( expected = { "a_when": [3, np.nan, np.nan], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_value_series(constructor_eager: Any) -> None: +def test_value_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data)) s_data = {"s": [3, 4, 5]} s = nw.from_native(constructor_eager(s_data))["s"] @@ -80,7 +79,7 @@ def test_value_series(constructor_eager: Any) -> None: expected = { "a_when": [3, np.nan, np.nan], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_value_expression(constructor: Constructor) -> None: @@ -89,7 +88,7 @@ def test_value_expression(constructor: Constructor) -> None: expected = { "a_when": [10, np.nan, np.nan], } - 
compare_dicts(result, expected) + assert_equal_data(result, expected) def test_otherwise_numpy_array( @@ -107,10 +106,10 @@ def test_otherwise_numpy_array( expected = { "a_when": [-1, 9, 10], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_otherwise_series(constructor_eager: Any) -> None: +def test_otherwise_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data)) s_data = {"s": [0, 9, 10]} s = nw.from_native(constructor_eager(s_data))["s"] @@ -119,7 +118,7 @@ def test_otherwise_series(constructor_eager: Any) -> None: expected = { "a_when": [-1, 9, 10], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_otherwise_expression(constructor: Constructor) -> None: @@ -130,18 +129,18 @@ def test_otherwise_expression(constructor: Constructor) -> None: expected = { "a_when": [-1, 9, 10], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_when_then_otherwise_into_expr(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(nw.when(nw.col("a") > 1).then("c").otherwise("e")) expected = {"c": [7, 5, 6]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_when_then_otherwise_lit_str(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(nw.when(nw.col("a") > 1).then(nw.col("b")).otherwise(nw.lit("z"))) expected = {"b": ["z", "b", "c"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/add_test.py b/tests/frame/add_test.py index c95fbae97..27a332ed0 100644 --- a/tests/frame/add_test.py +++ b/tests/frame/add_test.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_add(constructor: Constructor) -> None: @@ -19,4 +21,4 @@ def test_add(constructor: Constructor) -> None: "d": [-1.0, 1.0, 0.0], "e": [0.0, 2.0, 1.0], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/array_dunder_test.py b/tests/frame/array_dunder_test.py index 8a082bb1f..71446de9c 100644 --- a/tests/frame/array_dunder_test.py +++ b/tests/frame/array_dunder_test.py @@ -1,20 +1,25 @@ -from typing import Any +from __future__ import annotations import numpy as np -import pandas as pd -import polars as pl -import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version -from tests.utils import compare_dicts +from tests.utils import PANDAS_VERSION +from tests.utils import POLARS_VERSION +from tests.utils import PYARROW_VERSION +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_array_dunder(request: pytest.FixtureRequest, constructor_eager: Any) -> None: - if "pyarrow_table" in str(constructor_eager) and parse_version( - pa.__version__ - ) < parse_version("16.0.0"): # pragma: no cover +def test_array_dunder( + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, +) -> None: + if "pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < ( + 16, + 0, + 0, + ): # pragma: no cover request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True) @@ -23,11 +28,14 @@ def test_array_dunder(request: pytest.FixtureRequest, constructor_eager: Any) -> def test_array_dunder_with_dtype( - request: 
pytest.FixtureRequest, constructor_eager: Any + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, ) -> None: - if "pyarrow_table" in str(constructor_eager) and parse_version( - pa.__version__ - ) < parse_version("16.0.0"): # pragma: no cover + if "pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < ( + 16, + 0, + 0, + ): # pragma: no cover request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True) @@ -36,15 +44,16 @@ def test_array_dunder_with_dtype( def test_array_dunder_with_copy( - request: pytest.FixtureRequest, constructor_eager: Any + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, ) -> None: - if "pyarrow_table" in str(constructor_eager) and parse_version(pa.__version__) < ( + if "pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < ( 16, 0, 0, ): # pragma: no cover request.applymarker(pytest.mark.xfail) - if "polars" in str(constructor_eager) and parse_version(pl.__version__) < ( + if "polars" in str(constructor_eager) and POLARS_VERSION < ( 0, 20, 28, @@ -54,12 +63,10 @@ def test_array_dunder_with_copy( df = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True) result = df.__array__(copy=True) np.testing.assert_array_equal(result, np.array([[1], [2], [3]], dtype="int64")) - if "pandas_constructor" in str(constructor_eager) and parse_version( - pd.__version__ - ) < (3,): + if "pandas_constructor" in str(constructor_eager) and PANDAS_VERSION < (3,): # If it's pandas, we know that `copy=False` definitely took effect. # So, let's check it! result = df.__array__(copy=False) np.testing.assert_array_equal(result, np.array([[1], [2], [3]], dtype="int64")) result[0, 0] = 999 - compare_dicts(df, {"a": [999, 2, 3]}) + assert_equal_data(df, {"a": [999, 2, 3]}) diff --git a/tests/frame/arrow_c_stream_test.py b/tests/frame/arrow_c_stream_test.py index cb856adf9..def950d22 100644 --- a/tests/frame/arrow_c_stream_test.py +++ b/tests/frame/arrow_c_stream_test.py @@ -1,17 +1,18 @@ +from __future__ import annotations + import polars as pl import pyarrow as pa import pyarrow.compute as pc import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import POLARS_VERSION +from tests.utils import PYARROW_VERSION +@pytest.mark.skipif(POLARS_VERSION < (1, 3), reason="too old for pycapsule in Polars") @pytest.mark.skipif( - parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars" -) -@pytest.mark.skipif( - parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow" + PYARROW_VERSION < (16, 0, 0), reason="too old for pycapsule in PyArrow" ) def test_arrow_c_stream_test() -> None: df = nw.from_native(pl.Series([1, 2, 3]).to_frame("a"), eager_only=True) @@ -20,11 +21,9 @@ def test_arrow_c_stream_test() -> None: assert pc.all(pc.equal(result["a"], expected["a"])).as_py() +@pytest.mark.skipif(POLARS_VERSION < (1, 3), reason="too old for pycapsule in Polars") @pytest.mark.skipif( - parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars" -) -@pytest.mark.skipif( - parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow" + PYARROW_VERSION < (16, 0, 0), reason="too old for pycapsule in PyArrow" ) def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None: # "poison" the dunder method to make sure it actually got called above @@ -36,11 +35,9 @@ def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> 
None: pa.table(df) +@pytest.mark.skipif(POLARS_VERSION < (1, 3), reason="too old for pycapsule in Polars") @pytest.mark.skipif( - parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars" -) -@pytest.mark.skipif( - parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow" + PYARROW_VERSION < (16, 0, 0), reason="too old for pycapsule in PyArrow" ) def test_arrow_c_stream_test_fallback(monkeypatch: pytest.MonkeyPatch) -> None: # Check that fallback to PyArrow works diff --git a/tests/frame/clone_test.py b/tests/frame/clone_test.py index e94183e2e..1a02910c8 100644 --- a/tests/frame/clone_test.py +++ b/tests/frame/clone_test.py @@ -1,8 +1,10 @@ +from __future__ import annotations + import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_clone(request: pytest.FixtureRequest, constructor: Constructor) -> None: @@ -16,4 +18,4 @@ def test_clone(request: pytest.FixtureRequest, constructor: Constructor) -> None df_clone = df.clone() assert df is not df_clone assert df._compliant_frame is not df_clone._compliant_frame - compare_dicts(df_clone, expected) + assert_equal_data(df_clone, expected) diff --git a/tests/frame/columns_test.py b/tests/frame/columns_test.py index 90a9c922d..3a18fb591 100644 --- a/tests/frame/columns_test.py +++ b/tests/frame/columns_test.py @@ -1,7 +1,13 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import pytest import narwhals.stable.v1 as nw -from tests.utils import Constructor + +if TYPE_CHECKING: + from tests.utils import Constructor @pytest.mark.filterwarnings("ignore:Determining|Resolving.*") diff --git a/tests/frame/concat_test.py b/tests/frame/concat_test.py index 926f3f988..6a18d872b 100644 --- a/tests/frame/concat_test.py +++ b/tests/frame/concat_test.py @@ -1,8 +1,10 @@ +from __future__ import annotations + import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_concat_horizontal(constructor: Constructor) -> None: @@ -20,7 +22,7 @@ def test_concat_horizontal(constructor: Constructor) -> None: "c": [6, 12, -1], "d": [0, -4, 2], } - compare_dicts(result, expected) + assert_equal_data(result, expected) with pytest.raises(ValueError, match="No items"): nw.concat([]) @@ -37,7 +39,7 @@ def test_concat_vertical(constructor: Constructor) -> None: result = nw.concat([df_left, df_right], how="vertical") expected = {"c": [1, 3, 2, 6, 12, -1], "d": [4, 4, 6, 0, -4, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) with pytest.raises(ValueError, match="No items"): nw.concat([], how="vertical") diff --git a/tests/frame/double_test.py b/tests/frame/double_test.py index 6840145ec..87ff66af9 100644 --- a/tests/frame/double_test.py +++ b/tests/frame/double_test.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_double(constructor: Constructor) -> None: @@ -9,7 +11,7 @@ def test_double(constructor: Constructor) -> None: result = df.with_columns(nw.all() * 2) expected = {"a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.with_columns(nw.col("a").alias("o"), nw.all() * 2) expected = { @@ -18,4 +20,4 
@@ def test_double(constructor: Constructor) -> None: "b": [8, 8, 12], "z": [14.0, 16.0, 18.0], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/drop_nulls_test.py b/tests/frame/drop_nulls_test.py index 9988aa6b2..680cbd4c4 100644 --- a/tests/frame/drop_nulls_test.py +++ b/tests/frame/drop_nulls_test.py @@ -4,7 +4,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = { "a": [1.0, 2.0, None, 4.0], @@ -18,7 +18,7 @@ def test_drop_nulls(constructor: Constructor) -> None: "a": [2.0, 4.0], "b": [3.0, 5.0], } - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize( @@ -33,4 +33,4 @@ def test_drop_nulls_subset( constructor: Constructor, subset: str | list[str], expected: dict[str, float] ) -> None: result = nw.from_native(constructor(data)).drop_nulls(subset=subset) - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/drop_test.py b/tests/frame/drop_test.py index f8fc33254..f9da91cbb 100644 --- a/tests/frame/drop_test.py +++ b/tests/frame/drop_test.py @@ -4,13 +4,12 @@ from typing import TYPE_CHECKING from typing import Any -import polars as pl import pytest from polars.exceptions import ColumnNotFoundError as PlColumnNotFoundError import narwhals.stable.v1 as nw from narwhals._exceptions import ColumnNotFoundError -from narwhals.utils import parse_version +from tests.utils import POLARS_VERSION if TYPE_CHECKING: from tests.utils import Constructor @@ -49,11 +48,7 @@ def test_drop_strict( *, strict: bool, ) -> None: - if ( - "polars_lazy" in str(request) - and parse_version(pl.__version__) < (1, 0, 0) - and strict - ): + if "polars_lazy" in str(request) and POLARS_VERSION < (1, 0, 0) and strict: request.applymarker(pytest.mark.xfail) data = {"a": [1, 3, 2], "b": [4, 4, 6]} diff --git a/tests/frame/filter_test.py b/tests/frame/filter_test.py index 9c9b1b6fd..8721f3bde 100644 --- a/tests/frame/filter_test.py +++ b/tests/frame/filter_test.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from contextlib import nullcontext as does_not_raise import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_filter(constructor: Constructor) -> None: @@ -12,7 +14,7 @@ def test_filter(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.filter(nw.col("a") > 1) expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_filter_with_boolean_list(constructor: Constructor) -> None: @@ -31,4 +33,4 @@ def test_filter_with_boolean_list(constructor: Constructor) -> None: with context: result = df.filter([False, True, True]) expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/gather_every_test.py b/tests/frame/gather_every_test.py index 40e18a30b..671737ad1 100644 --- a/tests/frame/gather_every_test.py +++ b/tests/frame/gather_every_test.py @@ -1,8 +1,10 @@ +from __future__ import annotations + import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"a": list(range(10))} @@ -13,4 +15,4 @@ def test_gather_every(constructor: Constructor, n: int, 
offset: int) -> None: df = nw.from_native(constructor(data)) result = df.gather_every(n=n, offset=offset) expected = {"a": data["a"][offset::n]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/get_column_test.py b/tests/frame/get_column_test.py index 58766ac31..ec5ab24aa 100644 --- a/tests/frame/get_column_test.py +++ b/tests/frame/get_column_test.py @@ -1,16 +1,17 @@ -from typing import Any +from __future__ import annotations import pandas as pd import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_get_column(constructor_eager: Any) -> None: +def test_get_column(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager({"a": [1, 2], "b": [3, 4]}), eager_only=True) result = df.get_column("a") - compare_dicts({"a": result}, {"a": [1, 2]}) + assert_equal_data({"a": result}, {"a": [1, 2]}) assert result.name == "a" with pytest.raises( (KeyError, TypeError), match="Expected str|'int' object cannot be converted|0" @@ -22,11 +23,11 @@ def test_get_column(constructor_eager: Any) -> None: def test_non_string_name() -> None: df = pd.DataFrame({0: [1, 2]}) result = nw.from_native(df, eager_only=True).get_column(0) # type: ignore[arg-type] - compare_dicts({"a": result}, {"a": [1, 2]}) + assert_equal_data({"a": result}, {"a": [1, 2]}) assert result.name == 0 # type: ignore[comparison-overlap] def test_get_single_row() -> None: df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) result = nw.from_native(df, eager_only=True)[0] # type: ignore[call-overload] - compare_dicts(result, {"a": [1], "b": [3]}) + assert_equal_data(result, {"a": [1], "b": [3]}) diff --git a/tests/frame/getitem_test.py b/tests/frame/getitem_test.py index ce96c1b24..9f5a9b52d 100644 --- a/tests/frame/getitem_test.py +++ b/tests/frame/getitem_test.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import Any - import numpy as np import pandas as pd import polars as pl @@ -9,7 +7,8 @@ import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = { "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], @@ -17,27 +16,27 @@ } -def test_slice_column(constructor_eager: Any) -> None: +def test_slice_column(constructor_eager: ConstructorEager) -> None: result = nw.from_native(constructor_eager(data))["a"] assert isinstance(result, nw.Series) - compare_dicts({"a": result}, {"a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]}) + assert_equal_data({"a": result}, {"a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]}) -def test_slice_rows(constructor_eager: Any) -> None: +def test_slice_rows(constructor_eager: ConstructorEager) -> None: result = nw.from_native(constructor_eager(data))[1:] - compare_dicts(result, {"a": [2.0, 3.0, 4.0, 5.0, 6.0], "b": [12, 13, 14, 15, 16]}) + assert_equal_data(result, {"a": [2.0, 3.0, 4.0, 5.0, 6.0], "b": [12, 13, 14, 15, 16]}) result = nw.from_native(constructor_eager(data))[2:4] - compare_dicts(result, {"a": [3.0, 4.0], "b": [13, 14]}) + assert_equal_data(result, {"a": [3.0, 4.0], "b": [13, 14]}) def test_slice_rows_with_step( - request: pytest.FixtureRequest, constructor_eager: Any + request: pytest.FixtureRequest, constructor_eager: ConstructorEager ) -> None: if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) result = nw.from_native(constructor_eager(data))[1::2] - compare_dicts(result, {"a": [2.0, 4.0, 
6.0], "b": [12, 14, 16]}) + assert_equal_data(result, {"a": [2.0, 4.0, 6.0], "b": [12, 14, 16]}) def test_slice_rows_with_step_pyarrow() -> None: @@ -53,28 +52,28 @@ def test_slice_lazy_fails() -> None: _ = nw.from_native(pl.LazyFrame(data))[1:] -def test_slice_int(constructor_eager: Any) -> None: +def test_slice_int(constructor_eager: ConstructorEager) -> None: result = nw.from_native(constructor_eager(data), eager_only=True)[1] # type: ignore[call-overload] - compare_dicts(result, {"a": [2], "b": [12]}) + assert_equal_data(result, {"a": [2], "b": [12]}) -def test_slice_fails(constructor_eager: Any) -> None: +def test_slice_fails(constructor_eager: ConstructorEager) -> None: class Foo: ... with pytest.raises(TypeError, match="Expected str or slice, got:"): nw.from_native(constructor_eager(data), eager_only=True)[Foo()] # type: ignore[call-overload] -def test_gather(constructor_eager: Any) -> None: +def test_gather(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) result = df[[0, 3, 1]] expected = { "a": [1.0, 4.0, 2.0], "b": [11, 14, 12], } - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[np.array([0, 3, 1])] - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_gather_pandas_index() -> None: @@ -82,120 +81,120 @@ def test_gather_pandas_index() -> None: df = pd.DataFrame({"a": [4, 1, 2], "b": [1, 4, 2]}, index=[2, 1, 3]) result = nw.from_native(df, eager_only=True)[[1, 2]] expected = {"a": [1, 2], "b": [4, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = nw.from_native(df, eager_only=True)[[1, 2], "a"].to_frame() expected = {"a": [1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_gather_rows_cols(constructor_eager: Any) -> None: +def test_gather_rows_cols(constructor_eager: ConstructorEager) -> None: native_df = constructor_eager(data) df = nw.from_native(native_df, eager_only=True) expected = {"b": [11, 14, 12]} result = {"b": df[[0, 3, 1], 1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = {"b": df[np.array([0, 3, 1]), "b"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_slice_both_tuples_of_ints(constructor_eager: Any) -> None: +def test_slice_both_tuples_of_ints(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} df = nw.from_native(constructor_eager(data), eager_only=True) result = df[[0, 1], [0, 2]] expected = {"a": [1, 2], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_slice_int_rows_str_columns(constructor_eager: Any) -> None: +def test_slice_int_rows_str_columns(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} df = nw.from_native(constructor_eager(data), eager_only=True) result = df[[0, 1], ["a", "c"]] expected = {"a": [1, 2], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_slice_slice_columns(constructor_eager: Any) -> None: # noqa: PLR0915 +def test_slice_slice_columns(constructor_eager: ConstructorEager) -> None: # noqa: PLR0915 data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [1, 4, 2]} df = nw.from_native(constructor_eager(data), eager_only=True) result = df[[0, 1], "b":"c"] # type: ignore[misc] expected = {"b": [4, 5], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result 
= df[[0, 1], :"c"] # type: ignore[misc] expected = {"a": [1, 2], "b": [4, 5], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[[0, 1], "a":"d":2] # type: ignore[misc] expected = {"a": [1, 2], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[[0, 1], "b":] # type: ignore[misc] expected = {"b": [4, 5], "c": [7, 8], "d": [1, 4]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[[0, 1], 1:3] expected = {"b": [4, 5], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[[0, 1], :3] expected = {"a": [1, 2], "b": [4, 5], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[[0, 1], 0:4:2] expected = {"a": [1, 2], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[[0, 1], 1:] expected = {"b": [4, 5], "c": [7, 8], "d": [1, 4]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[:, ["b", "d"]] expected = {"b": [4, 5, 6], "d": [1, 4, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[:, [0, 2]] expected = {"a": [1, 2, 3], "c": [7, 8, 9]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[:2, [0, 2]] expected = {"a": [1, 2], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[:2, ["a", "c"]] expected = {"a": [1, 2], "c": [7, 8]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[1:, [0, 2]] expected = {"a": [2, 3], "c": [8, 9]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[1:, ["a", "c"]] expected = {"a": [2, 3], "c": [8, 9]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[["b", "c"]] expected = {"b": [4, 5, 6], "c": [7, 8, 9]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[:2] expected = {"a": [1, 2], "b": [4, 5], "c": [7, 8], "d": [1, 4]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[2:] expected = {"a": [3], "b": [6], "c": [9], "d": [2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) # mypy says "Slice index must be an integer", but we do in fact support # using string slices result = df["a":"b"] # type: ignore[misc] expected = {"a": [1, 2, 3], "b": [4, 5, 6]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[(0, 1), :] expected = {"a": [1, 2], "b": [4, 5], "c": [7, 8], "d": [1, 4]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[[0, 1], :] expected = {"a": [1, 2], "b": [4, 5], "c": [7, 8], "d": [1, 4]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df[[0, 1], df.columns] expected = {"a": [1, 2], "b": [4, 5], "c": [7, 8], "d": [1, 4]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_slice_invalid(constructor_eager: Any) -> None: +def test_slice_invalid(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 2], "b": [4, 5]} df = nw.from_native(constructor_eager(data), eager_only=True) with pytest.raises(TypeError, match="Hint:"): df[0, 0] -def test_slice_edge_cases(constructor_eager: Any) -> None: +def test_slice_edge_cases(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [1, 4, 2]} df = 
nw.from_native(constructor_eager(data), eager_only=True) assert df[[], :].shape == (0, 4) @@ -219,7 +218,7 @@ def test_slice_edge_cases(constructor_eager: Any) -> None: ], ) def test_get_item_works_with_tuple_and_list_and_range_row_and_col_indexing( - constructor_eager: Any, + constructor_eager: ConstructorEager, row_idx: list[int] | tuple[int] | range, col_idx: list[int] | tuple[int] | range, ) -> None: @@ -236,7 +235,7 @@ def test_get_item_works_with_tuple_and_list_and_range_row_and_col_indexing( ], ) def test_get_item_works_with_tuple_and_list_and_range_row_indexing_and_slice_col_indexing( - constructor_eager: Any, + constructor_eager: ConstructorEager, row_idx: list[int] | tuple[int] | range, col: slice, ) -> None: @@ -253,7 +252,7 @@ def test_get_item_works_with_tuple_and_list_and_range_row_indexing_and_slice_col ], ) def test_get_item_works_with_tuple_and_list_indexing_and_str( - constructor_eager: Any, + constructor_eager: ConstructorEager, row_idx: list[int] | tuple[int] | range, col: str, ) -> None: diff --git a/tests/frame/head_test.py b/tests/frame/head_test.py index 7234828b0..e817aa416 100644 --- a/tests/frame/head_test.py +++ b/tests/frame/head_test.py @@ -2,7 +2,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_head(constructor: Constructor) -> None: @@ -13,11 +13,11 @@ def test_head(constructor: Constructor) -> None: df = nw.from_native(df_raw) result = df.head(2) - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.head(2) - compare_dicts(result, expected) + assert_equal_data(result, expected) # negative indices not allowed for lazyframes result = df.lazy().collect().head(-1) - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/interchange_native_namespace_test.py b/tests/frame/interchange_native_namespace_test.py index 8a67d07b8..22d036460 100644 --- a/tests/frame/interchange_native_namespace_test.py +++ b/tests/frame/interchange_native_namespace_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import duckdb import polars as pl import pytest @@ -25,6 +27,7 @@ def test_interchange() -> None: series.__native_namespace__() +@pytest.mark.filterwarnings("ignore:.*The `ArrowDtype` class is not available in pandas") def test_ibis( tmpdir: pytest.TempdirFactory, ) -> None: # pragma: no cover diff --git a/tests/frame/interchange_schema_test.py b/tests/frame/interchange_schema_test.py index afec06831..35de7d74a 100644 --- a/tests/frame/interchange_schema_test.py +++ b/tests/frame/interchange_schema_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from datetime import date from datetime import datetime from datetime import timedelta @@ -8,7 +10,7 @@ import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import IBIS_VERSION def test_interchange_schema() -> None: @@ -114,7 +116,7 @@ def test_interchange_schema_ibis( tbl = ibis.read_parquet(filepath) df = nw.from_native(tbl, eager_or_interchange_only=True) result = df.schema - if parse_version(ibis.__version__) > (6, 0, 0): + if IBIS_VERSION > (6, 0, 0): expected = { "a": nw.Int64, "b": nw.Int32, diff --git a/tests/frame/interchange_select_test.py b/tests/frame/interchange_select_test.py new file mode 100644 index 000000000..e124735f7 --- /dev/null +++ b/tests/frame/interchange_select_test.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +import duckdb +import 
polars as pl +import pytest + +import narwhals.stable.v1 as nw + +data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} + + +def test_interchange() -> None: + df_pl = pl.DataFrame(data) + df = nw.from_native(df_pl.__dataframe__(), eager_or_interchange_only=True) + with pytest.raises( + NotImplementedError, + match="Attribute select is not supported for metadata-only dataframes", + ): + df.select("a", "z") + + +def test_interchange_ibis( + tmpdir: pytest.TempdirFactory, +) -> None: # pragma: no cover + ibis = pytest.importorskip("ibis") + df_pl = pl.DataFrame(data) + + filepath = str(tmpdir / "file.parquet") # type: ignore[operator] + df_pl.write_parquet(filepath) + + tbl = ibis.read_parquet(filepath) + df = nw.from_native(tbl, eager_or_interchange_only=True) + + out_cols = df.select("a", "z").schema.names() + + assert out_cols == ["a", "z"] + + +def test_interchange_duckdb() -> None: + df_pl = pl.DataFrame(data) # noqa: F841 + rel = duckdb.sql("select * from df_pl") + df = nw.from_native(rel, eager_or_interchange_only=True) + + out_cols = df.select("a", "z").schema.names() + + assert out_cols == ["a", "z"] diff --git a/tests/frame/interchange_to_arrow_test.py b/tests/frame/interchange_to_arrow_test.py index 7308607ea..d1ddd2a53 100644 --- a/tests/frame/interchange_to_arrow_test.py +++ b/tests/frame/interchange_to_arrow_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import duckdb import polars as pl import pyarrow as pa diff --git a/tests/frame/interchange_to_pandas_test.py b/tests/frame/interchange_to_pandas_test.py index f56575fa3..938c23eaf 100644 --- a/tests/frame/interchange_to_pandas_test.py +++ b/tests/frame/interchange_to_pandas_test.py @@ -1,15 +1,17 @@ +from __future__ import annotations + import duckdb import pandas as pd import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.0], "z": ["x", "y", "z"]} def test_interchange_to_pandas(request: pytest.FixtureRequest) -> None: - if parse_version(pd.__version__) < parse_version("1.5.0"): + if PANDAS_VERSION < (1, 5, 0): request.applymarker(pytest.mark.xfail) df_raw = pd.DataFrame(data) df = nw.from_native(df_raw.__dataframe__(), eager_or_interchange_only=True) @@ -18,9 +20,10 @@ def test_interchange_to_pandas(request: pytest.FixtureRequest) -> None: def test_interchange_ibis_to_pandas( - tmpdir: pytest.TempdirFactory, request: pytest.FixtureRequest + tmpdir: pytest.TempdirFactory, + request: pytest.FixtureRequest, ) -> None: # pragma: no cover - if parse_version(pd.__version__) < parse_version("1.5.0"): + if PANDAS_VERSION < (1, 5, 0): request.applymarker(pytest.mark.xfail) ibis = pytest.importorskip("ibis") @@ -36,7 +39,7 @@ def test_interchange_ibis_to_pandas( def test_interchange_duckdb_to_pandas(request: pytest.FixtureRequest) -> None: - if parse_version(pd.__version__) < parse_version("1.0.0"): + if PANDAS_VERSION < (1, 0, 0): request.applymarker(pytest.mark.xfail) df_raw = pd.DataFrame(data) rel = duckdb.sql("select * from df_raw") diff --git a/tests/frame/invalid_test.py b/tests/frame/invalid_test.py index 2fdf53949..7fdf3e5fe 100644 --- a/tests/frame/invalid_test.py +++ b/tests/frame/invalid_test.py @@ -1,11 +1,12 @@ -import numpy as np +from __future__ import annotations + import pandas as pd import polars as pl import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import NUMPY_VERSION def test_invalid() -> None: 
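
Note: the version constants imported from tests.utils throughout this patch (NUMPY_VERSION, PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION, and similarly IBIS_VERSION) are not defined anywhere in this diff. The following is only a minimal sketch, under the assumption that tests/utils.py pre-parses each backend's version into a comparable tuple so tests can write e.g. PANDAS_VERSION < (2, 1) instead of calling parse_version inline:

    # Sketch only: assumed tests/utils.py additions, not shown in this diff.
    from __future__ import annotations

    import numpy as np
    import pandas as pd
    import polars as pl
    import pyarrow as pa

    from narwhals.utils import parse_version

    # parse_version returns a tuple of ints, so plain tuple comparisons work.
    NUMPY_VERSION: tuple[int, ...] = parse_version(np.__version__)
    PANDAS_VERSION: tuple[int, ...] = parse_version(pd.__version__)
    POLARS_VERSION: tuple[int, ...] = parse_version(pl.__version__)
    PYARROW_VERSION: tuple[int, ...] = parse_version(pa.__version__)
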
@@ -42,9 +43,7 @@ def test_validate_laziness() -> None: nw.concat([nw.from_native(df, eager_only=True), nw.from_native(df).lazy()]) # type: ignore[list-item] -@pytest.mark.skipif( - parse_version(np.__version__) < parse_version("1.26.4"), reason="too old" -) +@pytest.mark.skipif(NUMPY_VERSION < (1, 26, 4), reason="too old") def test_memmap() -> None: pytest.importorskip("sklearn") # the headache this caused me... diff --git a/tests/frame/is_duplicated_test.py b/tests/frame/is_duplicated_test.py index e1eb3f298..bcc803712 100644 --- a/tests/frame/is_duplicated_test.py +++ b/tests/frame/is_duplicated_test.py @@ -1,15 +1,14 @@ from __future__ import annotations -from typing import Any - import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_is_duplicated(constructor_eager: Any) -> None: +def test_is_duplicated(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df_raw = constructor_eager(data) df = nw.from_native(df_raw, eager_only=True) result = nw.concat([df, df.head(1)]).is_duplicated() expected = {"is_duplicated": [True, False, False, True]} - compare_dicts({"is_duplicated": result}, expected) + assert_equal_data({"is_duplicated": result}, expected) diff --git a/tests/frame/is_empty_test.py b/tests/frame/is_empty_test.py index a772abc8b..7ea6b22ad 100644 --- a/tests/frame/is_empty_test.py +++ b/tests/frame/is_empty_test.py @@ -1,14 +1,20 @@ from __future__ import annotations +from typing import TYPE_CHECKING from typing import Any import pytest import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager + @pytest.mark.parametrize(("threshold", "expected"), [(0, False), (10, True)]) -def test_is_empty(constructor_eager: Any, threshold: Any, expected: Any) -> None: +def test_is_empty( + constructor_eager: ConstructorEager, threshold: Any, expected: Any +) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df_raw = constructor_eager(data) df = nw.from_native(df_raw, eager_only=True) diff --git a/tests/frame/is_unique_test.py b/tests/frame/is_unique_test.py index 4259c8773..81718f36c 100644 --- a/tests/frame/is_unique_test.py +++ b/tests/frame/is_unique_test.py @@ -1,15 +1,14 @@ from __future__ import annotations -from typing import Any - import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_is_unique(constructor_eager: Any) -> None: +def test_is_unique(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df_raw = constructor_eager(data) df = nw.from_native(df_raw, eager_only=True) result = nw.concat([df, df.head(1)]).is_unique() expected = {"is_unique": [False, True, True, False]} - compare_dicts({"is_unique": result}, expected) + assert_equal_data({"is_unique": result}, expected) diff --git a/tests/frame/item_test.py b/tests/frame/item_test.py index 7afbee12d..5a5f037f1 100644 --- a/tests/frame/item_test.py +++ b/tests/frame/item_test.py @@ -6,7 +6,8 @@ import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data @pytest.mark.parametrize( @@ -14,12 +15,15 @@ [(0, 2, 7), (1, "z", 8)], ) def test_item( - constructor_eager: Any, row: int | None, column: int | str | None, expected: Any + constructor_eager: 
ConstructorEager, + row: int | None, + column: int | str | None, + expected: Any, ) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df = nw.from_native(constructor_eager(data), eager_only=True) - compare_dicts({"a": [df.item(row, column)]}, {"a": [expected]}) - compare_dicts({"a": [df.select("a").head(1).item()]}, {"a": [1]}) + assert_equal_data({"a": [df.item(row, column)]}, {"a": [expected]}) + assert_equal_data({"a": [df.select("a").head(1).item()]}, {"a": [1]}) @pytest.mark.parametrize( @@ -43,7 +47,10 @@ def test_item( ], ) def test_item_value_error( - constructor_eager: Any, row: int | None, column: int | str | None, err_msg: str + constructor_eager: ConstructorEager, + row: int | None, + column: int | str | None, + err_msg: str, ) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} with pytest.raises(ValueError, match=err_msg): diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 85c76eba7..c743893d0 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -10,9 +10,9 @@ import narwhals.stable.v1 as nw from narwhals.utils import Implementation -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_inner_join_two_keys(constructor: Constructor) -> None: @@ -40,8 +40,8 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: "zorro_right": [7.0, 8, 9], "index": [0, 1, 2], } - compare_dicts(result, expected) - compare_dicts(result_on, expected) + assert_equal_data(result, expected) + assert_equal_data(result_on, expected) def test_inner_join_single_key(constructor: Constructor) -> None: @@ -70,8 +70,8 @@ def test_inner_join_single_key(constructor: Constructor) -> None: "zorro_right": [7.0, 8, 9], "index": [0, 1, 2], } - compare_dicts(result, expected) - compare_dicts(result_on, expected) + assert_equal_data(result, expected) + assert_equal_data(result_on, expected) def test_cross_join(constructor: Constructor) -> None: @@ -82,7 +82,7 @@ def test_cross_join(constructor: Constructor) -> None: "antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], "antananarivo_right": [1, 2, 3, 1, 2, 3, 1, 2, 3], } - compare_dicts(result, expected) + assert_equal_data(result, expected) with pytest.raises( ValueError, match="Can not pass `left_on`, `right_on` or `on` keys for cross join" @@ -122,7 +122,7 @@ def test_cross_join_suffix(constructor: Constructor, suffix: str) -> None: "antananarivo": [1, 1, 1, 2, 2, 2, 3, 3, 3], f"antananarivo{suffix}": [1, 2, 3, 1, 2, 3, 1, 2, 3], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_cross_join_non_pandas() -> None: @@ -135,7 +135,7 @@ def test_cross_join_non_pandas() -> None: "antananarivo": [1, 1, 1, 3, 3, 3, 2, 2, 2], "antananarivo_right": [1, 3, 2, 1, 3, 2, 1, 3, 2], } - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize( @@ -164,7 +164,7 @@ def test_anti_join( df = nw.from_native(constructor(data)) other = df.filter(filter_expr) result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type] - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize( @@ -204,7 +204,7 @@ def test_semi_join( result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( # type: ignore[arg-type] "antananarivo" ) - compare_dicts(result, expected) + assert_equal_data(result, expected) 
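
Note: Constructor, ConstructorEager and assert_equal_data (the renamed compare_dicts) are likewise imported from tests/utils.py but their definitions are outside this diff. A rough sketch of their assumed shape — names and details here are illustrative, not the actual implementation:

    # Sketch only: assumed shape of the helpers imported from tests/utils.py.
    from __future__ import annotations

    from typing import Any
    from typing import Callable
    from typing import Mapping
    from typing import Sequence

    # Assumed aliases: callables that build a native (lazy or eager) frame
    # from a dict of columns, parametrised over the supported backends.
    Constructor = Callable[[dict[str, Any]], Any]
    ConstructorEager = Callable[[dict[str, Any]], Any]

    def assert_equal_data(result: Any, expected: Mapping[str, Sequence[Any]]) -> None:
        # Collect/convert the result to a plain dict of lists, then compare
        # column by column, treating float NaN in `expected` as a null marker.
        if hasattr(result, "collect"):
            result = result.collect()
        if hasattr(result, "to_dict"):
            result = result.to_dict(as_series=False)
        assert list(result) == list(expected)
        for key, expected_values in expected.items():
            actual = list(result[key])
            assert len(actual) == len(expected_values)
            for lhs, rhs in zip(actual, expected_values):
                if isinstance(rhs, float) and rhs != rhs:  # expected NaN/null
                    assert lhs is None or lhs != lhs
                else:
                    assert lhs == rhs, f"{key}: {lhs} != {rhs}"
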
@pytest.mark.parametrize("how", ["right", "full"]) @@ -258,8 +258,8 @@ def test_left_join(constructor: Constructor) -> None: "index": [0, 1, 2], "co": [4, 5, 7], } - compare_dicts(result, expected) - compare_dicts(result_on_list, expected_on_list) + assert_equal_data(result, expected) + assert_equal_data(result_on_list, expected_on_list) @pytest.mark.filterwarnings("ignore: the default coalesce behavior") @@ -277,7 +277,7 @@ def test_left_join_multiple_column(constructor: Constructor) -> None: result = result.sort("index") result = result.drop("index_right") expected = {"antananarivo": [1, 2, 3], "bob": [4, 5, 6], "index": [0, 1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.filterwarnings("ignore: the default coalesce behavior") @@ -306,7 +306,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: "d_right": [1, 4, 2], "index": [0, 1, 2], } - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df_left.join( df_right, # type: ignore[arg-type] left_on="antananarivo", @@ -323,7 +323,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: "c": [4.0, 6.0, float("nan")], "index": [0, 1, 2], } - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"]) @@ -354,11 +354,12 @@ def test_join_keys_exceptions(constructor: Constructor, how: str) -> None: def test_joinasof_numeric( - constructor: Constructor, request: pytest.FixtureRequest + constructor: Constructor, + request: pytest.FixtureRequest, ) -> None: if "pyarrow_table" in str(constructor) or "cudf" in str(constructor): request.applymarker(pytest.mark.xfail) - if parse_version(pd.__version__) < (2, 1) and ( + if PANDAS_VERSION < (2, 1) and ( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) @@ -403,18 +404,21 @@ def test_joinasof_numeric( "val": ["a", "b", "c"], "val_right": [1, 6, 7], } - compare_dicts(result_backward, expected_backward) - compare_dicts(result_forward, expected_forward) - compare_dicts(result_nearest, expected_nearest) - compare_dicts(result_backward_on, expected_backward) - compare_dicts(result_forward_on, expected_forward) - compare_dicts(result_nearest_on, expected_nearest) + assert_equal_data(result_backward, expected_backward) + assert_equal_data(result_forward, expected_forward) + assert_equal_data(result_nearest, expected_nearest) + assert_equal_data(result_backward_on, expected_backward) + assert_equal_data(result_forward_on, expected_forward) + assert_equal_data(result_nearest_on, expected_nearest) -def test_joinasof_time(constructor: Constructor, request: pytest.FixtureRequest) -> None: +def test_joinasof_time( + constructor: Constructor, + request: pytest.FixtureRequest, +) -> None: if "pyarrow_table" in str(constructor) or "cudf" in str(constructor): request.applymarker(pytest.mark.xfail) - if parse_version(pd.__version__) < (2, 1) and ("pandas_pyarrow" in str(constructor)): + if PANDAS_VERSION < (2, 1) and ("pandas_pyarrow" in str(constructor)): request.applymarker(pytest.mark.xfail) df = nw.from_native( constructor( @@ -481,18 +485,21 @@ def test_joinasof_time(constructor: Constructor, request: pytest.FixtureRequest) "population": [82.19, 82.66, 83.12], "gdp": [4164, 4696, 4696], } - compare_dicts(result_backward, expected_backward) - compare_dicts(result_forward, expected_forward) - compare_dicts(result_nearest, expected_nearest) - 
compare_dicts(result_backward_on, expected_backward) - compare_dicts(result_forward_on, expected_forward) - compare_dicts(result_nearest_on, expected_nearest) + assert_equal_data(result_backward, expected_backward) + assert_equal_data(result_forward, expected_forward) + assert_equal_data(result_nearest, expected_nearest) + assert_equal_data(result_backward_on, expected_backward) + assert_equal_data(result_forward_on, expected_forward) + assert_equal_data(result_nearest_on, expected_nearest) -def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) -> None: +def test_joinasof_by( + constructor: Constructor, + request: pytest.FixtureRequest, +) -> None: if "pyarrow_table" in str(constructor) or "cudf" in str(constructor): request.applymarker(pytest.mark.xfail) - if parse_version(pd.__version__) < (2, 1) and ( + if PANDAS_VERSION < (2, 1) and ( ("pandas_pyarrow" in str(constructor)) or ("pandas_nullable" in str(constructor)) ): request.applymarker(pytest.mark.xfail) @@ -518,8 +525,8 @@ def test_joinasof_by(constructor: Constructor, request: pytest.FixtureRequest) - "c": [9, 2, 1, 1], "d": [1, 3, float("nan"), 4], } - compare_dicts(result, expected) - compare_dicts(result_by, expected) + assert_equal_data(result, expected) + assert_equal_data(result_by, expected) @pytest.mark.parametrize("strategy", ["back", "furthest"]) diff --git a/tests/frame/lazy_test.py b/tests/frame/lazy_test.py index 09ca734c2..df27a4cc9 100644 --- a/tests/frame/lazy_test.py +++ b/tests/frame/lazy_test.py @@ -1,10 +1,15 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING import narwhals as nw import narwhals.stable.v1 as nw_v1 +if TYPE_CHECKING: + from tests.utils import ConstructorEager + -def test_lazy(constructor_eager: Any) -> None: +def test_lazy(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True) result = df.lazy() assert isinstance(result, nw.LazyFrame) diff --git a/tests/frame/len_test.py b/tests/frame/len_test.py index c06884e03..b22f0c67d 100644 --- a/tests/frame/len_test.py +++ b/tests/frame/len_test.py @@ -1,13 +1,18 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager data = { "a": [1.0, 2.0, None, 4.0], "b": [None, 3.0, None, 5.0], } -def test_len(constructor_eager: Any) -> None: +def test_len(constructor_eager: ConstructorEager) -> None: result = len(nw.from_native(constructor_eager(data), eager_only=True)) + assert result == 4 diff --git a/tests/frame/lit_test.py b/tests/frame/lit_test.py index aa18edb40..b30233fbd 100644 --- a/tests/frame/lit_test.py +++ b/tests/frame/lit_test.py @@ -8,7 +8,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data if TYPE_CHECKING: from narwhals.dtypes import DType @@ -31,7 +31,7 @@ def test_lit( "z": [7.0, 8.0, 9.0], "lit": expected_lit, } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_lit_error(constructor: Constructor) -> None: diff --git a/tests/frame/null_count_test.py b/tests/frame/null_count_test.py index d3bf7f25c..f89c24e52 100644 --- a/tests/frame/null_count_test.py +++ b/tests/frame/null_count_test.py @@ -1,15 +1,14 @@ from __future__ import annotations -from typing import Any - import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from 
tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_null_count(constructor_eager: Any) -> None: +def test_null_count(constructor_eager: ConstructorEager) -> None: data = {"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]} df_raw = constructor_eager(data) df = nw.from_native(df_raw, eager_only=True) result = df.null_count() expected = {"a": [1], "b": [0], "z": [1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/pipe_test.py b/tests/frame/pipe_test.py index b7b57e0a1..6a3b30fc7 100644 --- a/tests/frame/pipe_test.py +++ b/tests/frame/pipe_test.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = { "a": ["foo", "bars"], @@ -13,4 +15,4 @@ def test_pipe(constructor: Constructor) -> None: columns = df.collect_schema().names() result = df.pipe(lambda _df: _df.select([x for x in columns if len(x) == 2])) expected = {"ab": ["foo", "bars"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/reindex_test.py b/tests/frame/reindex_test.py index e21b31a8e..5696f5674 100644 --- a/tests/frame/reindex_test.py +++ b/tests/frame/reindex_test.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from typing import Any import pandas as pd import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} @@ -14,9 +16,9 @@ def test_reindex(df_raw: Any) -> None: df = nw.from_native(df_raw, eager_only=True) result = df.select("b", df["a"].sort(descending=True)) expected = {"b": [4, 4, 6], "a": [3, 2, 1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.select("b", nw.col("a").sort(descending=True)) - compare_dicts(result, expected) + assert_equal_data(result, expected) s = df["a"] result_s = s > s.sort() @@ -25,6 +27,6 @@ def test_reindex(df_raw: Any) -> None: assert not result_s[2] result = df.with_columns(s.sort()) expected = {"a": [1, 2, 3], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} # type: ignore[list-item] - compare_dicts(result, expected) + assert_equal_data(result, expected) with pytest.raises(ValueError, match="Multi-output expressions are not supported"): nw.to_native(df.with_columns(nw.all() + nw.all())) diff --git a/tests/frame/rename_test.py b/tests/frame/rename_test.py index 79cf3f243..24c046200 100644 --- a/tests/frame/rename_test.py +++ b/tests/frame/rename_test.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_rename(constructor: Constructor) -> None: @@ -8,4 +10,4 @@ def test_rename(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.rename({"a": "x", "b": "y"}) expected = {"x": [1, 3, 2], "y": [4, 4, 6], "z": [7.0, 8, 9]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/row_test.py b/tests/frame/row_test.py index 599dcaeaf..82af94146 100644 --- a/tests/frame/row_test.py +++ b/tests/frame/row_test.py @@ -1,11 +1,17 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING from typing import Any import pytest import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import 
ConstructorEager + -def test_row_column(request: Any, constructor_eager: Any) -> None: +def test_row_column(request: Any, constructor_eager: ConstructorEager) -> None: if "cudf" in str(constructor_eager): request.applymarker(pytest.mark.xfail) diff --git a/tests/frame/rows_test.py b/tests/frame/rows_test.py index 744f66065..cdf426483 100644 --- a/tests/frame/rows_test.py +++ b/tests/frame/rows_test.py @@ -1,43 +1,18 @@ from __future__ import annotations +from typing import TYPE_CHECKING from typing import Any import pandas as pd -import polars as pl -import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version - -df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) -df_pa = pa.table({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) -if parse_version(pd.__version__) >= parse_version("1.5.0"): - df_pandas_pyarrow = pd.DataFrame( - {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - ).astype( - { - "a": "Int64[pyarrow]", - "b": "Int64[pyarrow]", - "z": "Float64[pyarrow]", - } - ) - df_pandas_nullable = pd.DataFrame( - {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - ).astype( - { - "a": "Int64", - "b": "Int64", - "z": "Float64", - } - ) -else: # pragma: no cover - df_pandas_pyarrow = df_pandas - df_pandas_nullable = df_pandas -df_polars = pl.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) - -df_pandas_na = pd.DataFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) -df_polars_na = pl.DataFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]}) + +if TYPE_CHECKING: + from tests.utils import ConstructorEager + +data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} +data_na = {"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]} @pytest.mark.parametrize( @@ -56,7 +31,7 @@ ) def test_iter_rows( request: Any, - constructor_eager: Any, + constructor_eager: ConstructorEager, named: bool, # noqa: FBT001 expected: list[tuple[Any, ...]] | list[dict[str, Any]], ) -> None: @@ -69,8 +44,8 @@ def test_iter_rows( assert result == expected -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_pandas_nullable, df_pandas_pyarrow, df_polars, df_pa] +@pytest.mark.filterwarnings( + "ignore:.*all arguments of to_dict except for the argument:FutureWarning" ) @pytest.mark.parametrize( ("named", "expected"), @@ -87,24 +62,52 @@ def test_iter_rows( ], ) def test_rows( - df_raw: Any, + constructor_eager: ConstructorEager, named: bool, # noqa: FBT001 expected: list[tuple[Any, ...]] | list[dict[str, Any]], ) -> None: - df = nw.from_native(df_raw, eager_only=True) + df = nw.from_native(constructor_eager(data), eager_only=True) result = df.rows(named=named) assert result == expected -@pytest.mark.parametrize("df_raw", [df_pandas_na, df_polars_na]) -def test_rows_with_nulls_unnamed(df_raw: Any) -> None: - # GIVEN - df = nw.from_native(df_raw, eager_only=True) +@pytest.mark.filterwarnings( + r"ignore:.*Starting with pandas version 3\.0 all arguments of to_dict" +) +@pytest.mark.parametrize( + ("named", "expected"), + [ + (False, [(1, 4, 7.0, 5), (3, 4, 8.0, 6), (2, 6, 9.0, 7)]), + ( + True, + [ + {"a": 1, "_b": 4, "z": 7.0, "1": 5}, + {"a": 3, "_b": 4, "z": 8.0, "1": 6}, + {"a": 2, "_b": 6, "z": 9.0, "1": 7}, + ], + ), + ], +) +def test_rows_eager( + constructor_eager: Any, + named: bool, # noqa: FBT001 + expected: list[tuple[Any, ...]] | list[dict[str, Any]], +) -> None: + # posit-dev/py-shiny relies on `.rows(named=False)` to return unnamed rows + data = {"a": [1, 3, 2], "_b": [4, 4, 6], "z": [7.0, 8, 9], 
"1": [5, 6, 7]} + df = nw.from_native(constructor_eager(data), eager_only=True) + result = df.rows(named=named) + assert result == expected + - # WHEN +def test_rows_with_nulls_unnamed( + constructor_eager: ConstructorEager, request: pytest.FixtureRequest +) -> None: + if "cudf" in str(constructor_eager): + # cudf intentionally doesn't support itertuples / iter_rows + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor_eager(data_na), eager_only=True) result = list(df.iter_rows(named=False)) - - # THEN expected = [(None, 4, 7.0), (3, 4, None), (2, 6, 9.0)] for i, row in enumerate(expected): for j, value in enumerate(row): @@ -115,15 +118,14 @@ def test_rows_with_nulls_unnamed(df_raw: Any) -> None: assert value_in_result == value -@pytest.mark.parametrize("df_raw", [df_pandas_na, df_polars_na]) -def test_rows_with_nulls_named(df_raw: Any) -> None: - # GIVEN - df = nw.from_native(df_raw, eager_only=True) - - # WHEN +def test_rows_with_nulls_named( + constructor_eager: ConstructorEager, request: pytest.FixtureRequest +) -> None: + if "cudf" in str(constructor_eager): + # cudf intentionally doesn't support itertuples / iter_rows + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor_eager(data_na), eager_only=True) result = list(df.iter_rows(named=True)) - - # THEN expected: list[dict[str, Any]] = [ {"a": None, "b": 4, "z": 7.0}, {"a": 3, "b": 4, "z": None}, diff --git a/tests/frame/sample_test.py b/tests/frame/sample_test.py index 88d5969c3..ff3591fdd 100644 --- a/tests/frame/sample_test.py +++ b/tests/frame/sample_test.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals.stable.v1 as nw -from tests.utils import Constructor + +if TYPE_CHECKING: + from tests.utils import Constructor def test_sample_n(constructor_eager: Constructor) -> None: diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index cb5ddff19..c2fdec31a 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -1,7 +1,10 @@ +from __future__ import annotations + from datetime import date from datetime import datetime from datetime import timedelta from datetime import timezone +from typing import TYPE_CHECKING from typing import Any import duckdb @@ -10,8 +13,12 @@ import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version -from tests.utils import Constructor +from tests.utils import PANDAS_VERSION + +if TYPE_CHECKING: + from tests.utils import Constructor + from tests.utils import ConstructorEager + data = { "a": [datetime(2020, 1, 1)], @@ -60,7 +67,9 @@ def test_string_disguised_as_object() -> None: assert result["a"] == nw.String -def test_actual_object(request: pytest.FixtureRequest, constructor_eager: Any) -> None: +def test_actual_object( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: if any(x in str(constructor_eager) for x in ("modin", "pyarrow_table", "cudf")): request.applymarker(pytest.mark.xfail) @@ -72,9 +81,7 @@ class Foo: ... 
assert result == {"a": nw.Object} -@pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.0.0"), reason="too old" -) +@pytest.mark.skipif(PANDAS_VERSION < (2, 0, 0), reason="too old") def test_dtypes() -> None: df_pl = pl.DataFrame( { @@ -190,7 +197,7 @@ def test_schema_object(method: str, expected: Any) -> None: @pytest.mark.skipif( - parse_version(pd.__version__) < (2,), + PANDAS_VERSION < (2,), reason="Before 2.0, pandas would raise on `drop_duplicates`", ) def test_from_non_hashable_column_name() -> None: @@ -204,7 +211,7 @@ def test_from_non_hashable_column_name() -> None: @pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.2.0"), + PANDAS_VERSION < (2, 2, 0), reason="too old for pyarrow types", ) def test_nested_dtypes() -> None: @@ -213,38 +220,56 @@ def test_nested_dtypes() -> None: schema_overrides={"b": pl.Array(pl.Int64, 2)}, ).to_pandas(use_pyarrow_extension_array=True) nwdf = nw.from_native(df) - - assert nwdf.schema == {"a": nw.List, "b": nw.Array, "c": nw.Struct} + assert nwdf.schema == { + "a": nw.List(nw.Int64), + "b": nw.Array(nw.Int64, 2), + "c": nw.Struct({"a": nw.Int64}), + } df = pl.DataFrame( {"a": [[1, 2]], "b": [[1, 2]], "c": [{"a": 1}]}, schema_overrides={"b": pl.Array(pl.Int64, 2)}, ) nwdf = nw.from_native(df) - assert nwdf.schema == {"a": nw.List, "b": nw.Array(nw.Int64, 2), "c": nw.Struct} + assert nwdf.schema == { + "a": nw.List(nw.Int64), + "b": nw.Array(nw.Int64, 2), + "c": nw.Struct({"a": nw.Int64}), + } + df = pl.DataFrame( - {"a": [[1, 2]], "b": [[1, 2]], "c": [{"a": 1}]}, + {"a": [[1, 2]], "b": [[1, 2]], "c": [{"a": 1, "b": "x", "c": 1.1}]}, schema_overrides={"b": pl.Array(pl.Int64, 2)}, ).to_arrow() nwdf = nw.from_native(df) - assert nwdf.schema == {"a": nw.List, "b": nw.Array(nw.Int64, 2), "c": nw.Struct} + assert nwdf.schema == { + "a": nw.List(nw.Int64), + "b": nw.Array(nw.Int64, 2), + "c": nw.Struct({"a": nw.Int64, "b": nw.String, "c": nw.Float64}), + } df = duckdb.sql("select * from df") nwdf = nw.from_native(df) - assert nwdf.schema == {"a": nw.List, "b": nw.Array(nw.Int64, 2), "c": nw.Struct} + assert nwdf.schema == { + "a": nw.List(nw.Int64), + "b": nw.Array(nw.Int64, 2), + "c": nw.Struct({"a": nw.Int64, "b": nw.String, "c": nw.Float64}), + } -def test_nested_dtypes_ibis() -> None: # pragma: no cover +def test_nested_dtypes_ibis(request: pytest.FixtureRequest) -> None: # pragma: no cover ibis = pytest.importorskip("ibis") + if PANDAS_VERSION < (1, 1): + request.applymarker(pytest.mark.xfail) df = pl.DataFrame( {"a": [[1, 2]], "b": [[1, 2]], "c": [{"a": 1}]}, schema_overrides={"b": pl.Array(pl.Int64, 2)}, ) tbl = ibis.memtable(df[["a", "c"]]) nwdf = nw.from_native(tbl) - assert nwdf.schema == {"a": nw.List, "c": nw.Struct} + assert nwdf.schema == {"a": nw.List(nw.Int64), "c": nw.Struct({"a": nw.Int64})} @pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.2.0"), + PANDAS_VERSION < (2, 2, 0), reason="too old for pyarrow types", ) def test_nested_dtypes_dask() -> None: @@ -259,4 +284,8 @@ def test_nested_dtypes_dask() -> None: ).to_pandas(use_pyarrow_extension_array=True) ) nwdf = nw.from_native(df) - assert nwdf.schema == {"a": nw.List, "b": nw.Array, "c": nw.Struct} + assert nwdf.schema == { + "a": nw.List(nw.Int64), + "b": nw.Array(nw.Int64, 2), + "c": nw.Struct({"a": nw.Int64}), + } diff --git a/tests/frame/select_test.py b/tests/frame/select_test.py index 8c01be407..83b8e1f5e 100644 --- a/tests/frame/select_test.py +++ b/tests/frame/select_test.py @@ -1,9 +1,11 @@ +from __future__ import 
annotations + import pandas as pd import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_select(constructor: Constructor) -> None: @@ -11,7 +13,7 @@ def test_select(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select("a") expected = {"a": [1, 3, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_empty_select(constructor: Constructor) -> None: diff --git a/tests/frame/shape_test.py b/tests/frame/shape_test.py index 2ab3a23bc..6cbee058d 100644 --- a/tests/frame/shape_test.py +++ b/tests/frame/shape_test.py @@ -1,9 +1,14 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager + -def test_shape(constructor_eager: Any) -> None: +def test_shape(constructor_eager: ConstructorEager) -> None: result = nw.from_native( constructor_eager({"a": [1, 2], "b": [4, 5], "c": [7, 8]}), eager_only=True ).shape diff --git a/tests/frame/sort_test.py b/tests/frame/sort_test.py index bea9177df..4e12cc95a 100644 --- a/tests/frame/sort_test.py +++ b/tests/frame/sort_test.py @@ -4,7 +4,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_sort(constructor: Constructor) -> None: @@ -16,14 +16,14 @@ def test_sort(constructor: Constructor) -> None: "b": [4, 6, 4], "z": [7.0, 9.0, 8.0], } - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.sort("a", "b", descending=[True, False]) expected = { "a": [3, 2, 1], "b": [4, 6, 4], "z": [8.0, 9.0, 7.0], } - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize( @@ -39,4 +39,4 @@ def test_sort_nulls( data = {"a": [0, 0, 2, -1], "b": [1, 3, 2, None]} df = nw.from_native(constructor(data)) result = df.sort("b", descending=True, nulls_last=nulls_last) - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/tail_test.py b/tests/frame/tail_test.py index f7e06475c..a4d265797 100644 --- a/tests/frame/tail_test.py +++ b/tests/frame/tail_test.py @@ -6,7 +6,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_tail(constructor: Constructor) -> None: @@ -27,13 +27,13 @@ def test_tail(constructor: Constructor) -> None: with context: result = df.tail(2) - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.collect().tail(2) # type: ignore[assignment] - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.collect().tail(-1) # type: ignore[assignment] - compare_dicts(result, expected) + assert_equal_data(result, expected) result = df.collect().select(nw.col("a").tail(2)) # type: ignore[assignment] - compare_dicts(result, {"a": expected["a"]}) + assert_equal_data(result, {"a": expected["a"]}) diff --git a/tests/frame/to_arrow_test.py b/tests/frame/to_arrow_test.py index f20bdf28c..3e8c704ea 100644 --- a/tests/frame/to_arrow_test.py +++ b/tests/frame/to_arrow_test.py @@ -1,17 +1,22 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING -import pandas as pd import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils 
import parse_version +from tests.utils import PANDAS_VERSION +if TYPE_CHECKING: + from tests.utils import ConstructorEager -def test_to_arrow(request: pytest.FixtureRequest, constructor_eager: Any) -> None: - if "pandas" in str(constructor_eager) and parse_version(pd.__version__) < (1, 0, 0): + +def test_to_arrow( + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, +) -> None: + if "pandas" in str(constructor_eager) and PANDAS_VERSION < (1, 0, 0): # pyarrow requires pandas>=1.0.0 request.applymarker(pytest.mark.xfail) diff --git a/tests/frame/to_dict_test.py b/tests/frame/to_dict_test.py index 29c3d2270..e6a434b7f 100644 --- a/tests/frame/to_dict_test.py +++ b/tests/frame/to_dict_test.py @@ -1,26 +1,27 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data @pytest.mark.filterwarnings( "ignore:.*all arguments of to_dict except for the argument:FutureWarning" ) -def test_to_dict(constructor_eager: Any) -> None: +def test_to_dict(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "c": [7.0, 8, 9]} df = nw.from_native(constructor_eager(data), eager_only=True) result = df.to_dict(as_series=False) assert result == data -def test_to_dict_as_series(constructor_eager: Any) -> None: +def test_to_dict_as_series(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "c": [7.0, 8, 9]} df = nw.from_native(constructor_eager(data), eager_only=True) result = df.to_dict(as_series=True) assert isinstance(result["a"], nw.Series) assert isinstance(result["b"], nw.Series) assert isinstance(result["c"], nw.Series) - compare_dicts(result, data) + assert_equal_data(result, data) diff --git a/tests/frame/to_native_test.py b/tests/frame/to_native_test.py index d8f4132bf..fb90caf10 100644 --- a/tests/frame/to_native_test.py +++ b/tests/frame/to_native_test.py @@ -1,9 +1,14 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import Constructor + -def test_to_native(constructor: Any) -> None: +def test_to_native(constructor: Constructor) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8, 9]} df_raw = constructor(data) df = nw.from_native(df_raw) diff --git a/tests/frame/to_numpy_test.py b/tests/frame/to_numpy_test.py index d573f4322..aa3dfc2e4 100644 --- a/tests/frame/to_numpy_test.py +++ b/tests/frame/to_numpy_test.py @@ -1,13 +1,16 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING import numpy as np import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager -def test_to_numpy(constructor_eager: Any) -> None: + +def test_to_numpy(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8, 9]} df_raw = constructor_eager(data) result = nw.from_native(df_raw, eager_only=True).to_numpy() diff --git a/tests/frame/to_pandas_test.py b/tests/frame/to_pandas_test.py index 671a5d857..d9bce7a69 100644 --- a/tests/frame/to_pandas_test.py +++ b/tests/frame/to_pandas_test.py @@ -1,20 +1,26 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING import pandas as pd import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import 
PANDAS_VERSION + +if TYPE_CHECKING: + from tests.utils import ConstructorEager @pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") @pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.0.0"), + PANDAS_VERSION < (2, 0, 0), reason="too old for pandas-pyarrow", ) -def test_convert_pandas(constructor_eager: Any, request: pytest.FixtureRequest) -> None: +def test_convert_pandas( + constructor_eager: ConstructorEager, + request: pytest.FixtureRequest, +) -> None: if "modin" in str(constructor_eager): request.applymarker(pytest.mark.xfail) data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} diff --git a/tests/frame/unique_test.py b/tests/frame/unique_test.py index 40589c545..c8079f593 100644 --- a/tests/frame/unique_test.py +++ b/tests/frame/unique_test.py @@ -4,7 +4,7 @@ import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} @@ -29,7 +29,7 @@ def test_unique( df = nw.from_native(df_raw) result = df.unique(subset, keep=keep, maintain_order=True) # type: ignore[arg-type] - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_unique_none(constructor: Constructor) -> None: @@ -37,4 +37,4 @@ def test_unique_none(constructor: Constructor) -> None: df = nw.from_native(df_raw) result = df.unique(maintain_order=True) - compare_dicts(result, data) + assert_equal_data(result, data) diff --git a/tests/frame/unpivot_test.py b/tests/frame/unpivot_test.py index 33f7eaca0..ed8d98c96 100644 --- a/tests/frame/unpivot_test.py +++ b/tests/frame/unpivot_test.py @@ -3,13 +3,12 @@ from typing import TYPE_CHECKING from typing import Any -import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PYARROW_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data if TYPE_CHECKING: from narwhals.stable.v1.dtypes import DType @@ -44,7 +43,7 @@ def test_unpivot_on( ) -> None: df = nw.from_native(constructor(data)) result = df.unpivot(on=on, index=["a"]).sort("variable", "a") - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize( @@ -95,13 +94,8 @@ def test_unpivot_mixed_types( data: dict[str, Any], expected_dtypes: list[DType], ) -> None: - if ( - "dask" in str(constructor) - or "cudf" in str(constructor) - or ( - "pyarrow_table" in str(constructor) - and parse_version(pa.__version__) < parse_version("14.0.0") - ) + if "cudf" in str(constructor) or ( + "pyarrow_table" in str(constructor) and PYARROW_VERSION < (14, 0, 0) ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) diff --git a/tests/frame/with_columns_sequence_test.py b/tests/frame/with_columns_sequence_test.py index 49db7820b..b88036a4d 100644 --- a/tests/frame/with_columns_sequence_test.py +++ b/tests/frame/with_columns_sequence_test.py @@ -1,9 +1,11 @@ +from __future__ import annotations + import numpy as np import pytest import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = { "a": ["foo", "bars"], @@ -21,4 +23,4 @@ def test_with_columns(constructor: Constructor, request: pytest.FixtureRequest) .select("d", "e") ) expected = {"d": [4, 5], "e": [5, 6]} - compare_dicts(result, expected) + assert_equal_data(result, expected) 
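Throughout these test hunks, calls like parse_version(pd.__version__) and parse_version(pa.__version__) are replaced by module-level constants imported from tests.utils (PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION), and compare_dicts is imported under its new name assert_equal_data. The definitions live in tests/utils.py, which is not part of this excerpt; below is a minimal sketch of what the version constants are assumed to look like, with names and types inferred from how they are used in these hunks rather than taken from the actual file:

# Hypothetical sketch of the tests/utils.py constants assumed by these hunks.
# The real module also exposes Constructor, ConstructorEager and
# assert_equal_data (which replaces compare_dicts in these tests); those are
# not sketched here.
from __future__ import annotations

import pandas as pd
import polars as pl
import pyarrow as pa

from narwhals.utils import parse_version

# parse_version turns a version string into a tuple of ints, so tests can
# compare against plain tuples such as (2, 0, 0) without re-parsing strings
# at every test site.
PANDAS_VERSION: tuple[int, ...] = parse_version(pd.__version__)
POLARS_VERSION: tuple[int, ...] = parse_version(pl.__version__)
PYARROW_VERSION: tuple[int, ...] = parse_version(pa.__version__)

With these in place, a marker like pytest.mark.skipif(PANDAS_VERSION < (2, 0, 0), reason="too old") needs no parse_version call of its own, which is the pattern the remaining hunks follow.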
diff --git a/tests/frame/with_columns_test.py b/tests/frame/with_columns_test.py index 44bcd39a5..c05a41646 100644 --- a/tests/frame/with_columns_test.py +++ b/tests/frame/with_columns_test.py @@ -1,9 +1,13 @@ +from __future__ import annotations + import numpy as np import pandas as pd +import pytest import narwhals.stable.v1 as nw +from tests.utils import PYARROW_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_with_columns_int_col_name_pandas() -> None: @@ -23,14 +27,14 @@ def test_with_columns_order(constructor: Constructor) -> None: result = df.with_columns(nw.col("a") + 1, d=nw.col("a") - 1) assert result.collect_schema().names() == ["a", "b", "z", "d"] expected = {"a": [2, 4, 3], "b": [4, 4, 6], "z": [7.0, 8, 9], "d": [0, 2, 1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_with_columns_empty(constructor: Constructor) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df = nw.from_native(constructor(data)) result = df.select().with_columns() - compare_dicts(result, {}) + assert_equal_data(result, {}) def test_with_columns_order_single_row(constructor: Constructor) -> None: @@ -39,4 +43,16 @@ def test_with_columns_order_single_row(constructor: Constructor) -> None: result = df.with_columns(nw.col("a") + 1, d=nw.col("a") - 1) assert result.collect_schema().names() == ["a", "b", "z", "d"] expected = {"a": [2], "b": [4], "z": [7.0], "d": [0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) + + +def test_with_columns_dtypes_single_row( + constructor: Constructor, + request: pytest.FixtureRequest, +) -> None: + if "pyarrow_table" in str(constructor) and PYARROW_VERSION < (15,): + request.applymarker(pytest.mark.xfail) + data = {"a": ["foo"]} + df = nw.from_native(constructor(data)).with_columns(nw.col("a").cast(nw.Categorical)) + result = df.with_columns(nw.col("a")) + assert result.collect_schema() == {"a": nw.Categorical} diff --git a/tests/frame/with_row_index_test.py b/tests/frame/with_row_index_test.py index 8f802de0a..b6ad9c82d 100644 --- a/tests/frame/with_row_index_test.py +++ b/tests/frame/with_row_index_test.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import narwhals.stable.v1 as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = { "a": ["foo", "bars"], @@ -11,4 +13,4 @@ def test_with_row_index(constructor: Constructor) -> None: result = nw.from_native(constructor(data)).with_row_index() expected = {"a": ["foo", "bars"], "ab": ["foo", "bars"], "index": [0, 1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/frame/write_csv_test.py b/tests/frame/write_csv_test.py index ed9303604..84ce84f0d 100644 --- a/tests/frame/write_csv_test.py +++ b/tests/frame/write_csv_test.py @@ -1,16 +1,18 @@ from __future__ import annotations from typing import TYPE_CHECKING -from typing import Any import narwhals.stable.v1 as nw +from tests.utils import ConstructorEager from tests.utils import is_windows if TYPE_CHECKING: import pytest -def test_write_csv(constructor_eager: Any, tmpdir: pytest.TempdirFactory) -> None: +def test_write_csv( + constructor_eager: ConstructorEager, tmpdir: pytest.TempdirFactory +) -> None: data = {"a": [1, 2, 3]} path = tmpdir / "foo.csv" # type: ignore[operator] result = nw.from_native(constructor_eager(data), eager_only=True).write_csv(str(path)) diff --git 
a/tests/frame/write_parquet_test.py b/tests/frame/write_parquet_test.py index 8efaefb55..e4b826cfb 100644 --- a/tests/frame/write_parquet_test.py +++ b/tests/frame/write_parquet_test.py @@ -1,20 +1,23 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING -import pandas as pd import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION + +if TYPE_CHECKING: + from tests.utils import ConstructorEager data = {"a": [1, 2, 3]} -@pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.0.0"), reason="too old for pyarrow" -) -def test_write_parquet(constructor_eager: Any, tmpdir: pytest.TempdirFactory) -> None: +@pytest.mark.skipif(PANDAS_VERSION < (2, 0, 0), reason="too old for pyarrow") +def test_write_parquet( + constructor_eager: ConstructorEager, + tmpdir: pytest.TempdirFactory, +) -> None: path = tmpdir / "foo.parquet" # type: ignore[operator] nw.from_native(constructor_eager(data), eager_only=True).write_parquet(str(path)) assert path.exists() diff --git a/tests/from_dict_test.py b/tests/from_dict_test.py index 4583b03e5..833cd3acc 100644 --- a/tests/from_dict_test.py +++ b/tests/from_dict_test.py @@ -1,9 +1,11 @@ +from __future__ import annotations + import pytest import narwhals as nw import narwhals.stable.v1 as nw_v1 from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_from_dict(constructor: Constructor, request: pytest.FixtureRequest) -> None: @@ -13,7 +15,7 @@ def test_from_dict(constructor: Constructor, request: pytest.FixtureRequest) -> native_namespace = nw.get_native_namespace(df) result = nw.from_dict({"c": [1, 2], "d": [5, 6]}, native_namespace=native_namespace) expected = {"c": [1, 2], "d": [5, 6]} - compare_dicts(result, expected) + assert_equal_data(result, expected) assert isinstance(result, nw.DataFrame) @@ -36,7 +38,7 @@ def test_from_dict_schema( def test_from_dict_without_namespace(constructor: Constructor) -> None: df = nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]})).lazy().collect() result = nw.from_dict({"c": df["a"], "d": df["b"]}) - compare_dicts(result, {"c": [1, 2, 3], "d": [4, 5, 6]}) + assert_equal_data(result, {"c": [1, 2, 3], "d": [4, 5, 6]}) def test_from_dict_without_namespace_invalid( @@ -53,7 +55,7 @@ def test_from_dict_one_native_one_narwhals( df = nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]})).lazy().collect() result = nw.from_dict({"c": nw.to_native(df["a"]), "d": df["b"]}) expected = {"c": [1, 2, 3], "d": [4, 5, 6]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_from_dict_v1(constructor: Constructor, request: pytest.FixtureRequest) -> None: @@ -63,7 +65,7 @@ def test_from_dict_v1(constructor: Constructor, request: pytest.FixtureRequest) native_namespace = nw.get_native_namespace(df) result = nw.from_dict({"c": [1, 2], "d": [5, 6]}, native_namespace=native_namespace) expected = {"c": [1, 2], "d": [5, 6]} - compare_dicts(result, expected) + assert_equal_data(result, expected) assert isinstance(result, nw.DataFrame) diff --git a/tests/from_pycapsule_test.py b/tests/from_pycapsule_test.py new file mode 100644 index 000000000..7d91a44f3 --- /dev/null +++ b/tests/from_pycapsule_test.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import sys + +import pandas as pd +import polars as pl +import pyarrow as pa +import pytest + +import narwhals.stable.v1 as nw +from tests.utils import 
PYARROW_VERSION +from tests.utils import assert_equal_data + + +@pytest.mark.xfail(PYARROW_VERSION < (14,), reason="too old") +def test_from_arrow_to_arrow() -> None: + df = nw.from_native(pl.DataFrame({"ab": [1, 2, 3], "ba": [4, 5, 6]}), eager_only=True) + result = nw.from_arrow(df, native_namespace=pa) + assert isinstance(result.to_native(), pa.Table) + expected = {"ab": [1, 2, 3], "ba": [4, 5, 6]} + assert_equal_data(result, expected) + + +@pytest.mark.xfail(PYARROW_VERSION < (14,), reason="too old") +def test_from_arrow_to_polars(monkeypatch: pytest.MonkeyPatch) -> None: + tbl = pa.table({"ab": [1, 2, 3], "ba": [4, 5, 6]}) + monkeypatch.delitem(sys.modules, "pandas") + df = nw.from_native(tbl, eager_only=True) + result = nw.from_arrow(df, native_namespace=pl) + assert isinstance(result.to_native(), pl.DataFrame) + expected = {"ab": [1, 2, 3], "ba": [4, 5, 6]} + assert_equal_data(result, expected) + assert "pandas" not in sys.modules + + +@pytest.mark.xfail(PYARROW_VERSION < (14,), reason="too old") +def test_from_arrow_to_pandas() -> None: + df = nw.from_native(pa.table({"ab": [1, 2, 3], "ba": [4, 5, 6]}), eager_only=True) + result = nw.from_arrow(df, native_namespace=pd) + assert isinstance(result.to_native(), pd.DataFrame) + expected = {"ab": [1, 2, 3], "ba": [4, 5, 6]} + assert_equal_data(result, expected) + + +def test_from_arrow_invalid() -> None: + with pytest.raises(TypeError, match="PyCapsule"): + nw.from_arrow({"a": [1]}, native_namespace=pa) # type: ignore[arg-type] diff --git a/tests/group_by_test.py b/tests/group_by_test.py index 90cb48c26..09ee213e8 100644 --- a/tests/group_by_test.py +++ b/tests/group_by_test.py @@ -1,7 +1,6 @@ from __future__ import annotations from contextlib import nullcontext -from typing import Any import pandas as pd import polars as pl @@ -9,9 +8,11 @@ import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION +from tests.utils import PYARROW_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = {"a": [1, 1, 3], "b": [4, 4, 6], "c": [7.0, 8, 9]} @@ -27,13 +28,13 @@ def test_group_by_complex() -> None: result = nw.to_native( df.group_by("a").agg((nw.col("b") - nw.col("c").mean()).mean()).sort("a") ) - compare_dicts(result, expected) + assert_equal_data(result, expected) lf = nw.from_native(df_lazy).lazy() result = nw.to_native( lf.group_by("a").agg((nw.col("b") - nw.col("c").mean()).mean()).sort("a") ) - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_invalid_group_by_dask() -> None: @@ -73,14 +74,14 @@ def test_invalid_group_by() -> None: ) -def test_group_by_iter(constructor_eager: Any) -> None: +def test_group_by_iter(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) expected_keys = [(1,), (3,)] keys = [] for key, sub_df in df.group_by("a"): if key == (1,): expected = {"a": [1, 1], "b": [4, 4], "c": [7.0, 8.0]} - compare_dicts(sub_df, expected) + assert_equal_data(sub_df, expected) assert isinstance(sub_df, nw.DataFrame) keys.append(key) assert sorted(keys) == sorted(expected_keys) @@ -100,7 +101,7 @@ def test_group_by_len(constructor: Constructor) -> None: nw.from_native(constructor(data)).group_by("a").agg(nw.col("b").len()).sort("a") ) expected = {"a": [1, 3], "b": [2, 1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def 
test_group_by_n_unique(constructor: Constructor) -> None: @@ -111,7 +112,7 @@ def test_group_by_n_unique(constructor: Constructor) -> None: .sort("a") ) expected = {"a": [1, 3], "b": [1, 1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_group_by_std(constructor: Constructor) -> None: @@ -120,7 +121,7 @@ def test_group_by_std(constructor: Constructor) -> None: nw.from_native(constructor(data)).group_by("a").agg(nw.col("b").std()).sort("a") ) expected = {"a": [1, 2], "b": [0.707107] * 2} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_group_by_n_unique_w_missing( @@ -149,7 +150,7 @@ def test_group_by_n_unique_w_missing( "c_n_min": [4, 5], "d_n_unique": [1, 1], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_group_by_same_name_twice() -> None: @@ -186,7 +187,7 @@ def test_group_by_simple_named(constructor: Constructor) -> None: "b_min": [4, 6], "b_max": [5, 6], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_group_by_simple_unnamed(constructor: Constructor) -> None: @@ -206,7 +207,7 @@ def test_group_by_simple_unnamed(constructor: Constructor) -> None: "b": [4, 6], "c": [7, 1], } - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_group_by_multiple_keys(constructor: Constructor) -> None: @@ -227,19 +228,19 @@ def test_group_by_multiple_keys(constructor: Constructor) -> None: "c_min": [2, 1], "c_max": [7, 1], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_key_with_nulls(constructor: Constructor, request: pytest.FixtureRequest) -> None: +def test_key_with_nulls( + constructor: Constructor, + request: pytest.FixtureRequest, +) -> None: if "modin" in str(constructor): # TODO(unassigned): Modin flaky here? 
request.applymarker(pytest.mark.skip) context = ( pytest.raises(NotImplementedError, match="null values") - if ( - "pandas_constructor" in str(constructor) - and parse_version(pd.__version__) < parse_version("1.0.0") - ) + if ("pandas_constructor" in str(constructor) and PANDAS_VERSION < (1, 1, 0)) else nullcontext() ) data = {"b": [4, 5, None], "a": [1, 2, 3]} @@ -252,20 +253,63 @@ def test_key_with_nulls(constructor: Constructor, request: pytest.FixtureRequest .with_columns(nw.col("b").cast(nw.Float64)) ) expected = {"b": [4.0, 5, float("nan")], "len": [1, 1, 1], "a": [1, 2, 3]} - compare_dicts(result, expected) + assert_equal_data(result, expected) + + +def test_key_with_nulls_ignored( + constructor: Constructor, +) -> None: + data = {"b": [4, 5, None], "a": [1, 2, 3]} + result = ( + nw.from_native(constructor(data)) + .group_by("b", drop_null_keys=True) + .agg(nw.len(), nw.col("a").min()) + .sort("a") + .with_columns(nw.col("b").cast(nw.Float64)) + ) + expected = {"b": [4.0, 5], "len": [1, 1], "a": [1, 2]} + assert_equal_data(result, expected) + + +def test_key_with_nulls_iter( + constructor_eager: ConstructorEager, + request: pytest.FixtureRequest, +) -> None: + if PANDAS_VERSION < (1, 3) and "pandas_constructor" in str(constructor_eager): + # bug in old pandas + request.applymarker(pytest.mark.xfail) + data = {"b": ["4", "5", None, "7"], "a": [1, 2, 3, 4], "c": ["4", "3", None, None]} + result = dict( + nw.from_native(constructor_eager(data), eager_only=True) + .group_by("b", "c", drop_null_keys=True) + .__iter__() + ) + assert len(result) == 2 + assert_equal_data(result[("4", "4")], {"b": ["4"], "a": [1], "c": ["4"]}) + assert_equal_data(result[("5", "3")], {"b": ["5"], "a": [2], "c": ["3"]}) + + result = dict( + nw.from_native(constructor_eager(data), eager_only=True) + .group_by("b", "c", drop_null_keys=False) + .__iter__() + ) + assert_equal_data(result[("4", "4")], {"b": ["4"], "a": [1], "c": ["4"]}) + assert_equal_data(result[("5", "3")], {"b": ["5"], "a": [2], "c": ["3"]}) + assert len(result) == 4 def test_no_agg(constructor: Constructor) -> None: result = nw.from_native(constructor(data)).group_by(["a", "b"]).agg().sort("a", "b") expected = {"a": [1, 3], "b": [4, 6]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_group_by_categorical( - constructor: Constructor, request: pytest.FixtureRequest + constructor: Constructor, + request: pytest.FixtureRequest, ) -> None: - if "pyarrow_table" in str(constructor) and parse_version(pa.__version__) < ( + if "pyarrow_table" in str(constructor) and PYARROW_VERSION < ( 15, 0, 0, @@ -283,4 +327,4 @@ def test_group_by_categorical( .agg(nw.col("x").sum()) .sort("x") ) - compare_dicts(result, data) + assert_equal_data(result, data) diff --git a/tests/hypothesis/concat_test.py b/tests/hypothesis/concat_test.py index 9ae54dbc4..e0ec45369 100644 --- a/tests/hypothesis/concat_test.py +++ b/tests/hypothesis/concat_test.py @@ -9,7 +9,7 @@ from hypothesis import strategies as st import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import assert_equal_data from tests.utils import is_windows @@ -62,4 +62,4 @@ def test_concat( # pragma: no cover dframe_pd1 = nw.to_native(dframe_pl) dframe_pd2 = nw.to_native(dframe_pd) - compare_dicts(dframe_pd1, dframe_pd2) + assert_equal_data(dframe_pd1, dframe_pd2) diff --git a/tests/hypothesis/join_test.py b/tests/hypothesis/join_test.py index bc1cd735c..5b498db65 100644 --- a/tests/hypothesis/join_test.py +++ b/tests/hypothesis/join_test.py @@ 
-10,11 +10,9 @@ from pandas.testing import assert_frame_equal import narwhals.stable.v1 as nw -from narwhals.utils import parse_version -from tests.utils import compare_dicts - -pl_version = parse_version(pl.__version__) -pd_version = parse_version(pd.__version__) +from tests.utils import PANDAS_VERSION +from tests.utils import POLARS_VERSION +from tests.utils import assert_equal_data @given( @@ -40,8 +38,8 @@ unique=True, ), ) # type: ignore[misc] -@pytest.mark.skipif(pl_version < parse_version("0.20.13"), reason="0.0 == -0.0") -@pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow") +@pytest.mark.skipif(POLARS_VERSION < (0, 20, 13), reason="0.0 == -0.0") +@pytest.mark.skipif(PANDAS_VERSION < (2, 0, 0), reason="requires pyarrow") @pytest.mark.slow def test_join( # pragma: no cover integers: st.SearchStrategy[list[int]], @@ -88,8 +86,8 @@ def test_join( # pragma: no cover max_size=3, ), ) # type: ignore[misc] +@pytest.mark.skipif(PANDAS_VERSION < (2, 0, 0), reason="requires pyarrow") @pytest.mark.slow -@pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow") def test_cross_join( # pragma: no cover integers: st.SearchStrategy[list[int]], other_integers: st.SearchStrategy[list[int]], @@ -164,7 +162,9 @@ def test_left_join( # pragma: no cover right_on=right_key, ) ).select(pl.all().fill_null(float("nan"))) - compare_dicts(result_pd.to_dict(as_series=False), result_pl.to_dict(as_series=False)) + assert_equal_data( + result_pd.to_dict(as_series=False), result_pl.to_dict(as_series=False) + ) # For PyArrow, insert an extra sort, as the order of rows isn't guaranteed result_pa = ( nw.from_native(pa.table(data_left), eager_only=True) @@ -177,7 +177,7 @@ def test_left_join( # pragma: no cover .select(nw.all().cast(nw.Float64).fill_null(float("nan"))) .pipe(lambda df: df.sort(df.columns)) ) - compare_dicts( + assert_equal_data( result_pa, result_pd.pipe(lambda df: df.sort(df.columns)).to_dict(as_series=False), ) diff --git a/tests/new_series_test.py b/tests/new_series_test.py index fad4a7536..0d635c853 100644 --- a/tests/new_series_test.py +++ b/tests/new_series_test.py @@ -1,31 +1,32 @@ -from typing import Any +from __future__ import annotations import pandas as pd import pytest import narwhals as nw import narwhals.stable.v1 as nw_v1 -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_new_series(constructor_eager: Any) -> None: +def test_new_series(constructor_eager: ConstructorEager) -> None: s = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] result = nw.new_series("b", [4, 1, 2], native_namespace=nw.get_native_namespace(s)) expected = {"b": [4, 1, 2]} # all supported libraries auto-infer this to be int64, we can always special-case # something different if necessary assert result.dtype == nw.Int64 - compare_dicts(result.to_frame(), expected) + assert_equal_data(result.to_frame(), expected) result = nw.new_series( "b", [4, 1, 2], nw.Int32, native_namespace=nw.get_native_namespace(s) ) expected = {"b": [4, 1, 2]} assert result.dtype == nw.Int32 - compare_dicts(result.to_frame(), expected) + assert_equal_data(result.to_frame(), expected) -def test_new_series_v1(constructor_eager: Any) -> None: +def test_new_series_v1(constructor_eager: ConstructorEager) -> None: s = nw_v1.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] result = nw_v1.new_series( "b", [4, 1, 2], native_namespace=nw_v1.get_native_namespace(s) @@ 
-34,14 +35,14 @@ def test_new_series_v1(constructor_eager: Any) -> None: # all supported libraries auto-infer this to be int64, we can always special-case # something different if necessary assert result.dtype == nw_v1.Int64 - compare_dicts(result.to_frame(), expected) + assert_equal_data(result.to_frame(), expected) result = nw_v1.new_series( "b", [4, 1, 2], nw_v1.Int32, native_namespace=nw_v1.get_native_namespace(s) ) expected = {"b": [4, 1, 2]} assert result.dtype == nw_v1.Int32 - compare_dicts(result.to_frame(), expected) + assert_equal_data(result.to_frame(), expected) def test_new_series_dask() -> None: diff --git a/tests/no_imports_test.py b/tests/no_imports_test.py index b30545380..a6fe26e31 100644 --- a/tests/no_imports_test.py +++ b/tests/no_imports_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import sys import pandas as pd diff --git a/tests/selectors_test.py b/tests/selectors_test.py index c78a9eac4..93f5cbd77 100644 --- a/tests/selectors_test.py +++ b/tests/selectors_test.py @@ -11,9 +11,9 @@ from narwhals.selectors import categorical from narwhals.selectors import numeric from narwhals.selectors import string -from narwhals.utils import parse_version +from tests.utils import PYARROW_VERSION from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data data = { "a": [1, 1, 2], @@ -27,43 +27,49 @@ def test_selectors(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(by_dtype([nw.Int64, nw.Float64]) + 1) expected = {"a": [2, 2, 3], "c": [5.1, 6.0, 7.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_numeric(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(numeric() + 1) expected = {"a": [2, 2, 3], "c": [5.1, 6.0, 7.0]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_boolean(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(boolean()) expected = {"d": [True, False, True]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_string(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if "dask" in str(constructor) and parse_version(pa.__version__) < (12,): +def test_string( + constructor: Constructor, + request: pytest.FixtureRequest, +) -> None: + if "dask" in str(constructor) and PYARROW_VERSION < (12,): # Dask doesn't infer `'b'` as String for old PyArrow versions request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result = df.select(string()) expected = {"b": ["a", "b", "c"]} - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_categorical(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if "pyarrow_table_constructor" in str(constructor) and parse_version( - pa.__version__ - ) <= (15,): # pragma: no cover +def test_categorical( + request: pytest.FixtureRequest, + constructor: Constructor, +) -> None: + if "pyarrow_table_constructor" in str(constructor) and PYARROW_VERSION <= ( + 15, + ): # pragma: no cover request.applymarker(pytest.mark.xfail) expected = {"b": ["a", "b", "c"]} df = nw.from_native(constructor(data)).with_columns(nw.col("b").cast(nw.Categorical)) result = df.select(categorical()) - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize( diff --git a/tests/series_only/__iter___test.py b/tests/series_only/__iter___test.py index 
a0a5c1189..2a88ae1d3 100644 --- a/tests/series_only/__iter___test.py +++ b/tests/series_only/__iter___test.py @@ -1,20 +1,25 @@ from __future__ import annotations from collections.abc import Iterable -from typing import Any +from typing import TYPE_CHECKING import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import assert_equal_data + +if TYPE_CHECKING: + from tests.utils import ConstructorEager data = [1, 2, 3] -def test_iter(constructor_eager: Any, request: pytest.FixtureRequest) -> None: +def test_iter( + constructor_eager: ConstructorEager, request: pytest.FixtureRequest +) -> None: if "cudf" in str(constructor_eager): request.applymarker(pytest.mark.xfail) s = nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"] assert isinstance(s, Iterable) - compare_dicts({"a": [x for x in s]}, {"a": [1, 2, 3]}) # noqa: C416 + assert_equal_data({"a": [x for x in s]}, {"a": [1, 2, 3]}) # noqa: C416 diff --git a/tests/series_only/alias_rename_test.py b/tests/series_only/alias_rename_test.py index 4fa8a9993..87143a574 100644 --- a/tests/series_only/alias_rename_test.py +++ b/tests/series_only/alias_rename_test.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import narwhals as nw from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_alias_rename(constructor_eager: Constructor) -> None: @@ -8,6 +10,6 @@ def test_alias_rename(constructor_eager: Constructor) -> None: expected = {"bar": data} series = nw.from_native(constructor_eager({"foo": data}), eager_only=True)["foo"] result = series.alias("bar").to_frame() - compare_dicts(result, expected) + assert_equal_data(result, expected) result = series.rename("bar").to_frame() - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/series_only/array_dunder_test.py b/tests/series_only/array_dunder_test.py index c09bea9ec..cdb837c16 100644 --- a/tests/series_only/array_dunder_test.py +++ b/tests/series_only/array_dunder_test.py @@ -1,19 +1,24 @@ -from typing import Any +from __future__ import annotations import numpy as np -import pandas as pd -import pyarrow as pa import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version -from tests.utils import compare_dicts +from tests.utils import PANDAS_VERSION +from tests.utils import PYARROW_VERSION +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_array_dunder(request: pytest.FixtureRequest, constructor_eager: Any) -> None: - if "pyarrow_table" in str(constructor_eager) and parse_version( - pa.__version__ - ) < parse_version("16.0.0"): # pragma: no cover +def test_array_dunder( + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, +) -> None: + if "pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < ( + 16, + 0, + 0, + ): # pragma: no cover request.applymarker(pytest.mark.xfail) s = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] @@ -22,11 +27,14 @@ def test_array_dunder(request: pytest.FixtureRequest, constructor_eager: Any) -> def test_array_dunder_with_dtype( - request: pytest.FixtureRequest, constructor_eager: Any + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, ) -> None: - if "pyarrow_table" in str(constructor_eager) and parse_version( - pa.__version__ - ) < parse_version("16.0.0"): # pragma: no cover + if "pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < ( + 16, + 0, 
+ 0, + ): # pragma: no cover request.applymarker(pytest.mark.xfail) s = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] @@ -35,22 +43,23 @@ def test_array_dunder_with_dtype( def test_array_dunder_with_copy( - request: pytest.FixtureRequest, constructor_eager: Any + request: pytest.FixtureRequest, + constructor_eager: ConstructorEager, ) -> None: - if "pyarrow_table" in str(constructor_eager) and parse_version( - pa.__version__ - ) < parse_version("16.0.0"): # pragma: no cover + if "pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < ( + 16, + 0, + 0, + ): # pragma: no cover request.applymarker(pytest.mark.xfail) s = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] result = s.__array__(copy=True) np.testing.assert_array_equal(result, np.array([1, 2, 3], dtype="int64")) - if "pandas_constructor" in str(constructor_eager) and parse_version( - pd.__version__ - ) < (3,): + if "pandas_constructor" in str(constructor_eager) and PANDAS_VERSION < (3,): # If it's pandas, we know that `copy=False` definitely took effect. # So, let's check it! result = s.__array__(copy=False) np.testing.assert_array_equal(result, np.array([1, 2, 3], dtype="int64")) result[0] = 999 - compare_dicts({"a": s}, {"a": [999, 2, 3]}) + assert_equal_data({"a": s}, {"a": [999, 2, 3]}) diff --git a/tests/series_only/arrow_c_stream_test.py b/tests/series_only/arrow_c_stream_test.py index 9d2ebc8d0..3118c6f8c 100644 --- a/tests/series_only/arrow_c_stream_test.py +++ b/tests/series_only/arrow_c_stream_test.py @@ -1,17 +1,18 @@ +from __future__ import annotations + import polars as pl import pyarrow as pa import pyarrow.compute as pc import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import POLARS_VERSION +from tests.utils import PYARROW_VERSION +@pytest.mark.skipif(POLARS_VERSION < (1, 3), reason="too old for pycapsule in Polars") @pytest.mark.skipif( - parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars" -) -@pytest.mark.skipif( - parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow" + PYARROW_VERSION < (16, 0, 0), reason="too old for pycapsule in PyArrow" ) def test_arrow_c_stream_test() -> None: s = nw.from_native(pl.Series([1, 2, 3]), series_only=True) @@ -20,11 +21,9 @@ def test_arrow_c_stream_test() -> None: assert pc.all(pc.equal(result, expected)).as_py() +@pytest.mark.skipif(POLARS_VERSION < (1, 3), reason="too old for pycapsule in Polars") @pytest.mark.skipif( - parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars" -) -@pytest.mark.skipif( - parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow" + PYARROW_VERSION < (16, 0, 0), reason="too old for pycapsule in PyArrow" ) def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None: # "poison" the dunder method to make sure it actually got called above @@ -34,11 +33,9 @@ def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None: pa.chunked_array(s) +@pytest.mark.skipif(POLARS_VERSION < (1, 3), reason="too old for pycapsule in Polars") @pytest.mark.skipif( - parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars" -) -@pytest.mark.skipif( - parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow" + PYARROW_VERSION < (16, 0, 0), reason="too old for pycapsule in PyArrow" ) def test_arrow_c_stream_test_fallback(monkeypatch: pytest.MonkeyPatch) -> None: # 
Check that fallback to PyArrow works diff --git a/tests/series_only/cast_test.py b/tests/series_only/cast_test.py index 672cbebc2..10587a084 100644 --- a/tests/series_only/cast_test.py +++ b/tests/series_only/cast_test.py @@ -1,5 +1,8 @@ +from __future__ import annotations + from datetime import date from datetime import datetime +from typing import TYPE_CHECKING import pandas as pd import polars as pl @@ -8,18 +11,20 @@ from polars.testing import assert_frame_equal import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION +if TYPE_CHECKING: + from tests.utils import ConstructorEager -def test_cast_253() -> None: - df_polars = pl.DataFrame({"a": [1]}) - result = nw.from_native(df_polars, eager_only=True).select( - nw.col("a").cast(nw.String) + "hi" - )["a"][0] - assert result == "1hi" - df_pandas = pd.DataFrame({"a": [1]}) - result = nw.from_native(df_pandas, eager_only=True).select( +def test_cast_253( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if "pyarrow_table" in str(constructor_eager): + request.applymarker(pytest.mark.xfail) + + df_raw = constructor_eager({"a": [1]}) + result = nw.from_native(df_raw, eager_only=True).select( nw.col("a").cast(nw.String) + "hi" )["a"][0] assert result == "1hi" @@ -63,7 +68,7 @@ def test_cast_date_datetime_pyarrow() -> None: @pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.0.0"), + PANDAS_VERSION < (2, 0, 0), reason="pyarrow dtype not available", ) def test_cast_date_datetime_pandas() -> None: @@ -94,7 +99,7 @@ def test_cast_date_datetime_pandas() -> None: @pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.0.0"), + PANDAS_VERSION < (2, 0, 0), reason="pyarrow dtype not available", ) def test_cast_date_datetime_invalid() -> None: diff --git a/tests/series_only/dtype_test.py b/tests/series_only/dtype_test.py index 68d10fbca..8200150f0 100644 --- a/tests/series_only/dtype_test.py +++ b/tests/series_only/dtype_test.py @@ -1,13 +1,16 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager + data = {"a": [1, 3, 2]} -def test_dtype(constructor_eager: Any) -> None: +def test_dtype(constructor_eager: ConstructorEager) -> None: series = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = series.dtype assert result == nw.Int64 diff --git a/tests/series_only/is_empty_test.py b/tests/series_only/is_empty_test.py index 80b8ab799..bd3aa61ed 100644 --- a/tests/series_only/is_empty_test.py +++ b/tests/series_only/is_empty_test.py @@ -1,9 +1,14 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager + -def test_is_empty(constructor_eager: Any) -> None: +def test_is_empty(constructor_eager: ConstructorEager) -> None: series = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] assert not series.is_empty() assert not series[:1].is_empty() diff --git a/tests/series_only/is_ordered_categorical_test.py b/tests/series_only/is_ordered_categorical_test.py index 26358f9a6..7e7db5f23 100644 --- a/tests/series_only/is_ordered_categorical_test.py +++ b/tests/series_only/is_ordered_categorical_test.py @@ -1,4 +1,6 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING import pandas as pd import 
polars as pl @@ -6,7 +8,10 @@ import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION + +if TYPE_CHECKING: + from tests.utils import ConstructorEager def test_is_ordered_categorical() -> None: @@ -26,9 +31,7 @@ def test_is_ordered_categorical() -> None: assert not nw.is_ordered_categorical(nw.from_native(s, series_only=True)) -@pytest.mark.skipif( - parse_version(pd.__version__) < (2, 0), reason="requires interchange protocol" -) +@pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="requires interchange protocol") def test_is_ordered_categorical_interchange_protocol() -> None: df = pd.DataFrame( {"a": ["a", "b"]}, dtype=pd.CategoricalDtype(ordered=True) @@ -39,7 +42,7 @@ def test_is_ordered_categorical_interchange_protocol() -> None: def test_is_definitely_not_ordered_categorical( - constructor_eager: Any, + constructor_eager: ConstructorEager, ) -> None: assert not nw.is_ordered_categorical( nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] diff --git a/tests/series_only/is_sorted_test.py b/tests/series_only/is_sorted_test.py index 3942b5619..2ff6e50f1 100644 --- a/tests/series_only/is_sorted_test.py +++ b/tests/series_only/is_sorted_test.py @@ -1,11 +1,10 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = [1, 3, 2] data_dups = [4, 4, 6] @@ -17,17 +16,17 @@ [(data, False, False), (data_sorted, False, True), (data_sorted, True, False)], ) def test_is_sorted( - constructor_eager: Any, + constructor_eager: ConstructorEager, input_data: str, descending: bool, # noqa: FBT001 expected: bool, # noqa: FBT001 ) -> None: series = nw.from_native(constructor_eager({"a": input_data}), eager_only=True)["a"] result = series.is_sorted(descending=descending) - compare_dicts({"a": [result]}, {"a": [expected]}) + assert_equal_data({"a": [result]}, {"a": [expected]}) -def test_is_sorted_invalid(constructor_eager: Any) -> None: +def test_is_sorted_invalid(constructor_eager: ConstructorEager) -> None: series = nw.from_native(constructor_eager({"a": data_sorted}), eager_only=True)["a"] with pytest.raises(TypeError): diff --git a/tests/series_only/item_test.py b/tests/series_only/item_test.py index 869bd7c38..979ac888d 100644 --- a/tests/series_only/item_test.py +++ b/tests/series_only/item_test.py @@ -1,22 +1,22 @@ from __future__ import annotations import re -from typing import Any import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = [1, 3, 2] @pytest.mark.parametrize(("index", "expected"), [(0, 1), (1, 3)]) -def test_item(constructor_eager: Any, index: int, expected: int) -> None: +def test_item(constructor_eager: ConstructorEager, index: int, expected: int) -> None: series = nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"] result = series.item(index) - compare_dicts({"a": [result]}, {"a": [expected]}) - compare_dicts({"a": [series.head(1).item()]}, {"a": [1]}) + assert_equal_data({"a": [result]}, {"a": [expected]}) + assert_equal_data({"a": [series.head(1).item()]}, {"a": [1]}) with pytest.raises( ValueError, diff --git a/tests/series_only/scatter_test.py b/tests/series_only/scatter_test.py index 0677a8dd8..11065ec97 100644 --- a/tests/series_only/scatter_test.py +++ 
b/tests/series_only/scatter_test.py @@ -1,14 +1,15 @@ from __future__ import annotations -from typing import Any - import pytest import narwhals as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_scatter(constructor_eager: Any, request: pytest.FixtureRequest) -> None: +def test_scatter( + constructor_eager: ConstructorEager, request: pytest.FixtureRequest +) -> None: if "modin" in str(constructor_eager): # https://github.com/modin-project/modin/issues/7392 request.applymarker(pytest.mark.xfail) @@ -23,10 +24,10 @@ def test_scatter(constructor_eager: Any, request: pytest.FixtureRequest) -> None "a": [999, 888, 3], "b": [142, 132, 124], } - compare_dicts(result, expected) + assert_equal_data(result, expected) -def test_scatter_unchanged(constructor_eager: Any) -> None: +def test_scatter_unchanged(constructor_eager: ConstructorEager) -> None: df = nw.from_native( constructor_eager({"a": [1, 2, 3], "b": [142, 124, 132]}), eager_only=True ) @@ -37,14 +38,14 @@ def test_scatter_unchanged(constructor_eager: Any) -> None: "a": [1, 2, 3], "b": [142, 124, 132], } - compare_dicts(df, expected) + assert_equal_data(df, expected) -def test_single_series(constructor_eager: Any) -> None: +def test_single_series(constructor_eager: ConstructorEager) -> None: df = nw.from_native( constructor_eager({"a": [1, 2, 3], "b": [142, 124, 132]}), eager_only=True ) s = df["a"] s.scatter([0, 1], [999, 888]) expected = {"a": [1, 2, 3]} - compare_dicts({"a": s}, expected) + assert_equal_data({"a": s}, expected) diff --git a/tests/series_only/shape_test.py b/tests/series_only/shape_test.py index 4a1c0726d..1ab88eca3 100644 --- a/tests/series_only/shape_test.py +++ b/tests/series_only/shape_test.py @@ -1,9 +1,14 @@ -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager + -def test_shape(constructor_eager: Any) -> None: +def test_shape(constructor_eager: ConstructorEager) -> None: result = nw.from_native(constructor_eager({"a": [1, 2]}), eager_only=True)["a"].shape expected = (2,) assert result == expected diff --git a/tests/series_only/slice_test.py b/tests/series_only/slice_test.py index 9ae194774..5f8a0a0a9 100644 --- a/tests/series_only/slice_test.py +++ b/tests/series_only/slice_test.py @@ -1,33 +1,34 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_slice(constructor_eager: Any) -> None: +def test_slice(constructor_eager: ConstructorEager) -> None: data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [1, 4, 2]} df = nw.from_native(constructor_eager(data), eager_only=True) result = {"a": df["a"][[0, 1]]} expected = {"a": [1, 2]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = {"a": df["a"][1:]} expected = {"a": [2, 3]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = {"b": df[:, 1]} expected = {"b": [4, 5, 6]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = {"b": df[:, "b"]} expected = {"b": [4, 5, 6]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = {"b": df[:2, "b"]} expected = {"b": [4, 5]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = {"b": df[:2, 
1]} expected = {"b": [4, 5]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = {"b": df[[0, 1], 1]} expected = {"b": [4, 5]} - compare_dicts(result, expected) + assert_equal_data(result, expected) result = {"b": df[[], 1]} expected = {"b": []} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/series_only/to_arrow_test.py b/tests/series_only/to_arrow_test.py index 5181a6786..ae6246e55 100644 --- a/tests/series_only/to_arrow_test.py +++ b/tests/series_only/to_arrow_test.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING import pyarrow as pa import pyarrow.compute as pc @@ -8,8 +8,11 @@ import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager -def test_to_arrow(constructor_eager: Any) -> None: + +def test_to_arrow(constructor_eager: ConstructorEager) -> None: data = [1, 2, 3] result = nw.from_native(constructor_eager({"a": data}), eager_only=True)[ "a" @@ -20,7 +23,7 @@ def test_to_arrow(constructor_eager: Any) -> None: def test_to_arrow_with_nulls( - constructor_eager: Any, request: pytest.FixtureRequest + constructor_eager: ConstructorEager, request: pytest.FixtureRequest ) -> None: if "pandas_constructor" in str(constructor_eager) or "modin_constructor" in str( constructor_eager diff --git a/tests/series_only/to_dummy_test.py b/tests/series_only/to_dummy_test.py index c3d57b9ad..10d6e971e 100644 --- a/tests/series_only/to_dummy_test.py +++ b/tests/series_only/to_dummy_test.py @@ -1,30 +1,27 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = [1, 2, 3] @pytest.mark.parametrize("sep", ["_", "-"]) -def test_to_dummies(constructor_eager: Any, sep: str) -> None: +def test_to_dummies(constructor_eager: ConstructorEager, sep: str) -> None: s = nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"].alias("a") result = s.to_dummies(separator=sep) expected = {f"a{sep}1": [1, 0, 0], f"a{sep}2": [0, 1, 0], f"a{sep}3": [0, 0, 1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize("sep", ["_", "-"]) -def test_to_dummies_drop_first( - request: pytest.FixtureRequest, constructor_eager: Any, sep: str -) -> None: - if "cudf" in str(constructor_eager): - request.applymarker(pytest.mark.xfail) +def test_to_dummies_drop_first(constructor_eager: ConstructorEager, sep: str) -> None: s = nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"].alias("a") result = s.to_dummies(drop_first=True, separator=sep) expected = {f"a{sep}2": [0, 1, 0], f"a{sep}3": [0, 0, 1]} - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/series_only/to_frame_test.py b/tests/series_only/to_frame_test.py index 890036183..cd90b6f15 100644 --- a/tests/series_only/to_frame_test.py +++ b/tests/series_only/to_frame_test.py @@ -1,15 +1,16 @@ -from typing import Any +from __future__ import annotations import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = [1, 2, 3] -def test_to_frame(constructor_eager: Any) -> None: +def test_to_frame(constructor_eager: ConstructorEager) -> None: df = ( nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"] .alias("") .to_frame() ) 
- compare_dicts(df, {"": [1, 2, 3]}) + assert_equal_data(df, {"": [1, 2, 3]}) diff --git a/tests/series_only/to_list_test.py b/tests/series_only/to_list_test.py index 11d02d0d2..84b4fad47 100644 --- a/tests/series_only/to_list_test.py +++ b/tests/series_only/to_list_test.py @@ -1,15 +1,18 @@ -from typing import Any +from __future__ import annotations import pytest import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = [1, 2, 3] -def test_to_list(constructor_eager: Any, request: pytest.FixtureRequest) -> None: +def test_to_list( + constructor_eager: ConstructorEager, request: pytest.FixtureRequest +) -> None: if "cudf" in str(constructor_eager): # pragma: no cover request.applymarker(pytest.mark.xfail) s = nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"] - compare_dicts({"a": s.to_list()}, {"a": [1, 2, 3]}) + assert_equal_data({"a": s.to_list()}, {"a": [1, 2, 3]}) diff --git a/tests/series_only/to_native_test.py b/tests/series_only/to_native_test.py index 269348ea3..e6955b4c3 100644 --- a/tests/series_only/to_native_test.py +++ b/tests/series_only/to_native_test.py @@ -1,14 +1,17 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager + data = [4, 4, 4, 1, 6, 6, 4, 4, 1, 1] -def test_to_native(constructor_eager: Any) -> None: - orig_series = constructor_eager({"a": data})["a"] +def test_to_native(constructor_eager: ConstructorEager) -> None: + orig_series = constructor_eager({"a": data})["a"] # type: ignore[index] nw_series = nw.from_native(constructor_eager({"a": data}), eager_only=True)["a"] result = nw_series.to_native() assert isinstance(result, orig_series.__class__) diff --git a/tests/series_only/to_numpy_test.py b/tests/series_only/to_numpy_test.py index 2f1464a57..966a44449 100644 --- a/tests/series_only/to_numpy_test.py +++ b/tests/series_only/to_numpy_test.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING import numpy as np import pytest @@ -8,8 +8,13 @@ import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager -def test_to_numpy(constructor_eager: Any, request: pytest.FixtureRequest) -> None: + +def test_to_numpy( + constructor_eager: ConstructorEager, request: pytest.FixtureRequest +) -> None: if ( "pandas_constructor" in str(constructor_eager) or "modin_constructor" in str(constructor_eager) diff --git a/tests/series_only/to_pandas_test.py b/tests/series_only/to_pandas_test.py index 30c7906c7..387af2709 100644 --- a/tests/series_only/to_pandas_test.py +++ b/tests/series_only/to_pandas_test.py @@ -1,21 +1,25 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING import pandas as pd import pytest from pandas.testing import assert_series_equal import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION + +if TYPE_CHECKING: + from tests.utils import ConstructorEager data = [1, 3, 2] -@pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("2.0.0"), reason="too old for pyarrow" -) -def test_convert(request: pytest.FixtureRequest, constructor_eager: Any) -> None: +@pytest.mark.skipif(PANDAS_VERSION < (2, 0, 0), reason="too old for pyarrow") +def test_convert( + request: pytest.FixtureRequest, + constructor_eager: 
ConstructorEager, +) -> None: if any( cname in str(constructor_eager) for cname in ("pandas_nullable", "pandas_pyarrow", "modin") diff --git a/tests/series_only/value_counts_test.py b/tests/series_only/value_counts_test.py index d19a1440b..da00f2ef4 100644 --- a/tests/series_only/value_counts_test.py +++ b/tests/series_only/value_counts_test.py @@ -2,12 +2,12 @@ from typing import Any -import pandas as pd import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version -from tests.utils import compare_dicts +from tests.utils import PANDAS_VERSION +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data data = [4, 4, 4, 1, 6, 6, 4, 4, 1, 1] @@ -16,13 +16,14 @@ @pytest.mark.parametrize("name", [None, "count_name"]) def test_value_counts( request: pytest.FixtureRequest, - constructor_eager: Any, + constructor_eager: ConstructorEager, normalize: Any, name: str | None, ) -> None: - if "pandas_nullable_constructor" in str(constructor_eager) and parse_version( - pd.__version__ - ) < (2, 2): + if "pandas_nullable_constructor" in str(constructor_eager) and PANDAS_VERSION < ( + 2, + 2, + ): # bug in old pandas request.applymarker(pytest.mark.xfail) @@ -40,9 +41,9 @@ def test_value_counts( ) sorted_result = series.value_counts(sort=True, name=name, normalize=normalize) - compare_dicts(sorted_result, expected) + assert_equal_data(sorted_result, expected) unsorted_result = series.value_counts( sort=False, name=name, normalize=normalize ).sort(expected_name, descending=True) - compare_dicts(unsorted_result, expected) + assert_equal_data(unsorted_result, expected) diff --git a/tests/series_only/zip_with_test.py b/tests/series_only/zip_with_test.py index 5d1461da3..b6f2d36de 100644 --- a/tests/series_only/zip_with_test.py +++ b/tests/series_only/zip_with_test.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import Any - import narwhals.stable.v1 as nw -from tests.utils import compare_dicts +from tests.utils import ConstructorEager +from tests.utils import assert_equal_data -def test_zip_with(constructor_eager: Any) -> None: +def test_zip_with(constructor_eager: ConstructorEager) -> None: series1 = nw.from_native(constructor_eager({"a": [1, 3, 2]}), eager_only=True)["a"] series2 = nw.from_native(constructor_eager({"a": [4, 4, 6]}), eager_only=True)["a"] mask = nw.from_native(constructor_eager({"a": [True, False, True]}), eager_only=True)[ @@ -15,14 +14,14 @@ def test_zip_with(constructor_eager: Any) -> None: result = series1.zip_with(mask, series2) expected = [1, 4, 2] - compare_dicts({"a": result}, {"a": expected}) + assert_equal_data({"a": result}, {"a": expected}) -def test_zip_with_length_1(constructor_eager: Any) -> None: +def test_zip_with_length_1(constructor_eager: ConstructorEager) -> None: series1 = nw.from_native(constructor_eager({"a": [1]}), eager_only=True)["a"] series2 = nw.from_native(constructor_eager({"a": [4]}), eager_only=True)["a"] mask = nw.from_native(constructor_eager({"a": [False]}), eager_only=True)["a"] result = series1.zip_with(mask, series2) expected = [4] - compare_dicts({"a": result}, {"a": expected}) + assert_equal_data({"a": result}, {"a": expected}) diff --git a/tests/stable_api_test.py b/tests/stable_api_test.py index 7a67f5723..c1b2f1404 100644 --- a/tests/stable_api_test.py +++ b/tests/stable_api_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from datetime import datetime from datetime import timedelta from typing import Any @@ -8,7 +10,7 @@ import narwhals as nw import 
narwhals.stable.v1 as nw_v1 from tests.utils import Constructor -from tests.utils import compare_dicts +from tests.utils import assert_equal_data def test_renamed_taxicab_norm(constructor: Constructor) -> None: @@ -23,7 +25,7 @@ def test_renamed_taxicab_norm(constructor: Constructor) -> None: df = nw.from_native(constructor({"a": [1, 2, 3, -4, 5]})) result = df.with_columns(b=nw.col("a")._taxicab_norm()) expected = {"a": [1, 2, 3, -4, 5], "b": [15] * 5} - compare_dicts(result, expected) + assert_equal_data(result, expected) with pytest.raises(AttributeError): result = df.with_columns(b=nw.col("a")._l1_norm()) # type: ignore[attr-defined] @@ -33,11 +35,11 @@ def test_renamed_taxicab_norm(constructor: Constructor) -> None: # It's new, so it couldn't be backwards-incompatible. result = df.with_columns(b=nw_v1.col("a")._taxicab_norm()) expected = {"a": [1, 2, 3, -4, 5], "b": [15] * 5} - compare_dicts(result, expected) + assert_equal_data(result, expected) # The older `_l1_norm` still works in the stable api result = df.with_columns(b=nw_v1.col("a")._l1_norm()) - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_renamed_taxicab_norm_dataframe(constructor: Constructor) -> None: @@ -51,7 +53,7 @@ def func(df_any: Any) -> Any: result = nw_v1.from_native(func(constructor({"a": [1, 2, 3, -4, 5]}))) expected = {"a": [15]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_renamed_taxicab_norm_dataframe_narwhalify(constructor: Constructor) -> None: @@ -64,7 +66,7 @@ def func(df: Any) -> Any: result = nw_v1.from_native(func(constructor({"a": [1, 2, 3, -4, 5]}))) expected = {"a": [15]} - compare_dicts(result, expected) + assert_equal_data(result, expected) def test_stable_api_completeness() -> None: diff --git a/tests/system_info_test.py b/tests/system_info_test.py index 30bb0c400..75a2b190f 100644 --- a/tests/system_info_test.py +++ b/tests/system_info_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import warnings from typing import Any diff --git a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index c506ee0de..99a9e1091 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -10,8 +10,8 @@ import pytest import narwhals.stable.v1 as nw -from narwhals.utils import parse_version -from tests.utils import compare_dicts +from tests.utils import PANDAS_VERSION +from tests.utils import assert_equal_data @pytest.mark.parametrize( @@ -20,7 +20,7 @@ ) @pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") def test_q1(library: str, request: pytest.FixtureRequest) -> None: - if library == "pandas" and parse_version(pd.__version__) < (1, 5): + if library == "pandas" and PANDAS_VERSION < (1, 5): request.applymarker(pytest.mark.xfail) elif library == "pandas": df_raw = pd.read_parquet("tests/data/lineitem.parquet") @@ -87,7 +87,7 @@ def test_q1(library: str, request: pytest.FixtureRequest) -> None: "avg_disc": [0.05039473684210526, 0.02, 0.05537414965986395, 0.04507042253521127], "count_order": [76, 1, 147, 71], } - compare_dicts(result, expected) + assert_equal_data(result, expected) @pytest.mark.parametrize( @@ -99,7 +99,7 @@ def test_q1(library: str, request: pytest.FixtureRequest) -> None: "ignore:.*Complex.*:UserWarning", ) def test_q1_w_generic_funcs(library: str, request: pytest.FixtureRequest) -> None: - if library == "pandas" and parse_version(pd.__version__) < (1, 5): + if library == "pandas" and PANDAS_VERSION < (1, 5): request.applymarker(pytest.mark.xfail) elif library == "pandas": df_raw = 
pd.read_parquet("tests/data/lineitem.parquet") @@ -155,14 +155,12 @@ def test_q1_w_generic_funcs(library: str, request: pytest.FixtureRequest) -> Non "avg_disc": [0.05039473684210526, 0.02, 0.05537414965986395, 0.04507042253521127], "count_order": [76, 1, 147, 71], } - compare_dicts(result, expected) + assert_equal_data(result, expected) @mock.patch.dict(os.environ, {"NARWHALS_FORCE_GENERIC": "1"}) @pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") -@pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("1.0.0"), reason="too old for pyarrow" -) +@pytest.mark.skipif(PANDAS_VERSION < (1, 0, 0), reason="too old for pyarrow") def test_q1_w_pandas_agg_generic_path() -> None: df_raw = pd.read_parquet("tests/data/lineitem.parquet") df_raw["l_shipdate"] = pd.to_datetime(df_raw["l_shipdate"]) @@ -216,4 +214,4 @@ def test_q1_w_pandas_agg_generic_path() -> None: "avg_disc": [0.05039473684210526, 0.02, 0.05537414965986395, 0.04507042253521127], "count_order": [76, 1, 147, 71], } - compare_dicts(result, expected) + assert_equal_data(result, expected) diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index 8ac33b620..53a350878 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from contextlib import nullcontext as does_not_raise from typing import Any @@ -97,6 +99,8 @@ def test_eager_only_lazy(dframe: Any, eager_only: Any, context: Any) -> None: with context: res = nw.from_native(dframe, eager_only=eager_only) assert isinstance(res, nw.LazyFrame) + if eager_only: + assert nw.from_native(dframe, eager_only=eager_only, strict=False) is dframe @pytest.mark.parametrize("dframe", eager_frames) @@ -120,6 +124,9 @@ def test_series_only(obj: Any, context: Any) -> None: with context: res = nw.from_native(obj, series_only=True) assert isinstance(res, nw.Series) + assert nw.from_native(obj, series_only=True, strict=False) is obj or isinstance( + res, nw.Series + ) @pytest.mark.parametrize("series", all_series) @@ -134,6 +141,8 @@ def test_allow_series(series: Any, allow_series: Any, context: Any) -> None: with context: res = nw.from_native(series, allow_series=allow_series) assert isinstance(res, nw.Series) + if not allow_series: + assert nw.from_native(series, allow_series=allow_series, strict=False) is series def test_invalid_series_combination() -> None: @@ -149,7 +158,9 @@ def test_pandas_like_validate() -> None: df2 = pd.DataFrame({"b": [1, 2, 3]}) df = pd.concat([df1, df2, df2], axis=1) - with pytest.raises(ValueError, match="Expected unique column names"): + with pytest.raises( + ValueError, match=r"Expected unique column names, got:\n- 'b' 2 times" + ): nw.from_native(df) @@ -180,6 +191,7 @@ def test_series_only_dask() -> None: with pytest.raises(TypeError, match="Cannot only use `series_only`"): nw.from_native(dframe, series_only=True) + assert nw.from_native(dframe, series_only=True, strict=False) is dframe @pytest.mark.parametrize( @@ -199,6 +211,8 @@ def test_eager_only_lazy_dask(eager_only: Any, context: Any) -> None: with context: res = nw.from_native(dframe, eager_only=eager_only) assert isinstance(res, nw.LazyFrame) + if eager_only: + assert nw.from_native(dframe, eager_only=eager_only, strict=False) is dframe def test_from_native_strict_false_typing() -> None: @@ -210,3 +224,13 @@ def test_from_native_strict_false_typing() -> None: unstable_nw.from_native(df, strict=False) unstable_nw.from_native(df, strict=False, 
eager_only=True) unstable_nw.from_native(df, strict=False, eager_or_interchange_only=True) + + +def test_from_mock_interchange_protocol_non_strict() -> None: + class MockDf: + def __dataframe__(self) -> None: # pragma: no cover + pass + + mockdf = MockDf() + result = nw.from_native(mockdf, eager_only=True, strict=False) + assert result is mockdf # type: ignore[comparison-overlap] diff --git a/tests/translate/get_native_namespace_test.py b/tests/translate/get_native_namespace_test.py index 60b80a1d9..f02c4c8da 100644 --- a/tests/translate/get_native_namespace_test.py +++ b/tests/translate/get_native_namespace_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import pandas as pd import polars as pl import pyarrow as pa diff --git a/tests/translate/to_native_test.py b/tests/translate/to_native_test.py index 03d7704ec..3d116a459 100644 --- a/tests/translate/to_native_test.py +++ b/tests/translate/to_native_test.py @@ -1,10 +1,16 @@ +from __future__ import annotations + from contextlib import nullcontext as does_not_raise +from typing import TYPE_CHECKING from typing import Any import pytest import narwhals.stable.v1 as nw +if TYPE_CHECKING: + from tests.utils import ConstructorEager + @pytest.mark.parametrize( ("method", "strict", "context"), @@ -20,7 +26,7 @@ ], ) def test_to_native( - constructor_eager: Any, method: str, strict: Any, context: Any + constructor_eager: ConstructorEager, method: str, strict: Any, context: Any ) -> None: df = nw.from_native(constructor_eager({"a": [1, 2, 3]})) diff --git a/tests/translate/to_py_scalar_test.py b/tests/translate/to_py_scalar_test.py new file mode 100644 index 000000000..53c3df738 --- /dev/null +++ b/tests/translate/to_py_scalar_test.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from datetime import datetime +from datetime import timedelta +from typing import TYPE_CHECKING +from typing import Any + +import numpy as np +import pandas as pd +import pytest + +import narwhals.stable.v1 as nw +from narwhals.dependencies import get_cudf + +if TYPE_CHECKING: + from tests.utils import ConstructorEager + + +@pytest.mark.parametrize( + ("input_value", "expected"), + [ + (1, 1), + (1.0, 1.0), + ("a", "a"), + (True, True), + (b"a", b"a"), + (datetime(2021, 1, 1), datetime(2021, 1, 1)), + (timedelta(days=1), timedelta(days=1)), + ], +) +def test_to_py_scalar( + constructor_eager: ConstructorEager, + input_value: Any, + expected: Any, + request: pytest.FixtureRequest, +) -> None: + if isinstance(input_value, bytes) and "cudf" in str(constructor_eager): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor_eager({"a": [input_value]})) + output = nw.to_py_scalar(df["a"].item(0)) + if expected == 1 and constructor_eager.__name__.startswith("pandas"): + assert not isinstance(output, np.int64) + elif isinstance(expected, datetime) and constructor_eager.__name__.startswith( + "pandas" + ): + assert not isinstance(output, pd.Timestamp) + elif isinstance(expected, timedelta) and constructor_eager.__name__.startswith( + "pandas" + ): + assert not isinstance(output, pd.Timedelta) + assert output == expected + + +@pytest.mark.parametrize( + "input_value", + [np.array([1, 2]), [1, 2, 3], {"a": [1, 2, 3]}], +) +def test_to_py_scalar_value_error(input_value: Any) -> None: + with pytest.raises(ValueError, match="Expected object convertible to a scalar"): + nw.to_py_scalar(input_value) + + +def test_to_py_scalar_value_error_cudf() -> None: + if cudf := get_cudf(): # pragma: no cover + df = nw.from_native(cudf.DataFrame({"a": [1, 2, 
3]})) + + with pytest.raises(ValueError, match="Expected object convertible to a scalar"): + nw.to_py_scalar(df["a"]) diff --git a/tests/utils.py b/tests/utils.py index 15ce25140..90143959d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -10,15 +10,33 @@ import pandas as pd +import narwhals as nw +from narwhals.typing import IntoDataFrame from narwhals.typing import IntoFrame from narwhals.utils import Implementation +from narwhals.utils import parse_version if sys.version_info >= (3, 10): from typing import TypeAlias # pragma: no cover else: from typing_extensions import TypeAlias # pragma: no cover + +def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: + try: + return parse_version(__import__(module_name).__version__) + except ImportError: + return (0, 0, 0) + + +IBIS_VERSION: tuple[int, ...] = get_module_version_as_tuple("ibis") +NUMPY_VERSION: tuple[int, ...] = get_module_version_as_tuple("numpy") +PANDAS_VERSION: tuple[int, ...] = get_module_version_as_tuple("pandas") +POLARS_VERSION: tuple[int, ...] = get_module_version_as_tuple("polars") +PYARROW_VERSION: tuple[int, ...] = get_module_version_as_tuple("pyarrow") + Constructor: TypeAlias = Callable[[Any], IntoFrame] +ConstructorEager: TypeAlias = Callable[[Any], IntoDataFrame] def zip_strict(left: Sequence[Any], right: Sequence[Any]) -> Iterator[Any]: @@ -28,36 +46,37 @@ def zip_strict(left: Sequence[Any], right: Sequence[Any]) -> Iterator[Any]: return zip(left, right) -def compare_dicts(result: Any, expected: dict[str, Any]) -> None: +def _to_comparable_list(column_values: Any) -> Any: + if ( + hasattr(column_values, "_compliant_series") + and column_values._compliant_series._implementation is Implementation.CUDF + ): # pragma: no cover + column_values = column_values.to_pandas() + if hasattr(column_values, "to_list"): + return column_values.to_list() + return [nw.to_py_scalar(v) for v in column_values] + + +def assert_equal_data(result: Any, expected: dict[str, Any]) -> None: if hasattr(result, "collect"): result = result.collect() if hasattr(result, "columns"): for key in result.columns: assert key in expected + result = {key: _to_comparable_list(result[key]) for key in expected} for key in expected: result_key = result[key] - if ( - hasattr(result_key, "_compliant_series") - and result_key._compliant_series._implementation is Implementation.CUDF - ): # pragma: no cover - result_key = result_key.to_pandas() - for lhs, rhs in zip_strict(result_key, expected[key]): - if hasattr(lhs, "as_py"): - lhs = lhs.as_py() # noqa: PLW2901 - if hasattr(rhs, "as_py"): # pragma: no cover - rhs = rhs.as_py() # noqa: PLW2901 - if hasattr(lhs, "item"): # pragma: no cover - lhs = lhs.item() # noqa: PLW2901 - if hasattr(rhs, "item"): # pragma: no cover - rhs = rhs.item() # noqa: PLW2901 + expected_key = expected[key] + for i, (lhs, rhs) in enumerate(zip_strict(result_key, expected_key)): if isinstance(lhs, float) and not math.isnan(lhs): - assert math.isclose(lhs, rhs, rel_tol=0, abs_tol=1e-6), (lhs, rhs) - elif isinstance(lhs, float) and math.isnan(lhs): - assert math.isnan(rhs), (lhs, rhs) # pragma: no cover + are_equivalent_values = math.isclose(lhs, rhs, rel_tol=0, abs_tol=1e-6) + elif isinstance(lhs, float) and math.isnan(lhs) and rhs is not None: + are_equivalent_values = math.isnan(rhs) # pragma: no cover elif pd.isna(lhs): - assert pd.isna(rhs), (lhs, rhs) + are_equivalent_values = pd.isna(rhs) else: - assert lhs == rhs, (lhs, rhs) + are_equivalent_values = lhs == rhs + assert are_equivalent_values, f"Mismatch at index 
{i}: {lhs} != {rhs}\nExpected: {expected}\nGot: {result}" def maybe_get_modin_df(df_pandas: pd.DataFrame) -> Any: diff --git a/tests/utils_test.py b/tests/utils_test.py index cea458bc9..fb668b4d2 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -1,12 +1,19 @@ +from __future__ import annotations + +import string + +import hypothesis.strategies as st import pandas as pd import polars as pl import pytest +from hypothesis import given from pandas.testing import assert_frame_equal from pandas.testing import assert_index_equal from pandas.testing import assert_series_equal import narwhals.stable.v1 as nw -from narwhals.utils import parse_version +from tests.utils import PANDAS_VERSION +from tests.utils import get_module_version_as_tuple def test_maybe_align_index_pandas() -> None: @@ -91,12 +98,22 @@ def test_maybe_reset_index_pandas() -> None: result = nw.maybe_reset_index(pandas_df) expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[0, 1, 2]) assert_frame_equal(nw.to_native(result), expected) + pandas_df = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) + result = nw.maybe_reset_index(pandas_df) + expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + assert_frame_equal(nw.to_native(result), expected) + assert result.to_native() is pandas_df.to_native() pandas_series = nw.from_native( pd.Series([1, 2, 3], index=[7, 8, 9]), series_only=True ) result_s = nw.maybe_reset_index(pandas_series) expected_s = pd.Series([1, 2, 3], index=[0, 1, 2]) assert_series_equal(nw.to_native(result_s), expected_s) + pandas_series = nw.from_native(pd.Series([1, 2, 3]), series_only=True) + result_s = nw.maybe_reset_index(pandas_series) + expected_s = pd.Series([1, 2, 3]) + assert_series_equal(nw.to_native(result_s), expected_s) + assert result_s.to_native() is pandas_series.to_native() def test_maybe_reset_index_polars() -> None: @@ -108,10 +125,7 @@ def test_maybe_reset_index_polars() -> None: assert result_s is series -@pytest.mark.skipif( - parse_version(pd.__version__) < parse_version("1.0.0"), - reason="too old for convert_dtypes", -) +@pytest.mark.skipif(PANDAS_VERSION < (1, 0, 0), reason="too old for convert_dtypes") def test_maybe_convert_dtypes_pandas() -> None: import numpy as np @@ -132,3 +146,34 @@ def test_maybe_convert_dtypes_polars() -> None: df = nw.from_native(pl.DataFrame({"a": [1.1, np.nan]})) result = nw.maybe_convert_dtypes(df) assert result is df + + +def test_get_trivial_version_with_uninstalled_module() -> None: + result = get_module_version_as_tuple("non_existent_module") + assert result == (0, 0, 0) + + +@given(n_bytes=st.integers(1, 100)) # type: ignore[misc] +def test_generate_temporary_column_name(n_bytes: int) -> None: + columns = ["abc", "XYZ"] + + temp_col_name = nw.generate_temporary_column_name(n_bytes=n_bytes, columns=columns) + assert temp_col_name not in columns + + +def test_generate_temporary_column_name_raise() -> None: + from itertools import product + + columns = [ + "".join(t) + for t in product( + string.ascii_lowercase + string.digits, + string.ascii_lowercase + string.digits, + ) + ] + + with pytest.raises( + AssertionError, + match="Internal Error: Narwhals was not able to generate a column name with ", + ): + nw.generate_temporary_column_name(n_bytes=1, columns=columns) diff --git a/tpch/__init__.py b/tpch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tpch/execute/__init__.py b/tpch/execute/__init__.py index e0c448649..ecbf1db53 100644 --- a/tpch/execute/__init__.py +++ b/tpch/execute/__init__.py 
@@ -1,3 +1,5 @@ +from __future__ import annotations + from pathlib import Path import dask.dataframe as dd diff --git a/tpch/execute/q1.py b/tpch/execute/q1.py index 9889c3af0..d0ebce584 100644 --- a/tpch/execute/q1.py +++ b/tpch/execute/q1.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q1 from . import IO_FUNCS diff --git a/tpch/execute/q10.py b/tpch/execute/q10.py index 124bf0f7d..1f610932c 100644 --- a/tpch/execute/q10.py +++ b/tpch/execute/q10.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q10 from . import IO_FUNCS diff --git a/tpch/execute/q11.py b/tpch/execute/q11.py index 8c0a2e649..0dd8a243c 100644 --- a/tpch/execute/q11.py +++ b/tpch/execute/q11.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q11 from . import IO_FUNCS diff --git a/tpch/execute/q12.py b/tpch/execute/q12.py index 3c3a70c62..f684e22ad 100644 --- a/tpch/execute/q12.py +++ b/tpch/execute/q12.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q12 from . import IO_FUNCS diff --git a/tpch/execute/q13.py b/tpch/execute/q13.py index 2fdda5bd3..7b03a2f2f 100644 --- a/tpch/execute/q13.py +++ b/tpch/execute/q13.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q13 from . import IO_FUNCS diff --git a/tpch/execute/q14.py b/tpch/execute/q14.py index dfd54056e..a82330136 100644 --- a/tpch/execute/q14.py +++ b/tpch/execute/q14.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q14 from . import IO_FUNCS diff --git a/tpch/execute/q15.py b/tpch/execute/q15.py index 86a03b0a0..40b4432b1 100644 --- a/tpch/execute/q15.py +++ b/tpch/execute/q15.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q15 from . import IO_FUNCS diff --git a/tpch/execute/q16.py b/tpch/execute/q16.py index 6a70279d0..ef30f935c 100644 --- a/tpch/execute/q16.py +++ b/tpch/execute/q16.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q16 from . import IO_FUNCS diff --git a/tpch/execute/q17.py b/tpch/execute/q17.py index 43ef4f8b1..0b7ca4a66 100644 --- a/tpch/execute/q17.py +++ b/tpch/execute/q17.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q17 from . import IO_FUNCS diff --git a/tpch/execute/q18.py b/tpch/execute/q18.py index c7e5b7954..a096deb2f 100644 --- a/tpch/execute/q18.py +++ b/tpch/execute/q18.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q18 from . import IO_FUNCS diff --git a/tpch/execute/q19.py b/tpch/execute/q19.py index 60f91b052..23095a890 100644 --- a/tpch/execute/q19.py +++ b/tpch/execute/q19.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q19 from . import IO_FUNCS diff --git a/tpch/execute/q2.py b/tpch/execute/q2.py index cd82a9047..0e2d07019 100644 --- a/tpch/execute/q2.py +++ b/tpch/execute/q2.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q2 from . import IO_FUNCS diff --git a/tpch/execute/q20.py b/tpch/execute/q20.py index 3984b7580..c4ffa43b4 100644 --- a/tpch/execute/q20.py +++ b/tpch/execute/q20.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q20 from . import IO_FUNCS diff --git a/tpch/execute/q21.py b/tpch/execute/q21.py index 7cf772d8e..d6fb272ad 100644 --- a/tpch/execute/q21.py +++ b/tpch/execute/q21.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q21 from . 
import IO_FUNCS diff --git a/tpch/execute/q22.py b/tpch/execute/q22.py index a2bb1e76d..f71fc4220 100644 --- a/tpch/execute/q22.py +++ b/tpch/execute/q22.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q22 from . import IO_FUNCS diff --git a/tpch/execute/q3.py b/tpch/execute/q3.py index d6b9302cc..bbcc51d5c 100644 --- a/tpch/execute/q3.py +++ b/tpch/execute/q3.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q3 from . import IO_FUNCS diff --git a/tpch/execute/q4.py b/tpch/execute/q4.py index 5645574f8..bcfd3a158 100644 --- a/tpch/execute/q4.py +++ b/tpch/execute/q4.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q4 from . import IO_FUNCS diff --git a/tpch/execute/q5.py b/tpch/execute/q5.py index dcc61027b..66524c5a8 100644 --- a/tpch/execute/q5.py +++ b/tpch/execute/q5.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q5 from . import IO_FUNCS diff --git a/tpch/execute/q6.py b/tpch/execute/q6.py index 154964ff4..1d650b794 100644 --- a/tpch/execute/q6.py +++ b/tpch/execute/q6.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q6 from . import IO_FUNCS diff --git a/tpch/execute/q7.py b/tpch/execute/q7.py index a08d5641c..069fb258b 100644 --- a/tpch/execute/q7.py +++ b/tpch/execute/q7.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q7 from . import IO_FUNCS diff --git a/tpch/execute/q8.py b/tpch/execute/q8.py index a76a8051f..8c3aa5de9 100644 --- a/tpch/execute/q8.py +++ b/tpch/execute/q8.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q8 from . import IO_FUNCS diff --git a/tpch/execute/q9.py b/tpch/execute/q9.py index 14230af64..4c8e6874c 100644 --- a/tpch/execute/q9.py +++ b/tpch/execute/q9.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from queries import q9 from . 
import IO_FUNCS diff --git a/tpch/generate_data.py b/tpch/generate_data.py index 5fd73b1f7..d0a370a2a 100644 --- a/tpch/generate_data.py +++ b/tpch/generate_data.py @@ -1,4 +1,6 @@ -from pathlib import Path # noqa: INP001 +from __future__ import annotations + +from pathlib import Path import duckdb import pyarrow as pa diff --git a/tpch/queries/q1.py b/tpch/queries/q1.py index de6157702..a9c887b0a 100644 --- a/tpch/queries/q1.py +++ b/tpch/queries/q1.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q10.py b/tpch/queries/q10.py index 486e4ba82..b83d1e1b0 100644 --- a/tpch/queries/q10.py +++ b/tpch/queries/q10.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q11.py b/tpch/queries/q11.py index d5b48b359..66bacd593 100644 --- a/tpch/queries/q11.py +++ b/tpch/queries/q11.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q12.py b/tpch/queries/q12.py index ced775830..fb2a3dabe 100644 --- a/tpch/queries/q12.py +++ b/tpch/queries/q12.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q13.py b/tpch/queries/q13.py index adf57e5a2..e7499b158 100644 --- a/tpch/queries/q13.py +++ b/tpch/queries/q13.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q14.py b/tpch/queries/q14.py index f1ec6cbe3..44d176772 100644 --- a/tpch/queries/q14.py +++ b/tpch/queries/q14.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q15.py b/tpch/queries/q15.py index 1ebae57d6..f6a23dd30 100644 --- a/tpch/queries/q15.py +++ b/tpch/queries/q15.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q16.py b/tpch/queries/q16.py index d84b9aab5..f3609ae3d 100644 --- a/tpch/queries/q16.py +++ b/tpch/queries/q16.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q17.py b/tpch/queries/q17.py index 976f476f0..cf507efad 100644 --- a/tpch/queries/q17.py +++ b/tpch/queries/q17.py @@ -1,5 +1,11 @@ +from __future__ import annotations 
+ +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q18.py b/tpch/queries/q18.py index d3d183176..cdeeeca0a 100644 --- a/tpch/queries/q18.py +++ b/tpch/queries/q18.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q19.py b/tpch/queries/q19.py index bcab36e9a..63cb11dd3 100644 --- a/tpch/queries/q19.py +++ b/tpch/queries/q19.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q2.py b/tpch/queries/q2.py index 0e9e90d09..82c76bd34 100644 --- a/tpch/queries/q2.py +++ b/tpch/queries/q2.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q20.py b/tpch/queries/q20.py index b0dabb29e..0cb82e394 100644 --- a/tpch/queries/q20.py +++ b/tpch/queries/q20.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q21.py b/tpch/queries/q21.py index d10ff394f..5d6cc8784 100644 --- a/tpch/queries/q21.py +++ b/tpch/queries/q21.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q22.py b/tpch/queries/q22.py index 2e0973227..7bd76761f 100644 --- a/tpch/queries/q22.py +++ b/tpch/queries/q22.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q3.py b/tpch/queries/q3.py index 04679bccb..517d40154 100644 --- a/tpch/queries/q3.py +++ b/tpch/queries/q3.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q4.py b/tpch/queries/q4.py index a1b96be15..12a5cecd8 100644 --- a/tpch/queries/q4.py +++ b/tpch/queries/q4.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q5.py b/tpch/queries/q5.py index 2965868c9..39b402077 100644 --- a/tpch/queries/q5.py +++ b/tpch/queries/q5.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git 
a/tpch/queries/q6.py b/tpch/queries/q6.py index 67f0ac785..66b286b4b 100644 --- a/tpch/queries/q6.py +++ b/tpch/queries/q6.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q7.py b/tpch/queries/q7.py index ec0946ac3..576a1804c 100644 --- a/tpch/queries/q7.py +++ b/tpch/queries/q7.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import datetime +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q8.py b/tpch/queries/q8.py index ac3fa4baf..1ece5604b 100644 --- a/tpch/queries/q8.py +++ b/tpch/queries/q8.py @@ -1,7 +1,12 @@ +from __future__ import annotations + from datetime import date +from typing import TYPE_CHECKING import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/queries/q9.py b/tpch/queries/q9.py index 09dff4787..048538bc3 100644 --- a/tpch/queries/q9.py +++ b/tpch/queries/q9.py @@ -1,5 +1,11 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import narwhals as nw -from narwhals.typing import FrameT + +if TYPE_CHECKING: + from narwhals.typing import FrameT @nw.narwhalify diff --git a/tpch/tests/queries_test.py b/tpch/tests/queries_test.py index 35909b683..c228fd52b 100644 --- a/tpch/tests/queries_test.py +++ b/tpch/tests/queries_test.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import subprocess import sys from pathlib import Path diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py index 69c310439..b7d8595aa 100644 --- a/utils/check_api_reference.py +++ b/utils/check_api_reference.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import os import sys @@ -31,7 +33,14 @@ "zip_with", "__iter__", } -BASE_DTYPES = {"NumericType", "DType", "TemporalType", "Literal"} +BASE_DTYPES = { + "NumericType", + "DType", + "TemporalType", + "Literal", + "OrderedDict", + "Mapping", +} files = {remove_suffix(i, ".py") for i in os.listdir("narwhals")} @@ -46,7 +55,7 @@ for i in content.splitlines() if i.startswith(" - ") ] -if missing := set(top_level_functions).difference(documented): +if missing := set(top_level_functions).difference(documented).difference({"annotations"}): print("top-level functions: not documented") # noqa: T201 print(missing) # noqa: T201 ret = 1 diff --git a/utils/check_for_no_build_errors.py b/utils/check_for_no_build_errors.py index 995411e9d..48b5a9314 100644 --- a/utils/check_for_no_build_errors.py +++ b/utils/check_for_no_build_errors.py @@ -5,6 +5,8 @@ This is just used in CI. 
""" +from __future__ import annotations + import sys with open("output.txt") as fd: diff --git a/utils/generate_random_versions.py b/utils/generate_random_versions.py index ecb709c1a..7ad8e044d 100644 --- a/utils/generate_random_versions.py +++ b/utils/generate_random_versions.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import random PANDAS_AND_NUMPY_VERSION = [ diff --git a/utils/generate_zen_content.py b/utils/generate_zen_content.py new file mode 100644 index 000000000..001f7263c --- /dev/null +++ b/utils/generate_zen_content.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Final + +from narwhals.this import ZEN + +DESTINATION_PATH: Final[Path] = Path("docs") / "this.md" + +content = f""" +# The Zen of Narwhals + +The well famous Python easter egg `import this` will reveal The Zen of Python, by Tim Peters. + +Narwhals took inspiration from _this_ and created its own Zen. + +```py +import narwhals.this +``` + +```terminal +{ZEN} +``` +""" + +with DESTINATION_PATH.open(mode="w") as destination: + destination.write(content)