# Patch summary (leading text before the first "diff --git" header is
# skipped by patch tools):
#
# tests/test_common.py
#   - imports numpy and adds a module-level polars LazyFrame fixture, df_lazy
#   - adds df_lazy to the "df_raw" parametrization of the existing tests
#   - adds test_rename, test_join, test_schema, test_columns and
#     test_accepted_dataframes (the last one expects nw.DataFrame to raise
#     TypeError for a numpy array)
#
# tests/utils.py
#   - compare_dicts now calls .collect() when given a polars LazyFrame
#   - NOTE(review): every pre-existing line is removed and re-added with the
#     same visible text; presumably a line-ending or whitespace-only change --
#     confirm against the repository.
#
# NOTE(review): line breaks and 4-space indentation were restored from the
# hunk header counts; confirm whitespace against the original commit.
diff --git a/tests/test_common.py b/tests/test_common.py
index fda506420..ec4ddfefd 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -2,6 +2,7 @@
 
 from typing import Any
 
+import numpy as np
 import pandas as pd
 import polars as pl
 import pytest
@@ -11,11 +12,12 @@
 
 df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
 df_polars = pl.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
+df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
 
 
 @pytest.mark.parametrize(
     "df_raw",
-    [df_pandas, df_polars],
+    [df_pandas, df_polars, df_lazy],
 )
 def test_sort(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
@@ -31,7 +33,7 @@ def test_sort(df_raw: Any) -> None:
 
 @pytest.mark.parametrize(
     "df_raw",
-    [df_pandas, df_polars],
+    [df_pandas, df_polars, df_lazy],
 )
 def test_filter(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
@@ -43,7 +45,7 @@ def test_filter(df_raw: Any) -> None:
 
 @pytest.mark.parametrize(
     "df_raw",
-    [df_pandas, df_polars],
+    [df_pandas, df_polars, df_lazy],
 )
 def test_add(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
@@ -64,7 +66,7 @@ def test_add(df_raw: Any) -> None:
 
 @pytest.mark.parametrize(
     "df_raw",
-    [df_pandas, df_polars],
+    [df_pandas, df_polars, df_lazy],
 )
 def test_double(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
@@ -74,7 +76,7 @@ def test_double(df_raw: Any) -> None:
     compare_dicts(result_native, expected)
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
 def test_sumh(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
     result = df.with_columns(horizonal_sum=nw.sum_horizontal(nw.col("a"), nw.col("b")))
@@ -88,7 +90,7 @@ def test_sumh(df_raw: Any) -> None:
     compare_dicts(result_native, expected)
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
 def test_sumh_literal(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
     result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b")))
@@ -102,7 +104,7 @@ def test_sumh_literal(df_raw: Any) -> None:
     compare_dicts(result_native, expected)
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
 def test_sum_all(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
     result = df.select(nw.all().sum())
@@ -111,10 +113,55 @@ def test_sum_all(df_raw: Any) -> None:
     compare_dicts(result_native, expected)
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
 def test_double_selected(df_raw: Any) -> None:
     df = nw.DataFrame(df_raw)
     result = df.select(nw.col("a", "b") * 2)
     result_native = nw.to_native(result)
     expected = {"a": [2, 6, 4], "b": [8, 8, 12]}
     compare_dicts(result_native, expected)
+
+
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
+def test_rename(df_raw: Any) -> None:
+    df = nw.DataFrame(df_raw)
+    result = df.rename({"a": "x", "b": "y"})
+    result_native = nw.to_native(result)
+    expected = {"x": [1, 3, 2], "y": [4, 4, 6], "z": [7.0, 8, 9]}
+    compare_dicts(result_native, expected)
+
+
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
+def test_join(df_raw: Any) -> None:
+    df = nw.DataFrame(df_raw)
+    df_right = df.rename({"z": "z_right"})
+    result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner")
+    result_native = nw.to_native(result)
+    expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]}
+    compare_dicts(result_native, expected)
+
+
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
+def test_schema(df_raw: Any) -> None:
+    df = nw.DataFrame(df_raw)
+    result = df.schema
+    expected = {"a": nw.dtypes.Int64, "b": nw.dtypes.Int64, "z": nw.dtypes.Float64}
+    assert result == expected
+
+
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
+def test_columns(df_raw: Any) -> None:
+    df = nw.DataFrame(df_raw)
+    result = df.columns
+    expected = ["a", "b", "z"]
+    assert len(result) == len(expected)
+    assert all(x == y for x, y in zip(result, expected))
+
+
+def test_accepted_dataframes() -> None:
+    array = np.array([[0, 4.0], [2, 5]])
+    with pytest.raises(
+        TypeError,
+        match="Expected pandas or Polars dataframe or lazyframe, got: ",
+    ):
+        nw.DataFrame(array)
diff --git a/tests/utils.py b/tests/utils.py
index c00006b86..385cf0db6 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,12 +1,16 @@
-from __future__ import annotations
-
-from typing import Any
-
-
-def compare_dicts(result: dict[str, Any], expected: dict[str, Any]) -> None:
-    for key in expected:
-        for lhs, rhs in zip(result[key], expected[key]):
-            if isinstance(lhs, float):
-                assert abs(lhs - rhs) < 1e-6
-            else:
-                assert lhs == rhs
+from __future__ import annotations
+
+from typing import Any
+
+import polars as pl
+
+
+def compare_dicts(result: dict[str, Any], expected: dict[str, Any]) -> None:
+    if isinstance(result, pl.LazyFrame):
+        result = result.collect()
+    for key in expected:
+        for lhs, rhs in zip(result[key], expected[key]):
+            if isinstance(lhs, float):
+                assert abs(lhs - rhs) < 1e-6
+            else:
+                assert lhs == rhs