diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index eba7114ce..384fa89fb 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -26,10 +26,37 @@ jobs: $RUNNER_TOOL_CACHE/Python/* ~\AppData\Local\pip\Cache key: ${{ runner.os }}-build-${{ matrix.python-version }} + - name: install-minimum-versions + run: python -m pip install tox virtualenv setuptools pandas==0.25.3 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata - name: install-reqs - run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt - - name: install-modin - run: python -m pip install pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata + run: python -m pip install -r requirements-dev.txt + - name: Run pytest + run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow + + pretty_old_versions: + strategy: + matrix: + python-version: ["3.8"] + os: [ubuntu-latest] + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Cache multiple paths + uses: actions/cache@v4 + with: + path: | + ~/.cache/pip + $RUNNER_TOOL_CACHE/Python/* + ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-build-${{ matrix.python-version }} + - name: install-minimum-versions + run: python -m pip install tox virtualenv setuptools pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata + - name: install-reqs + run: python -m pip install -r requirements-dev.txt - name: Run pytest run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow - name: Run doctests diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index e4886195d..2b68e8460 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -334,7 +334,7 @@ def join( 
.rename( # rename to avoid creating extra columns in join columns=dict(zip(right_on, left_on)) # type: ignore[arg-type] ) - .drop_duplicates(ignore_index=True) + .drop_duplicates() ) return self._from_dataframe( self._dataframe.merge( diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index d9a3fe632..2422df8ab 100644 --- a/narwhals/_pandas_like/group_by.py +++ b/narwhals/_pandas_like/group_by.py @@ -154,7 +154,7 @@ def func(df: Any) -> Any: for expr in exprs: results_keys = expr._call(from_dataframe(df)) for result_keys in results_keys: - out_group.append(result_keys._series.item()) + out_group.append(result_keys._series.iloc[0]) out_names.append(result_keys.name) return native_series_from_iterable( out_group, index=out_names, name="", implementation=implementation diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index cd94b03f8..620fb3b73 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -434,9 +434,14 @@ def alias(self, name: str) -> Self: def to_numpy(self) -> Any: has_missing = self._series.isna().any() if has_missing and str(self._series.dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING: + if self._implementation == "pandas" and parse_version( + get_pandas().__version__ + ) < parse_version("1.0.0"): # pragma: no cover + kwargs = {} + else: + kwargs = {"na_value": float("nan")} return self._series.to_numpy( - dtype=PANDAS_TO_NUMPY_DTYPE_MISSING[str(self._series.dtype)], - na_value=float("nan"), + dtype=PANDAS_TO_NUMPY_DTYPE_MISSING[str(self._series.dtype)], **kwargs ) if ( not has_missing diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 3f00142bb..add99a3cd 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -74,9 +74,9 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any: if isinstance(other, PandasSeries): if other.len() == 1: # broadcast - return other._series.item() - if other._series.index 
is not index and not (other._series.index == index).all(): - return other._series.set_axis(index, axis=0) + return other._series.iloc[0] + if other._series.index is not index: + return set_axis(other._series, index, implementation=other._implementation) return other._series raise AssertionError("Please report a bug") @@ -375,12 +375,19 @@ def native_series_from_iterable( def set_axis(obj: T, index: Any, implementation: str) -> T: + if implementation == "pandas" and parse_version( + get_pandas().__version__ + ) < parse_version("1.0.0"): # pragma: no cover + kwargs = {"inplace": False} + else: + kwargs = {} if implementation == "pandas" and parse_version( get_pandas().__version__ ) >= parse_version("1.5.0"): - return obj.set_axis(index, axis=0, copy=False) # type: ignore[no-any-return, attr-defined] + kwargs["copy"] = False else: # pragma: no cover - return obj.set_axis(index, axis=0) # type: ignore[no-any-return, attr-defined] + pass + return obj.set_axis(index, axis=0, **kwargs) # type: ignore[no-any-return, attr-defined] def translate_dtype(column: Any) -> DType: @@ -591,7 +598,7 @@ def validate_indices(series: list[PandasSeries]) -> list[Any]: reindexed = [series[0]._series] for s in series[1:]: if s._series.index is not idx: - reindexed.append(s._series.set_axis(idx.rename(s._series.index.name), axis=0)) + reindexed.append(set_axis(s._series, idx, s._implementation)) else: reindexed.append(s._series) return reindexed diff --git a/pyproject.toml b/pyproject.toml index 72465ca12..344d6f1b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ classifiers = [ ] [project.optional-dependencies] -pandas = ["pandas>=1.1.5"] +pandas = ["pandas>=0.25.3"] polars = ["polars>=0.20.3"] pyarrow = ['pyarrow>=11.0.0'] @@ -77,6 +77,7 @@ docstring-code-format = true filterwarnings = [ "error", 'ignore:distutils Version classes are deprecated:DeprecationWarning', + 'ignore:In the future `np.bool`', ] xfail_strict = true markers = ["slow: marks tests as slow (deselect 
with '-m \"not slow\"')"] diff --git a/tests/frame/test_common.py b/tests/frame/test_common.py index 8533d9fcb..7b1628cee 100644 --- a/tests/frame/test_common.py +++ b/tests/frame/test_common.py @@ -1,6 +1,7 @@ from __future__ import annotations import re +import warnings from typing import TYPE_CHECKING from typing import Any from typing import Literal @@ -12,7 +13,6 @@ import pytest from pandas.testing import assert_series_equal as pd_assert_series_equal from polars.testing import assert_series_equal as pl_assert_series_equal -from sklearn.utils._testing import ignore_warnings import narwhals as nw from narwhals.functions import _get_deps_info @@ -444,6 +444,10 @@ def test_accepted_dataframes() -> None: @pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) @pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") +@pytest.mark.skipif( + parse_version(pd.__version__) < parse_version("2.0.0"), + reason="too old for pandas-pyarrow", +) def test_convert_pandas(df_raw: Any) -> None: result = nw.from_native(df_raw).to_pandas() # type: ignore[union-attr] expected = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) @@ -851,7 +855,9 @@ def test_with_columns_order_single_row(df_raw: Any) -> None: def test_get_sys_info() -> None: - with ignore_warnings(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + show_versions() sys_info = _get_sys_info() assert "python" in sys_info @@ -860,7 +866,9 @@ def test_get_sys_info() -> None: def test_get_deps_info() -> None: - with ignore_warnings(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + show_versions() deps_info = _get_deps_info() assert "narwhals" in deps_info @@ -873,7 +881,8 @@ def test_get_deps_info() -> None: def test_show_versions(capsys: Any) -> None: - with ignore_warnings(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") show_versions() out, err = capsys.readouterr() diff --git 
a/tests/frame/test_invalid.py b/tests/frame/test_invalid.py index 01ac1a7dc..77cafa4fa 100644 --- a/tests/frame/test_invalid.py +++ b/tests/frame/test_invalid.py @@ -1,8 +1,6 @@ import pandas as pd import polars as pl import pytest -from sklearn.utils import check_X_y -from sklearn.utils._testing import create_memmap_backed_data import narwhals as nw from narwhals.utils import parse_version @@ -33,6 +31,9 @@ def test_validate_laziness() -> None: ) def test_memmap() -> None: # the headache this caused me... + from sklearn.utils import check_X_y + from sklearn.utils._testing import create_memmap_backed_data + x_any = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) y_any = create_memmap_backed_data(x_any["b"]) diff --git a/tests/frame/write_parquet_test.py b/tests/frame/write_parquet_test.py index c154011bf..db0835be2 100644 --- a/tests/frame/write_parquet_test.py +++ b/tests/frame/write_parquet_test.py @@ -1,17 +1,20 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING from typing import Any +import pandas as pd +import pytest + import narwhals as nw +from narwhals.utils import parse_version data = {"a": [1, 2, 3]} -if TYPE_CHECKING: - import pytest - +@pytest.mark.skipif( + parse_version(pd.__version__) < parse_version("2.0.0"), reason="too old for pyarrow" +) def test_write_parquet(constructor: Any, tmpdir: pytest.TempdirFactory) -> None: path = str(tmpdir / "foo.parquet") # type: ignore[operator] nw.from_native(constructor(data), eager_only=True).write_parquet(path) diff --git a/tests/hypothesis/test_join.py b/tests/hypothesis/test_join.py index ecd3b7408..b7759a1cb 100644 --- a/tests/hypothesis/test_join.py +++ b/tests/hypothesis/test_join.py @@ -11,6 +11,7 @@ from narwhals.utils import parse_version pl_version = parse_version(pl.__version__) +pd_version = parse_version(pd.__version__) @given( @@ -37,6 +38,7 @@ ), ) # type: ignore[misc] @pytest.mark.skipif(pl_version < parse_version("0.20.13"), reason="0.0 == -0.0") 
+@pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow") @pytest.mark.slow() def test_join( # pragma: no cover integers: st.SearchStrategy[list[int]], @@ -84,6 +86,7 @@ def test_join( # pragma: no cover ), ) # type: ignore[misc] @pytest.mark.slow() +@pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow") def test_cross_join( # pragma: no cover integers: st.SearchStrategy[list[int]], other_integers: st.SearchStrategy[list[int]], diff --git a/tests/series/test_common.py b/tests/series/test_common.py index d6ca809c9..300f0c69a 100644 --- a/tests/series/test_common.py +++ b/tests/series/test_common.py @@ -141,6 +141,9 @@ def test_boolean_reductions(df_raw: Any) -> None: @pytest.mark.parametrize("df_raw", [df_pandas, df_lazy]) +@pytest.mark.skipif( + parse_version(pd.__version__) < parse_version("2.0.0"), reason="too old for pyarrow" +) def test_convert(df_raw: Any) -> None: result = nw.from_native(df_raw).lazy().collect()["a"].to_numpy() assert_array_equal(result, np.array([1, 3, 2])) @@ -271,6 +274,10 @@ def test_zip_with(df_raw: Any, mask: Any, expected: Any) -> None: assert result == expected +@pytest.mark.skipif( + parse_version(pd.__version__) < parse_version("1.0.0"), + reason="too old for convert_dtypes", +) def test_cast_string() -> None: s_pd = pd.Series([1, 2]).convert_dtypes() s = nw.from_native(s_pd, series_only=True) diff --git a/tests/test_utils.py b/tests/test_utils.py index 745903741..22606a27e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,6 +5,7 @@ from pandas.testing import assert_series_equal import narwhals as nw +from narwhals.utils import parse_version def test_maybe_align_index_pandas() -> None: @@ -64,6 +65,10 @@ def test_maybe_set_index_polars() -> None: assert result is df +@pytest.mark.skipif( + parse_version(pd.__version__) < parse_version("1.0.0"), + reason="too old for convert_dtypes", +) def test_maybe_convert_dtypes_pandas() -> None: import numpy as np diff --git 
a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index 08eda89b7..2c4d48e74 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -9,6 +9,7 @@ import pytest import narwhals as nw +from narwhals.utils import parse_version from tests.utils import compare_dicts @@ -17,6 +18,9 @@ ["pandas", "polars"], ) @pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") +@pytest.mark.skipif( + parse_version(pd.__version__) < parse_version("1.0.0"), reason="too old for pyarrow" +) def test_q1(library: str) -> None: if library == "pandas": df_raw = pd.read_parquet("tests/data/lineitem.parquet") @@ -84,6 +88,9 @@ def test_q1(library: str) -> None: "ignore:.*Passing a BlockManager.*:DeprecationWarning", "ignore:.*Complex.*:UserWarning", ) +@pytest.mark.skipif( + parse_version(pd.__version__) < parse_version("1.0.0"), reason="too old for pyarrow" +) def test_q1_w_generic_funcs(library: str) -> None: if library == "pandas": df_raw = pd.read_parquet("tests/data/lineitem.parquet") @@ -144,6 +151,9 @@ def test_q1_w_generic_funcs(library: str) -> None: @mock.patch.dict(os.environ, {"NARWHALS_FORCE_GENERIC": "1"}) @pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") +@pytest.mark.skipif( + parse_version(pd.__version__) < parse_version("1.0.0"), reason="too old for pyarrow" +) def test_q1_w_pandas_agg_generic_path() -> None: df_raw = pd.read_parquet("tests/data/lineitem.parquet") df_raw["l_shipdate"] = pd.to_datetime(df_raw["l_shipdate"])