Skip to content

Commit

Permalink
Try lowering minimum pandas (#390)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Jul 3, 2024
1 parent 72dc23c commit c8ad46a
Show file tree
Hide file tree
Showing 13 changed files with 102 additions and 24 deletions.
33 changes: 30 additions & 3 deletions .github/workflows/extremes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,37 @@ jobs:
$RUNNER_TOOL_CACHE/Python/*
~\AppData\Local\pip\Cache
key: ${{ runner.os }}-build-${{ matrix.python-version }}
- name: install-minimum-versions
run: python -m pip install tox virtualenv setuptools pandas==0.25.3 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata
- name: install-reqs
run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt
- name: install-modin
run: python -m pip install pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata
run: python -m pip install -r requirements-dev.txt
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow

pretty_old_versions:
strategy:
matrix:
python-version: ["3.8"]
os: [ubuntu-latest]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Cache multiple paths
uses: actions/cache@v4
with:
path: |
~/.cache/pip
$RUNNER_TOOL_CACHE/Python/*
~\AppData\Local\pip\Cache
key: ${{ runner.os }}-build-${{ matrix.python-version }}
- name: install-minimum-versions
run: python -m pip install tox virtualenv setuptools pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata
- name: install-reqs
run: python -m pip install -r requirements-dev.txt
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow
- name: Run doctests
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def join(
.rename( # rename to avoid creating extra columns in join
columns=dict(zip(right_on, left_on)) # type: ignore[arg-type]
)
.drop_duplicates(ignore_index=True)
.drop_duplicates()
)
return self._from_dataframe(
self._dataframe.merge(
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def func(df: Any) -> Any:
for expr in exprs:
results_keys = expr._call(from_dataframe(df))
for result_keys in results_keys:
out_group.append(result_keys._series.item())
out_group.append(result_keys._series.iloc[0])
out_names.append(result_keys.name)
return native_series_from_iterable(
out_group, index=out_names, name="", implementation=implementation
Expand Down
9 changes: 7 additions & 2 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,9 +434,14 @@ def alias(self, name: str) -> Self:
def to_numpy(self) -> Any:
has_missing = self._series.isna().any()
if has_missing and str(self._series.dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING:
if self._implementation == "pandas" and parse_version(
get_pandas().__version__
) < parse_version("1.0.0"): # pragma: no cover
kwargs = {}
else:
kwargs = {"na_value": float("nan")}
return self._series.to_numpy(
dtype=PANDAS_TO_NUMPY_DTYPE_MISSING[str(self._series.dtype)],
na_value=float("nan"),
dtype=PANDAS_TO_NUMPY_DTYPE_MISSING[str(self._series.dtype)], **kwargs
)
if (
not has_missing
Expand Down
19 changes: 13 additions & 6 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any:
if isinstance(other, PandasSeries):
if other.len() == 1:
# broadcast
return other._series.item()
if other._series.index is not index and not (other._series.index == index).all():
return other._series.set_axis(index, axis=0)
return other._series.iloc[0]
if other._series.index is not index:
return set_axis(other._series, index, implementation=other._implementation)
return other._series
raise AssertionError("Please report a bug")

Expand Down Expand Up @@ -375,12 +375,19 @@ def native_series_from_iterable(


def set_axis(obj: T, index: Any, implementation: str) -> T:
if implementation == "pandas" and parse_version(
get_pandas().__version__
) < parse_version("1.0.0"): # pragma: no cover
kwargs = {"inplace": False}
else:
kwargs = {}
if implementation == "pandas" and parse_version(
get_pandas().__version__
) >= parse_version("1.5.0"):
return obj.set_axis(index, axis=0, copy=False) # type: ignore[no-any-return, attr-defined]
kwargs["copy"] = False
else: # pragma: no cover
return obj.set_axis(index, axis=0) # type: ignore[no-any-return, attr-defined]
pass
return obj.set_axis(index, axis=0, **kwargs) # type: ignore[no-any-return, attr-defined]


def translate_dtype(column: Any) -> DType:
Expand Down Expand Up @@ -591,7 +598,7 @@ def validate_indices(series: list[PandasSeries]) -> list[Any]:
reindexed = [series[0]._series]
for s in series[1:]:
if s._series.index is not idx:
reindexed.append(s._series.set_axis(idx.rename(s._series.index.name), axis=0))
reindexed.append(set_axis(s._series, idx, s._implementation))
else:
reindexed.append(s._series)
return reindexed
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ classifiers = [
]

[project.optional-dependencies]
pandas = ["pandas>=1.1.5"]
pandas = ["pandas>=0.25.3"]
polars = ["polars>=0.20.3"]
pyarrow = ['pyarrow>=11.0.0']

Expand Down Expand Up @@ -77,6 +77,7 @@ docstring-code-format = true
filterwarnings = [
"error",
'ignore:distutils Version classes are deprecated:DeprecationWarning',
'ignore:In the future `np.bool`',
]
xfail_strict = true
markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"]
Expand Down
17 changes: 13 additions & 4 deletions tests/frame/test_common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import re
import warnings
from typing import TYPE_CHECKING
from typing import Any
from typing import Literal
Expand All @@ -12,7 +13,6 @@
import pytest
from pandas.testing import assert_series_equal as pd_assert_series_equal
from polars.testing import assert_series_equal as pl_assert_series_equal
from sklearn.utils._testing import ignore_warnings

import narwhals as nw
from narwhals.functions import _get_deps_info
Expand Down Expand Up @@ -444,6 +444,10 @@ def test_accepted_dataframes() -> None:

@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd])
@pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning")
@pytest.mark.skipif(
parse_version(pd.__version__) < parse_version("2.0.0"),
reason="too old for pandas-pyarrow",
)
def test_convert_pandas(df_raw: Any) -> None:
result = nw.from_native(df_raw).to_pandas() # type: ignore[union-attr]
expected = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
Expand Down Expand Up @@ -851,7 +855,9 @@ def test_with_columns_order_single_row(df_raw: Any) -> None:


def test_get_sys_info() -> None:
with ignore_warnings():
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
show_versions()
sys_info = _get_sys_info()

assert "python" in sys_info
Expand All @@ -860,7 +866,9 @@ def test_get_sys_info() -> None:


def test_get_deps_info() -> None:
with ignore_warnings():
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
show_versions()
deps_info = _get_deps_info()

assert "narwhals" in deps_info
Expand All @@ -873,7 +881,8 @@ def test_get_deps_info() -> None:


def test_show_versions(capsys: Any) -> None:
with ignore_warnings():
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
show_versions()
out, err = capsys.readouterr()

Expand Down
5 changes: 3 additions & 2 deletions tests/frame/test_invalid.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import pandas as pd
import polars as pl
import pytest
from sklearn.utils import check_X_y
from sklearn.utils._testing import create_memmap_backed_data

import narwhals as nw
from narwhals.utils import parse_version
Expand Down Expand Up @@ -33,6 +31,9 @@ def test_validate_laziness() -> None:
)
def test_memmap() -> None:
# the headache this caused me...
from sklearn.utils import check_X_y
from sklearn.utils._testing import create_memmap_backed_data

x_any = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
y_any = create_memmap_backed_data(x_any["b"])

Expand Down
11 changes: 7 additions & 4 deletions tests/frame/write_parquet_test.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING
from typing import Any

import pandas as pd
import pytest

import narwhals as nw
from narwhals.utils import parse_version

data = {"a": [1, 2, 3]}

if TYPE_CHECKING:
import pytest


@pytest.mark.skipif(
parse_version(pd.__version__) < parse_version("2.0.0"), reason="too old for pyarrow"
)
def test_write_parquet(constructor: Any, tmpdir: pytest.TempdirFactory) -> None:
path = str(tmpdir / "foo.parquet") # type: ignore[operator]
nw.from_native(constructor(data), eager_only=True).write_parquet(path)
Expand Down
3 changes: 3 additions & 0 deletions tests/hypothesis/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from narwhals.utils import parse_version

pl_version = parse_version(pl.__version__)
pd_version = parse_version(pd.__version__)


@given(
Expand All @@ -37,6 +38,7 @@
),
) # type: ignore[misc]
@pytest.mark.skipif(pl_version < parse_version("0.20.13"), reason="0.0 == -0.0")
@pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow")
@pytest.mark.slow()
def test_join( # pragma: no cover
integers: st.SearchStrategy[list[int]],
Expand Down Expand Up @@ -84,6 +86,7 @@ def test_join( # pragma: no cover
),
) # type: ignore[misc]
@pytest.mark.slow()
@pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow")
def test_cross_join( # pragma: no cover
integers: st.SearchStrategy[list[int]],
other_integers: st.SearchStrategy[list[int]],
Expand Down
7 changes: 7 additions & 0 deletions tests/series/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ def test_boolean_reductions(df_raw: Any) -> None:


@pytest.mark.parametrize("df_raw", [df_pandas, df_lazy])
@pytest.mark.skipif(
parse_version(pd.__version__) < parse_version("2.0.0"), reason="too old for pyarrow"
)
def test_convert(df_raw: Any) -> None:
result = nw.from_native(df_raw).lazy().collect()["a"].to_numpy()
assert_array_equal(result, np.array([1, 3, 2]))
Expand Down Expand Up @@ -271,6 +274,10 @@ def test_zip_with(df_raw: Any, mask: Any, expected: Any) -> None:
assert result == expected


@pytest.mark.skipif(
parse_version(pd.__version__) < parse_version("1.0.0"),
reason="too old for convert_dtypes",
)
def test_cast_string() -> None:
s_pd = pd.Series([1, 2]).convert_dtypes()
s = nw.from_native(s_pd, series_only=True)
Expand Down
5 changes: 5 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pandas.testing import assert_series_equal

import narwhals as nw
from narwhals.utils import parse_version


def test_maybe_align_index_pandas() -> None:
Expand Down Expand Up @@ -64,6 +65,10 @@ def test_maybe_set_index_polars() -> None:
assert result is df


@pytest.mark.skipif(
parse_version(pd.__version__) < parse_version("1.0.0"),
reason="too old for convert_dtypes",
)
def test_maybe_convert_dtypes_pandas() -> None:
import numpy as np

Expand Down
10 changes: 10 additions & 0 deletions tests/tpch_q1_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pytest

import narwhals as nw
from narwhals.utils import parse_version
from tests.utils import compare_dicts


Expand All @@ -17,6 +18,9 @@
["pandas", "polars"],
)
@pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning")
@pytest.mark.skipif(
parse_version(pd.__version__) < parse_version("1.0.0"), reason="too old for pyarrow"
)
def test_q1(library: str) -> None:
if library == "pandas":
df_raw = pd.read_parquet("tests/data/lineitem.parquet")
Expand Down Expand Up @@ -84,6 +88,9 @@ def test_q1(library: str) -> None:
"ignore:.*Passing a BlockManager.*:DeprecationWarning",
"ignore:.*Complex.*:UserWarning",
)
@pytest.mark.skipif(
parse_version(pd.__version__) < parse_version("1.0.0"), reason="too old for pyarrow"
)
def test_q1_w_generic_funcs(library: str) -> None:
if library == "pandas":
df_raw = pd.read_parquet("tests/data/lineitem.parquet")
Expand Down Expand Up @@ -144,6 +151,9 @@ def test_q1_w_generic_funcs(library: str) -> None:

@mock.patch.dict(os.environ, {"NARWHALS_FORCE_GENERIC": "1"})
@pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning")
@pytest.mark.skipif(
parse_version(pd.__version__) < parse_version("1.0.0"), reason="too old for pyarrow"
)
def test_q1_w_pandas_agg_generic_path() -> None:
df_raw = pd.read_parquet("tests/data/lineitem.parquet")
df_raw["l_shipdate"] = pd.to_datetime(df_raw["l_shipdate"])
Expand Down

0 comments on commit c8ad46a

Please sign in to comment.