Skip to content

Commit

Permalink
Merge pull request #64 from MarcoGorelli/list-optional-deps
Browse files (browse the repository at this point in the history)
List optional deps
  • Loading branch information
MarcoGorelli authored Apr 30, 2024
2 parents e136b94 + f453a95 commit bdbe115
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/extremes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
- name: install-reqs
run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt
- name: install-modin
run: python -m pip install pandas==2.0.0 polars==0.20.13 modin[dask]
run: python -m pip install pandas==1.2.0 polars==0.20.3
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow
- name: Run doctests
Expand Down
12 changes: 9 additions & 3 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from narwhals._pandas_like.utils import validate_indices
from narwhals.dependencies import get_pyarrow
from narwhals.utils import flatten
from narwhals.utils import parse_version

if TYPE_CHECKING:
from collections.abc import Sequence
Expand Down Expand Up @@ -57,10 +58,15 @@ def _convert_object_dtypes(self, dataframe: Any) -> Any:
for col in dataframe.columns:
if schema[col] != object:
continue
if get_pyarrow() is not None:
replacements[col] = dataframe[col].astype("string[pyarrow]")
import pandas as pd # todo: generalise across pandas-like implementations

if parse_version(pd.__version__) >= parse_version("2.0.0"):
if get_pyarrow() is not None:
replacements[col] = dataframe[col].astype("string[pyarrow]")
else: # pragma: no cover
replacements[col] = dataframe[col].astype("string[python]")
else: # pragma: no cover
replacements[col] = dataframe[col].astype("string[python]")
pass
return dataframe.assign(**replacements)

def _validate_columns(self, columns: Sequence[str]) -> None:
Expand Down
16 changes: 13 additions & 3 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,12 @@ def translate_dtype(dtype: Any) -> DType:
if str(dtype).startswith("datetime64"):
# todo: different time units and time zones
return dtypes.Datetime()
if dtype == "object": # pragma: no cover
import pandas as pd

assert parse_version(pd.__version__) < parse_version("2.0.0")
# Should only happen for pandas pre 2.0.0
return dtypes.String()
msg = f"Unknown dtype: {dtype}" # pragma: no cover
raise AssertionError(msg)

Expand Down Expand Up @@ -342,9 +348,13 @@ def reverse_translate_dtype(dtype: DType | type[DType]) -> Any:
if isinstance_or_issubclass(dtype, dtypes.UInt8):
return "uint8"
if isinstance_or_issubclass(dtype, dtypes.String):
if get_pyarrow() is not None:
return "string[pyarrow]"
return "string[python]" # pragma: no cover
import pandas as pd

if parse_version(pd.__version__) >= parse_version("2.0.0"):
if get_pyarrow() is not None:
return "string[pyarrow]"
return "string[python]" # pragma: no cover
return "object" # pragma: no cover
if isinstance_or_issubclass(dtype, dtypes.Boolean):
return "bool"
if isinstance_or_issubclass(dtype, dtypes.Datetime):
Expand Down
14 changes: 13 additions & 1 deletion narwhals/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from typing import Callable
from typing import Iterable

from narwhals.dependencies import get_polars
from narwhals.dtypes import translate_dtype
from narwhals.utils import flatten
from narwhals.utils import parse_version

if TYPE_CHECKING:
from narwhals.typing import IntoExpr
Expand Down Expand Up @@ -275,7 +277,17 @@ def len() -> Expr:
"""
Instantiate an expression representing the length of a dataframe, similar to `polars.len`.
"""
return Expr(lambda plx: plx.len())

def func(plx: Any) -> Any:
if (
not hasattr(plx, "_implementation")
and (pl := get_polars()) is not None
and parse_version(pl.__version__) < parse_version("0.20.4")
): # pragma: no cover
return plx.count()
return plx.len()

return Expr(func)


def sum(*columns: str) -> Expr:
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ classifiers = [
"Operating System :: OS Independent",
]

[project.optional-dependencies]
pandas = ["pandas>=1.2.0"]
polars = ["polars>=0.20.3"]

[project.urls]
"Homepage" = "https://github.com/MarcoGorelli/narwhals"
"Bug Tracker" = "https://github.com/MarcoGorelli/narwhals"
Expand Down Expand Up @@ -83,6 +87,7 @@ plugins = ["covdefaults"]
exclude_also = [
"> POLARS_VERSION",
"if sys.version_info() <",
'if df_raw is None:',
]

[tool.mypy]
Expand Down
4 changes: 4 additions & 0 deletions tests/hypothesis/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from pandas.testing import assert_frame_equal

import narwhals as nw
from narwhals.utils import parse_version

pl_version = parse_version(pl.__version__)


@example([0, 0, 0], [0, 0, 0], [0.0, 0.0, -0.0], ["c"]) # type: ignore[misc]
Expand Down Expand Up @@ -36,6 +39,7 @@
),
) # type: ignore[misc]
@pytest.mark.slow()
@pytest.mark.xfail(pl_version < parse_version("0.20.13"), reason="0.0 == -0.0")
def test_join(
integers: st.SearchStrategy[list[int]],
other_integers: st.SearchStrategy[list[int]],
Expand Down
71 changes: 55 additions & 16 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from polars.testing import assert_series_equal as pl_assert_series_equal

import narwhals as nw
from narwhals.utils import parse_version
from tests.utils import compare_dicts

df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
Expand All @@ -24,15 +25,18 @@
"z": "Float64",
}
)
df_pandas_pyarrow = pd.DataFrame(
{"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
).astype(
{
"a": "Int64[pyarrow]",
"b": "Int64[pyarrow]",
"z": "Float64[pyarrow]",
}
)
if parse_version(pd.__version__) >= parse_version("1.5.0"):
df_pandas_pyarrow = pd.DataFrame(
{"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
).astype(
{
"a": "Int64[pyarrow]",
"b": "Int64[pyarrow]",
"z": "Float64[pyarrow]",
}
)
else: # pragma: no cover
df_pandas_pyarrow = None
df_polars = pl.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
df_pandas_na = pd.DataFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]})
Expand All @@ -41,13 +45,16 @@
df_right_lazy = pl.LazyFrame({"c": [6, 12, -1], "d": [0, -4, 2]})

if os.environ.get("CI", None):
import modin.pandas as mpd

with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning)
df_mpd = mpd.DataFrame(
pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
)
try:
import modin.pandas as mpd
except ImportError: # pragma: no cover
df_mpd = df_pandas.copy()
else:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning)
df_mpd = mpd.DataFrame(
pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
)
else: # pragma: no cover
df_mpd = df_pandas.copy()

Expand All @@ -57,6 +64,8 @@
[df_pandas, df_polars, df_lazy, df_pandas_nullable, df_pandas_pyarrow],
)
def test_sort(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.sort("a", "b")
result_native = nw.to_native(result)
Expand All @@ -81,6 +90,8 @@ def test_sort(df_raw: Any) -> None:
[df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow],
)
def test_filter(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.filter(nw.col("a") > 1)
result_native = nw.to_native(result)
Expand All @@ -105,6 +116,8 @@ def test_filter_series(df_raw: Any) -> None:
[df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow],
)
def test_add(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.with_columns(
c=nw.col("a") + nw.col("b"),
Expand All @@ -128,6 +141,8 @@ def test_add(df_raw: Any) -> None:
[df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow],
)
def test_double(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.with_columns(nw.all() * 2)
result_native = nw.to_native(result)
Expand All @@ -144,6 +159,8 @@ def test_double(df_raw: Any) -> None:
[df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow],
)
def test_select(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.select("a")
result_native = nw.to_native(result)
Expand All @@ -169,6 +186,8 @@ def test_sumh(df_raw: Any) -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_sumh_literal(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b")))
result_native = nw.to_native(result)
Expand All @@ -185,6 +204,8 @@ def test_sumh_literal(df_raw: Any) -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_sum_all(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.select(nw.all().sum())
result_native = nw.to_native(result)
Expand All @@ -196,6 +217,8 @@ def test_sum_all(df_raw: Any) -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_double_selected(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.select(nw.col("a", "b") * 2)
result_native = nw.to_native(result)
Expand All @@ -215,6 +238,8 @@ def test_double_selected(df_raw: Any) -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_rename(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.rename({"a": "x", "b": "y"})
result_native = nw.to_native(result)
Expand All @@ -226,6 +251,8 @@ def test_rename(df_raw: Any) -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_join(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
df_right = df
result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner")
Expand All @@ -252,6 +279,8 @@ def test_join(df_raw: Any) -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_schema(df_raw: Any) -> None:
if df_raw is None:
return
result = nw.LazyFrame(df_raw).schema
expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64}
assert result == expected
Expand All @@ -270,6 +299,8 @@ def test_schema(df_raw: Any) -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_columns(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = df.columns
expected = ["a", "b", "z"]
Expand Down Expand Up @@ -329,6 +360,8 @@ def test_convert_pandas(df_raw: Any) -> None:
r"ignore:np\.find_common_type is deprecated\.:DeprecationWarning"
)
def test_convert_numpy(df_raw: Any) -> None:
if df_raw is None:
return
result = nw.DataFrame(df_raw).to_numpy()
expected = np.array([[1, 3, 2], [4, 4, 6], [7.0, 8, 9]]).T
np.testing.assert_array_equal(result, expected)
Expand Down Expand Up @@ -454,6 +487,8 @@ def test_expr_na(df_raw: Any) -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_head(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = nw.to_native(df.head(2))
expected = {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]}
Expand All @@ -467,6 +502,8 @@ def test_head(df_raw: Any) -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_unique(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = nw.to_native(df.unique("b").sort("b"))
expected = {"a": [1, 2], "b": [4, 6], "z": [7.0, 9.0]}
Expand Down Expand Up @@ -557,6 +594,8 @@ def test_to_dict() -> None:
"df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow]
)
def test_any_all(df_raw: Any) -> None:
if df_raw is None:
return
df = nw.LazyFrame(df_raw)
result = nw.to_native(df.select((nw.all() > 1).all()))
expected = {"a": [False], "b": [True], "z": [True]}
Expand Down
Loading

0 comments on commit bdbe115

Please sign in to comment.