From 5c3db5b149cbf64b97de49bc4ad726f019f9f578 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Fri, 1 Nov 2024 09:28:08 +0100
Subject: [PATCH] feat: add is_into_dataframe (#1288)

* add is_into_dataframe

* fix pyarrow table construct

* typo series-> dataframe
---
 docs/api-reference/dependencies.md           |  1 +
 narwhals/dependencies.py                     | 39 ++++++++++++++++++++
 tests/dependencies/is_into_dataframe_test.py | 37 +++++++++++++++++++
 3 files changed, 77 insertions(+)
 create mode 100644 tests/dependencies/is_into_dataframe_test.py

diff --git a/docs/api-reference/dependencies.md b/docs/api-reference/dependencies.md
index 75ef4b277..f8995e36a 100644
--- a/docs/api-reference/dependencies.md
+++ b/docs/api-reference/dependencies.md
@@ -15,6 +15,7 @@
         - is_cudf_series
         - is_dask_dataframe
         - is_ibis_table
+        - is_into_dataframe
         - is_into_series
         - is_modin_dataframe
         - is_modin_index
diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py
index f1c057b3b..7aaa9f15f 100644
--- a/narwhals/dependencies.py
+++ b/narwhals/dependencies.py
@@ -251,6 +251,44 @@ def is_into_series(native_series: IntoSeries) -> bool:
     )
 
 
+def is_into_dataframe(native_dataframe: Any) -> bool:
+    """
+    Check whether `native_dataframe` can be converted to a Narwhals DataFrame.
+
+    Arguments:
+        native_dataframe: The object to check.
+
+    Returns:
+        `True` if `native_dataframe` can be converted to a Narwhals DataFrame, `False` otherwise.
+
+    Examples:
+        >>> import pandas as pd
+        >>> import polars as pl
+        >>> import numpy as np
+        >>> from narwhals.dependencies import is_into_dataframe
+
+        >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        >>> np_arr = np.array([[1, 4], [2, 5], [3, 6]])
+
+        >>> is_into_dataframe(df_pd)
+        True
+        >>> is_into_dataframe(df_pl)
+        True
+        >>> is_into_dataframe(np_arr)
+        False
+    """
+    from narwhals.dataframe import DataFrame
+
+    return (
+        isinstance(native_dataframe, DataFrame)
+        or hasattr(native_dataframe, "__narwhals_dataframe__")
+        or is_polars_dataframe(native_dataframe)
+        or is_pyarrow_table(native_dataframe)
+        or is_pandas_like_dataframe(native_dataframe)
+    )
+
+
 __all__ = [
     "get_polars",
     "get_pandas",
@@ -275,5 +313,6 @@ def is_into_series(native_series: IntoSeries) -> bool:
     "is_dask_dataframe",
     "is_pandas_like_dataframe",
     "is_pandas_like_series",
+    "is_into_dataframe",
     "is_into_series",
 ]
diff --git a/tests/dependencies/is_into_dataframe_test.py b/tests/dependencies/is_into_dataframe_test.py
new file mode 100644
index 000000000..77f5e7d4e
--- /dev/null
+++ b/tests/dependencies/is_into_dataframe_test.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from typing import Any
+
+import numpy as np
+import pandas as pd
+import polars as pl
+import pyarrow as pa
+
+import narwhals as nw
+from narwhals.dependencies import is_into_dataframe
+
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+
+class DictDataFrame:
+    def __init__(self, data: dict[str, list[Any]]) -> None:
+        self._data = data
+
+    def __len__(self) -> int:  # pragma: no cover
+        return len(next(iter(self._data.values())))
+
+    def __narwhals_dataframe__(self) -> Self:  # pragma: no cover
+        return self
+
+
+def test_is_into_dataframe() -> None:
+    data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    assert is_into_dataframe(pa.table(data))
+    assert is_into_dataframe(pl.DataFrame(data))
+    assert is_into_dataframe(pd.DataFrame(data))
+    assert is_into_dataframe(nw.from_native(pd.DataFrame(data)))
+    assert is_into_dataframe(DictDataFrame(data))
+    assert not is_into_dataframe(np.array([[1, 4], [2, 5], [3, 6]]))
+    assert not is_into_dataframe(data)