From 6be131407062988791ac7eeed8dee3aecd4e7d22 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 1 Jul 2024 10:31:34 +0100 Subject: [PATCH] feat: add Unknown dtype (#369) --- docs/api-reference/dtypes.md | 1 + narwhals/__init__.py | 2 ++ narwhals/_arrow/namespace.py | 1 + narwhals/_pandas_like/namespace.py | 1 + narwhals/_pandas_like/utils.py | 3 +-- narwhals/dtypes.py | 5 +++++ tests/frame/schema_test.py | 5 +++++ tests/series/cast_test.py | 5 +++++ 8 files changed, 21 insertions(+), 2 deletions(-) diff --git a/docs/api-reference/dtypes.md b/docs/api-reference/dtypes.md index c89b7f493..848e9e58c 100644 --- a/docs/api-reference/dtypes.md +++ b/docs/api-reference/dtypes.md @@ -20,6 +20,7 @@ - Datetime - Duration - Object + - Unknown show_root_heading: false show_source: false show_bases: false diff --git a/narwhals/__init__.py b/narwhals/__init__.py index 86cbfaf89..5aa5d72a0 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -18,6 +18,7 @@ from narwhals.dtypes import UInt16 from narwhals.dtypes import UInt32 from narwhals.dtypes import UInt64 +from narwhals.dtypes import Unknown from narwhals.expression import Expr from narwhals.expression import all from narwhals.expression import col @@ -75,6 +76,7 @@ "Float32", "Boolean", "Object", + "Unknown", "Categorical", "String", "Datetime", diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 31af558ef..4f3256d65 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -28,6 +28,7 @@ class ArrowNamespace: Float32 = dtypes.Float32 Boolean = dtypes.Boolean Object = dtypes.Object + Unknown = dtypes.Unknown Categorical = dtypes.Categorical String = dtypes.String Datetime = dtypes.Datetime diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index c8e4f101d..2d6d154f9 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -33,6 +33,7 @@ class PandasNamespace: Float32 = dtypes.Float32 Boolean = dtypes.Boolean Object = dtypes.Object + Unknown = dtypes.Unknown Categorical = dtypes.Categorical String = dtypes.String Datetime = dtypes.Datetime diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index d679058b3..8862d0917 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -447,8 +447,7 @@ def translate_dtype(column: Any) -> DType: # which is inferred by default. return dtypes.String() return dtypes.Object() - msg = f"Unknown dtype: {dtype}" # pragma: no cover - raise AssertionError(msg) + return dtypes.Unknown() def get_dtype_backend(dtype: Any, implementation: str) -> str: diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index aa98f2c58..36e10f247 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -67,6 +67,9 @@ class Boolean(DType): ... class Object(DType): ... +class Unknown(DType): ... + + class Datetime(TemporalType): ... @@ -155,6 +158,8 @@ def to_narwhals_dtype(dtype: Any, *, is_polars: bool) -> DType: return Boolean() if dtype == pl.Object: return Object() + if dtype == pl.Unknown: # pragma: no cover + return Unknown() if dtype == pl.Categorical: return Categorical() if dtype == pl.Datetime: diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index 288db1812..932bc269c 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -121,3 +121,8 @@ def test_dtypes() -> None: result_pa = df.schema assert result_pa == expected assert {name: df[name].dtype for name in df.columns} == expected + + +def test_unknown_dtype() -> None: + df = pd.DataFrame({"a": pd.period_range("2000", periods=3, freq="M")}) + assert nw.from_native(df).schema == {"a": nw.Unknown} diff --git a/tests/series/cast_test.py b/tests/series/cast_test.py index cb2d176b1..0192cefe9 100644 --- a/tests/series/cast_test.py +++ b/tests/series/cast_test.py @@ -84,3 +84,8 @@ def test_cast_date_datetime_invalid() -> None: df = nw.from_native(dfpd) with pytest.raises(NotImplementedError, match="pyarrow"): df.select(nw.col("a").cast(nw.Date)) + + +def test_unknown_to_int() -> None: + df = pd.DataFrame({"a": pd.period_range("2000", periods=3, freq="M")}) + assert nw.from_native(df).select(nw.col("a").cast(nw.Int64)).schema == {"a": nw.Int64}