diff --git a/docs/api-reference/narwhals.md b/docs/api-reference/narwhals.md index 16bc6621c..275a865c1 100644 --- a/docs/api-reference/narwhals.md +++ b/docs/api-reference/narwhals.md @@ -17,14 +17,15 @@ Here are the top-level functions available in Narwhals. - get_native_namespace - is_ordered_categorical - len - - maybe_align_index - - maybe_set_index - - maybe_convert_dtypes - lit - max + - maybe_align_index + - maybe_convert_dtypes + - maybe_set_index - mean - min - narwhalify + - new_series - sum - sum_horizontal - show_versions diff --git a/narwhals/__init__.py b/narwhals/__init__.py index d67a0587e..3e656c60a 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -36,6 +36,7 @@ from narwhals.functions import concat from narwhals.functions import from_dict from narwhals.functions import get_level +from narwhals.functions import new_series from narwhals.functions import show_versions from narwhals.schema import Schema from narwhals.series import Series @@ -55,6 +56,7 @@ "concat", "from_dict", "get_level", + "new_series", "to_native", "from_native", "is_ordered_categorical", diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 99ed430a9..35ef28bba 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -32,6 +32,7 @@ def __init__( ) -> None: self._native_frame = native_dataframe self._backend_version = backend_version + self._implementation = Implementation.DASK def __native_namespace__(self) -> Any: # pragma: no cover return get_dask_dataframe() diff --git a/narwhals/functions.py b/narwhals/functions.py index 13db8c34b..d5d8be4db 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -48,6 +48,100 @@ def concat( ) +def new_series( + name: str, + values: Any, + dtype: DType | type[DType] | None = None, + *, + native_namespace: ModuleType, +) -> Series: + """ + Instantiate Narwhals Series from raw data. + + Arguments: + name: Name of resulting Series. + values: Values to make Series from. 
+ dtype: (Narwhals) dtype. If not provided, the native library + may auto-infer it from `values`. + native_namespace: The native library to use for Series creation. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + + Let's define a dataframe-agnostic function: + + >>> @nw.narwhalify + ... def func(df): + ... values = [4, 1, 2] + ... native_namespace = nw.get_native_namespace(df) + ... return nw.new_series("c", values, nw.Int32, native_namespace=native_namespace) + + Let's see what happens when passing pandas / Polars input: + + >>> func(pd.DataFrame(data)) + 0 4 + 1 1 + 2 2 + Name: c, dtype: int32 + >>> func(pl.DataFrame(data)) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: 'c' [i32] + [ + 4 + 1 + 2 + ] + """ + implementation = Implementation.from_native_namespace(native_namespace) + + if implementation is Implementation.POLARS: + if dtype: + from narwhals._polars.utils import ( + narwhals_to_native_dtype as polars_narwhals_to_native_dtype, + ) + + dtype = polars_narwhals_to_native_dtype(dtype) + + native_series = native_namespace.Series(name=name, values=values, dtype=dtype) + elif implementation in { + Implementation.PANDAS, + Implementation.MODIN, + Implementation.CUDF, + }: + if dtype: + from narwhals._pandas_like.utils import ( + narwhals_to_native_dtype as pandas_like_narwhals_to_native_dtype, + ) + + dtype = pandas_like_narwhals_to_native_dtype(dtype, None, implementation) + native_series = native_namespace.Series(values, name=name, dtype=dtype) + + elif implementation is Implementation.PYARROW: + if dtype: + from narwhals._arrow.utils import ( + narwhals_to_native_dtype as arrow_narwhals_to_native_dtype, + ) + + dtype = arrow_narwhals_to_native_dtype(dtype) + native_series = native_namespace.chunked_array([values], type=dtype) + + elif implementation is Implementation.DASK: + msg = "Dask support in Narwhals is lazy-only, so `new_series` is " "not 
supported" + raise NotImplementedError(msg) + else: # pragma: no cover + try: + # implementation is UNKNOWN, Narwhals extension using this feature should + # implement `new_series` function in the top-level namespace. + native_series = native_namespace.new_series(name, values, dtype) + except AttributeError as e: + msg = "Unknown namespace is expected to implement `Series` constructor." + raise AttributeError(msg) from e + return from_native(native_series, series_only=True).alias(name) + + def from_dict( data: dict[str, Any], schema: dict[str, DType] | Schema | None = None, diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 1c1c91711..b5697753f 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -1469,6 +1469,58 @@ def get_level( return nw.get_level(obj) +def new_series( + name: str, + values: Any, + dtype: DType | type[DType] | None = None, + *, + native_namespace: ModuleType, +) -> Series: + """ + Instantiate Narwhals Series from raw data. + + Arguments: + name: Name of resulting Series. + values: Values to make Series from. + dtype: (Narwhals) dtype. If not provided, the native library + may auto-infer it from `values`. + native_namespace: The native library to use for Series creation. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals.stable.v1 as nw + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} + + Let's define a dataframe-agnostic function: + + >>> @nw.narwhalify + ... def func(df): + ... values = [4, 1, 2] + ... native_namespace = nw.get_native_namespace(df) + ... 
return nw.new_series("c", values, nw.Int32, native_namespace=native_namespace) + + Let's see what happens when passing pandas / Polars input: + + >>> func(pd.DataFrame(data)) + 0 4 + 1 1 + 2 2 + Name: c, dtype: int32 + >>> func(pl.DataFrame(data)) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: 'c' [i32] + [ + 4 + 1 + 2 + ] + """ + return _stableify( + nw.new_series(name, values, dtype, native_namespace=native_namespace) + ) + + def from_dict( data: dict[str, Any], schema: dict[str, DType] | Schema | None = None, @@ -1573,4 +1625,5 @@ def from_dict( "show_versions", "Schema", "from_dict", + "new_series", ] diff --git a/narwhals/utils.py b/narwhals/utils.py index 1a0b752d9..cc2a482c4 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -14,6 +14,7 @@ from narwhals import dtypes from narwhals._exceptions import ColumnNotFoundError from narwhals.dependencies import get_cudf +from narwhals.dependencies import get_dask_dataframe from narwhals.dependencies import get_modin from narwhals.dependencies import get_pandas from narwhals.dependencies import get_polars @@ -43,6 +44,7 @@ class Implementation(Enum): CUDF = auto() PYARROW = auto() POLARS = auto() + DASK = auto() UNKNOWN = auto() @@ -57,6 +59,7 @@ def from_native_namespace( get_cudf(): Implementation.CUDF, get_pyarrow(): Implementation.PYARROW, get_polars(): Implementation.POLARS, + get_dask_dataframe(): Implementation.DASK, } return mapping.get(native_namespace, Implementation.UNKNOWN) diff --git a/tests/frame/drop_test.py b/tests/frame/drop_test.py index 547ddc748..db039fcb2 100644 --- a/tests/frame/drop_test.py +++ b/tests/frame/drop_test.py @@ -33,9 +33,7 @@ def test_drop(constructor: Any, to_drop: list[str], expected: list[str]) -> None [ ( True, - pytest.raises( - (ColumnNotFoundError, PlColumnNotFoundError), match='"z" not found' - ), + pytest.raises((ColumnNotFoundError, PlColumnNotFoundError), match="z"), ), (False, does_not_raise()), ], diff --git a/tests/new_series_test.py 
b/tests/new_series_test.py new file mode 100644 index 000000000..8ddcabd40 --- /dev/null +++ b/tests/new_series_test.py @@ -0,0 +1,36 @@ +from typing import Any + +import pandas as pd +import pytest + +import narwhals.stable.v1 as nw +from tests.utils import compare_dicts + + +def test_new_series(constructor_eager: Any) -> None: + s = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)["a"] + result = nw.new_series("b", [4, 1, 2], native_namespace=nw.get_native_namespace(s)) + expected = {"b": [4, 1, 2]} + # all supported libraries auto-infer this to be int64, we can always special-case + # something different if necessary + assert result.dtype == nw.Int64 + compare_dicts(result.to_frame(), expected) + + result = nw.new_series( + "b", [4, 1, 2], nw.Int32, native_namespace=nw.get_native_namespace(s) + ) + expected = {"b": [4, 1, 2]} + assert result.dtype == nw.Int32 + compare_dicts(result.to_frame(), expected) + + +def test_new_series_dask() -> None: + pytest.importorskip("dask") + pytest.importorskip("dask_expr", exc_type=ImportError) + import dask.dataframe as dd + + df = nw.from_native(dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}))) + with pytest.raises( + NotImplementedError, match="Dask support in Narwhals is lazy-only" + ): + nw.new_series("a", [1, 2, 3], native_namespace=nw.get_native_namespace(df))