Skip to content

Commit

Permalink
feat: add narwhals.new_series (#787)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Aug 14, 2024
1 parent 6fbfb77 commit 885ef31
Show file tree
Hide file tree
Showing 8 changed files with 194 additions and 6 deletions.
7 changes: 4 additions & 3 deletions docs/api-reference/narwhals.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@ Here are the top-level functions available in Narwhals.
- get_native_namespace
- is_ordered_categorical
- len
- maybe_align_index
- maybe_set_index
- maybe_convert_dtypes
- lit
- max
- maybe_align_index
- maybe_convert_dtypes
- maybe_set_index
- mean
- min
- narwhalify
- new_series
- sum
- sum_horizontal
- show_versions
Expand Down
2 changes: 2 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from narwhals.functions import concat
from narwhals.functions import from_dict
from narwhals.functions import get_level
from narwhals.functions import new_series
from narwhals.functions import show_versions
from narwhals.schema import Schema
from narwhals.series import Series
Expand All @@ -55,6 +56,7 @@
"concat",
"from_dict",
"get_level",
"new_series",
"to_native",
"from_native",
"is_ordered_categorical",
Expand Down
1 change: 1 addition & 0 deletions narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(
) -> None:
self._native_frame = native_dataframe
self._backend_version = backend_version
self._implementation = Implementation.DASK

def __native_namespace__(self) -> Any:  # pragma: no cover
    """Return the native namespace of this frame, as given by `get_dask_dataframe()`."""
    native_namespace = get_dask_dataframe()
    return native_namespace
Expand Down
94 changes: 94 additions & 0 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,100 @@ def concat(
)


def new_series(
    name: str,
    values: Any,
    dtype: DType | type[DType] | None = None,
    *,
    native_namespace: ModuleType,
) -> Series:
    """
    Instantiate Narwhals Series from raw data.

    Arguments:
        name: Name of resulting Series.
        values: Values to make Series from.
        dtype: (Narwhals) dtype. If not provided, the native library
            may auto-infer it from `values`.
        native_namespace: The native library to use for Series creation.

    Returns:
        A Narwhals Series named `name`, backed by `native_namespace`.

    Raises:
        NotImplementedError: If `native_namespace` is Dask, which Narwhals
            only supports lazily.
        AttributeError: If `native_namespace` is not a known backend and does
            not expose a top-level `new_series` function.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}

        Let's define a dataframe-agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     values = [4, 1, 2]
        ...     native_namespace = nw.get_native_namespace(df)
        ...     return nw.new_series("c", values, nw.Int32, native_namespace=native_namespace)

        Let's see what happens when passing pandas / Polars input:

        >>> func(pd.DataFrame(data))
        0    4
        1    1
        2    2
        Name: c, dtype: int32
        >>> func(pl.DataFrame(data))  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: 'c' [i32]
        [
           4
           1
           2
        ]
    """
    implementation = Implementation.from_native_namespace(native_namespace)

    if implementation is Implementation.POLARS:
        # Compare against None explicitly: the intent is "was a dtype given",
        # not "is the dtype object truthy".
        if dtype is not None:
            from narwhals._polars.utils import (
                narwhals_to_native_dtype as polars_narwhals_to_native_dtype,
            )

            dtype = polars_narwhals_to_native_dtype(dtype)

        native_series = native_namespace.Series(name=name, values=values, dtype=dtype)
    elif implementation in {
        Implementation.PANDAS,
        Implementation.MODIN,
        Implementation.CUDF,
    }:
        if dtype is not None:
            from narwhals._pandas_like.utils import (
                narwhals_to_native_dtype as pandas_like_narwhals_to_native_dtype,
            )

            dtype = pandas_like_narwhals_to_native_dtype(dtype, None, implementation)
        native_series = native_namespace.Series(values, name=name, dtype=dtype)

    elif implementation is Implementation.PYARROW:
        if dtype is not None:
            from narwhals._arrow.utils import (
                narwhals_to_native_dtype as arrow_narwhals_to_native_dtype,
            )

            dtype = arrow_narwhals_to_native_dtype(dtype)
        # No name is passed here; it is attached by `.alias(name)` below.
        native_series = native_namespace.chunked_array([values], type=dtype)

    elif implementation is Implementation.DASK:
        msg = "Dask support in Narwhals is lazy-only, so `new_series` is not supported"
        raise NotImplementedError(msg)
    else:  # pragma: no cover
        # implementation is UNKNOWN: a Narwhals extension using this feature should
        # implement a `new_series` function in its top-level namespace.
        # Only the attribute lookup is guarded, so an AttributeError raised
        # *inside* the extension's `new_series` is not misreported as a
        # missing constructor.
        try:
            extension_new_series = native_namespace.new_series
        except AttributeError as e:
            msg = "Unknown namespace is expected to implement `new_series` constructor."
            raise AttributeError(msg) from e
        native_series = extension_new_series(name, values, dtype)
    return from_native(native_series, series_only=True).alias(name)


def from_dict(
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
Expand Down
53 changes: 53 additions & 0 deletions narwhals/stable/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -1469,6 +1469,58 @@ def get_level(
return nw.get_level(obj)


def new_series(
    name: str,
    values: Any,
    dtype: DType | type[DType] | None = None,
    *,
    native_namespace: ModuleType,
) -> Series:
    """
    Instantiate Narwhals Series from raw data.

    This forwards every argument to `nw.new_series` and passes the result
    through `_stableify`.

    Arguments:
        name: Name of resulting Series.
        values: Values to make Series from.
        dtype: (Narwhals) dtype. If not provided, the native library
            may auto-infer it from `values`.
        native_namespace: The native library to use for Series creation.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals.stable.v1 as nw
        >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}

        Let's define a dataframe-agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     values = [4, 1, 2]
        ...     native_namespace = nw.get_native_namespace(df)
        ...     return nw.new_series("c", values, nw.Int32, native_namespace=native_namespace)

        Let's see what happens when passing pandas / Polars input:

        >>> func(pd.DataFrame(data))
        0    4
        1    1
        2    2
        Name: c, dtype: int32
        >>> func(pl.DataFrame(data))  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: 'c' [i32]
        [
           4
           1
           2
        ]
    """
    main_namespace_series = nw.new_series(
        name, values, dtype, native_namespace=native_namespace
    )
    return _stableify(main_namespace_series)


def from_dict(
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
Expand Down Expand Up @@ -1573,4 +1625,5 @@ def from_dict(
"show_versions",
"Schema",
"from_dict",
"new_series",
]
3 changes: 3 additions & 0 deletions narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from narwhals import dtypes
from narwhals._exceptions import ColumnNotFoundError
from narwhals.dependencies import get_cudf
from narwhals.dependencies import get_dask_dataframe
from narwhals.dependencies import get_modin
from narwhals.dependencies import get_pandas
from narwhals.dependencies import get_polars
Expand Down Expand Up @@ -43,6 +44,7 @@ class Implementation(Enum):
CUDF = auto()
PYARROW = auto()
POLARS = auto()
DASK = auto()

UNKNOWN = auto()

Expand All @@ -57,6 +59,7 @@ def from_native_namespace(
get_cudf(): Implementation.CUDF,
get_pyarrow(): Implementation.PYARROW,
get_polars(): Implementation.POLARS,
get_dask_dataframe(): Implementation.DASK,
}
return mapping.get(native_namespace, Implementation.UNKNOWN)

Expand Down
4 changes: 1 addition & 3 deletions tests/frame/drop_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ def test_drop(constructor: Any, to_drop: list[str], expected: list[str]) -> None
[
(
True,
pytest.raises(
(ColumnNotFoundError, PlColumnNotFoundError), match='"z" not found'
),
pytest.raises((ColumnNotFoundError, PlColumnNotFoundError), match="z"),
),
(False, does_not_raise()),
],
Expand Down
36 changes: 36 additions & 0 deletions tests/new_series_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import Any

import pandas as pd
import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts


def test_new_series(constructor_eager: Any) -> None:
    """Check `new_series` with an inferred dtype and with an explicit Int32 dtype."""
    frame = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True)
    s = frame["a"]
    native_namespace = nw.get_native_namespace(s)
    expected = {"b": [4, 1, 2]}

    inferred = nw.new_series("b", [4, 1, 2], native_namespace=native_namespace)
    # all supported libraries auto-infer this to be int64, we can always special-case
    # something different if necessary
    assert inferred.dtype == nw.Int64
    compare_dicts(inferred.to_frame(), expected)

    explicit = nw.new_series(
        "b", [4, 1, 2], nw.Int32, native_namespace=native_namespace
    )
    assert explicit.dtype == nw.Int32
    compare_dicts(explicit.to_frame(), expected)


def test_new_series_dask() -> None:
    """Check that `new_series` raises NotImplementedError for a Dask namespace."""
    # Skip (rather than fail) when the optional Dask dependencies are missing.
    pytest.importorskip("dask")
    pytest.importorskip("dask_expr", exc_type=ImportError)
    import dask.dataframe as dd

    pandas_frame = pd.DataFrame({"a": [1, 2, 3]})
    df = nw.from_native(dd.from_pandas(pandas_frame))
    expected_message = "Dask support in Narwhals is lazy-only"
    with pytest.raises(NotImplementedError, match=expected_message):
        nw.new_series("a", [1, 2, 3], native_namespace=nw.get_native_namespace(df))

0 comments on commit 885ef31

Please sign in to comment.