Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into list
Browse files Browse the repository at this point in the history
  • Loading branch information
DeaMariaLeon committed Oct 17, 2024
2 parents dd467ab + 5ef1803 commit 279b047
Show file tree
Hide file tree
Showing 125 changed files with 838 additions and 369 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/extremes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
nightlies:
strategy:
matrix:
python-version: ["3.11"]
python-version: ["3.12"]
os: [ubuntu-latest]
if: github.event.pull_request.head.repo.full_name == github.repository
runs-on: ${{ matrix.os }}
Expand Down
13 changes: 8 additions & 5 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,20 @@ Here's how you can set up your local development environment to contribute.

#### Option 1: Use UV (recommended)

1. Make sure you have Python3.8+ installed (for example, Python 3.11), create a virtual environment,
1. Make sure you have Python3.12 installed, create a virtual environment,
and activate it. If you're new to this, here's one way that we recommend:
1. Install uv: https://github.com/astral-sh/uv?tab=readme-ov-file#getting-started
2. Install some version of Python greater than Python3.8. For example, to install
Python3.11:
or make sure it is up-to-date with:
```
uv python install 3.11
uv self update
```
2. Install Python3.12:
```
uv python install 3.12
```
3. Create a virtual environment:
```
uv venv -p 3.11 --seed
uv venv -p 3.12 --seed
```
4. Activate it. On Linux, this is `. .venv/bin/activate`, on Windows `.\.venv\Scripts\activate`.
2. Install Narwhals: `uv pip install -e .`
Expand Down
3 changes: 2 additions & 1 deletion docs/api-reference/dtypes.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
members:
- Array
- List
- Struct
- Int64
- Int32
- Int16
Expand All @@ -15,12 +14,14 @@
- UInt32
- UInt16
- UInt8
- Field
- Float64
- Float32
- Boolean
- Categorical
- Enum
- String
- Struct
- Date
- Datetime
- Duration
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/narwhals.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Here are the top-level functions available in Narwhals.
- concat_str
- from_dict
- from_native
- from_arrow
- get_level
- get_native_namespace
- is_ordered_categorical
Expand Down
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Then, if you start the Python REPL and see the following:
```python
>>> import narwhals
>>> narwhals.__version__
'1.9.3'
'1.9.4'
```
then installation worked correctly!

Expand Down
6 changes: 5 additions & 1 deletion narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from narwhals.dtypes import Datetime
from narwhals.dtypes import Duration
from narwhals.dtypes import Enum
from narwhals.dtypes import Field
from narwhals.dtypes import Float32
from narwhals.dtypes import Float64
from narwhals.dtypes import Int8
Expand Down Expand Up @@ -44,6 +45,7 @@
from narwhals.expr import sum_horizontal
from narwhals.expr import when
from narwhals.functions import concat
from narwhals.functions import from_arrow
from narwhals.functions import from_dict
from narwhals.functions import get_level
from narwhals.functions import new_series
Expand All @@ -61,13 +63,14 @@
from narwhals.utils import maybe_reset_index
from narwhals.utils import maybe_set_index

__version__ = "1.9.3"
__version__ = "1.9.4"

__all__ = [
"dependencies",
"selectors",
"concat",
"from_dict",
"from_arrow",
"get_level",
"new_series",
"to_native",
Expand Down Expand Up @@ -118,6 +121,7 @@
"String",
"Datetime",
"Duration",
"Field",
"Struct",
"Array",
"List",
Expand Down
11 changes: 10 additions & 1 deletion narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,16 @@ def native_to_narwhals_dtype(dtype: Any, dtypes: DTypes) -> DType:
if pa.types.is_dictionary(dtype):
return dtypes.Categorical()
if pa.types.is_struct(dtype):
return dtypes.Struct()
return dtypes.Struct(
[
dtypes.Field(
dtype.field(i).name,
native_to_narwhals_dtype(dtype.field(i).type, dtypes),
)
for i in range(dtype.num_fields)
]
)

if pa.types.is_list(dtype) or pa.types.is_large_list(dtype):
return dtypes.List(native_to_narwhals_dtype(dtype.value_type, dtypes))
if pa.types.is_fixed_size_list(dtype):
Expand Down
11 changes: 10 additions & 1 deletion narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,16 @@ def map_duckdb_dtype_to_narwhals_dtype(duckdb_dtype: Any, dtypes: DTypes) -> DTy
if duckdb_dtype == "INTERVAL":
return dtypes.Duration()
if duckdb_dtype.startswith("STRUCT"):
return dtypes.Struct()
matchstruc_ = re.findall(r"(\w+)\s+(\w+)", duckdb_dtype)
return dtypes.Struct(
[
dtypes.Field(
matchstruc_[i][0],
map_duckdb_dtype_to_narwhals_dtype(matchstruc_[i][1], dtypes),
)
for i in range(len(matchstruc_))
]
)
if match_ := re.match(r"(.*)\[\]$", duckdb_dtype):
return dtypes.List(map_duckdb_dtype_to_narwhals_dtype(match_.group(1), dtypes))
if match_ := re.match(r"(\w+)\[(\d+)\]", duckdb_dtype):
Expand Down
10 changes: 9 additions & 1 deletion narwhals/_ibis/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,15 @@ def map_ibis_dtype_to_narwhals_dtype(ibis_dtype: Any, dtypes: DTypes) -> DType:
map_ibis_dtype_to_narwhals_dtype(ibis_dtype.value_type, dtypes)
)
if ibis_dtype.is_struct():
return dtypes.Struct()
return dtypes.Struct(
[
dtypes.Field(
ibis_dtype_name,
map_ibis_dtype_to_narwhals_dtype(ibis_dtype_field, dtypes),
)
for ibis_dtype_name, ibis_dtype_field in ibis_dtype.items()
]
)
return dtypes.Unknown() # pragma: no cover


Expand Down
4 changes: 1 addition & 3 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,9 +619,7 @@ def quantile(

def zip_with(self: Self, mask: Any, other: Any) -> PandasLikeSeries:
ser = self._native_series
mask = validate_column_comparand(
ser.index, mask, treat_length_one_as_scalar=False
)
mask = validate_column_comparand(ser.index, mask)
other = validate_column_comparand(ser.index, other)
res = ser.where(mask, other)
return self._from_native_series(res)
Expand Down
14 changes: 7 additions & 7 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@
}


def validate_column_comparand(
index: Any, other: Any, *, treat_length_one_as_scalar: bool = True
) -> Any:
def validate_column_comparand(index: Any, other: Any) -> Any:
"""Validate RHS of binary operation.
If the comparison isn't supported, return `NotImplemented` so that the
Expand All @@ -55,9 +53,10 @@ def validate_column_comparand(
if isinstance(other, PandasLikeDataFrame):
return NotImplemented
if isinstance(other, PandasLikeSeries):
if other.len() == 1 and treat_length_one_as_scalar:
if other.len() == 1:
# broadcast
return other.item()
s = other._native_series
return s.__class__(s.iloc[0], index=index, dtype=s.dtype)
if other._native_series.index is not index:
return set_axis(
other._native_series,
Expand All @@ -83,7 +82,8 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any:
if isinstance(other, PandasLikeSeries):
if other.len() == 1:
# broadcast
return other._native_series.iloc[0]
s = other._native_series
return s.__class__(s.iloc[0], index=index, dtype=s.dtype)
if other._native_series.index is not index:
return set_axis(
other._native_series,
Expand Down Expand Up @@ -290,7 +290,7 @@ def native_to_narwhals_dtype(native_column: Any, dtypes: DTypes) -> DType:
native_column.dtype.pyarrow_dtype.list_size,
)
if dtype.startswith("struct"):
return dtypes.Struct()
return arrow_native_to_narwhals_dtype(native_column.dtype.pyarrow_dtype, dtypes)
if dtype == "object":
if ( # pragma: no cover TODO(unassigned): why does this show as uncovered?
idx := getattr(native_column, "first_valid_index", lambda: None)()
Expand Down
7 changes: 6 additions & 1 deletion narwhals/_polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,12 @@ def native_to_narwhals_dtype(dtype: Any, dtypes: DTypes) -> DType:
du_time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us")
return dtypes.Duration(time_unit=du_time_unit)
if dtype == pl.Struct:
return dtypes.Struct()
return dtypes.Struct(
[
dtypes.Field(field_name, native_to_narwhals_dtype(field_type, dtypes))
for field_name, field_type in dtype
]
)
if dtype == pl.List:
return dtypes.List(native_to_narwhals_dtype(dtype.inner, dtypes))
if dtype == pl.Array:
Expand Down
84 changes: 82 additions & 2 deletions narwhals/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
from __future__ import annotations

from collections import OrderedDict
from datetime import timezone
from typing import TYPE_CHECKING
from typing import Literal
from typing import Mapping

if TYPE_CHECKING:
from typing import Iterator
from typing import Literal
from typing import Sequence

from typing_extensions import Self


Expand Down Expand Up @@ -170,7 +175,82 @@ class Categorical(DType): ...
class Enum(DType): ...


class Struct(DType): ...
class Field:
    """Definition of a single field within a `Struct` DataType.

    Arguments:
        name: The name of the field within its parent `Struct`.
        dtype: The `DataType` of the field's values.
    """

    name: str
    dtype: type[DType] | DType

    def __init__(self, name: str, dtype: type[DType] | DType) -> None:
        self.name = name
        self.dtype = dtype

    def __eq__(self, other: object) -> bool:
        """Return whether `other` is a `Field` with the same name and dtype."""
        if not isinstance(other, Field):
            # Let Python try the reflected comparison and then fall back to
            # identity, so `Field(...) == "x"` is False instead of raising
            # AttributeError on the missing `.name` attribute.
            return NotImplemented
        return self.name == other.name and self.dtype == other.dtype

    def __hash__(self) -> int:
        # Hash the same pair that `__eq__` compares so equal fields collide.
        return hash((self.name, self.dtype))

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"{class_name}({self.name!r}, {self.dtype})"


class Struct(DType):
    """Composite dtype made up of an ordered collection of named fields.

    Arguments:
        fields: The fields that make up the struct. Either a sequence of
            `Field` objects, or a mapping from field name to data type.
    """

    fields: list[Field]

    def __init__(
        self, fields: Sequence[Field] | Mapping[str, DType | type[DType]]
    ) -> None:
        if not isinstance(fields, Mapping):
            # Already a sequence of Field objects: store a list copy of it.
            self.fields = list(fields)
            return
        self.fields = [Field(key, value) for key, value in fields.items()]

    def __eq__(self, other: DType | type[DType]) -> bool:  # type: ignore[override]
        # Two comparisons are supported:
        #   * against another instance of this class: equal when the field
        #     lists are equal;
        #   * against a bare class that is this class or a subclass of it:
        #     always equal, because a class carries no field information to
        #     contradict this instance.
        # Anything else compares unequal.
        if isinstance(other, self.__class__):
            return self.fields == other.fields
        return type(other) is type and issubclass(other, self.__class__)

    def __hash__(self) -> int:
        return hash((self.__class__, tuple(self.fields)))

    def __iter__(self) -> Iterator[tuple[str, DType | type[DType]]]:
        # Yield `(name, dtype)` pairs in declaration order.
        yield from ((item.name, item.dtype) for item in self.fields)

    def __reversed__(self) -> Iterator[tuple[str, DType | type[DType]]]:
        # Yield `(name, dtype)` pairs in reverse declaration order.
        yield from ((item.name, item.dtype) for item in reversed(self.fields))

    def __repr__(self) -> str:
        as_mapping = dict(self)
        return f"{self.__class__.__name__}({as_mapping})"

    def to_schema(self) -> OrderedDict[str, DType | type[DType]]:
        """Return Struct dtype as a schema dict."""
        schema = OrderedDict(self)
        return schema


class List(DType):
Expand Down
Loading

0 comments on commit 279b047

Please sign in to comment.