From a6d76e17c3ae3111af7f5a619c47767fa4790e25 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Tue, 7 Jan 2025 14:58:03 +0000 Subject: [PATCH 1/4] chore: remove some expr._kwargs defaults (#1747) --- narwhals/_arrow/group_by.py | 2 +- narwhals/_dask/group_by.py | 4 +--- narwhals/_pandas_like/expr.py | 4 ++-- narwhals/_pandas_like/group_by.py | 5 ++--- narwhals/_spark_like/group_by.py | 2 +- 5 files changed, 7 insertions(+), 10 deletions(-) diff --git a/narwhals/_arrow/group_by.py b/narwhals/_arrow/group_by.py index ffb16578f..11ed914fe 100644 --- a/narwhals/_arrow/group_by.py +++ b/narwhals/_arrow/group_by.py @@ -171,7 +171,7 @@ def agg_arrow( function_name = remove_prefix(expr._function_name, "col->") if function_name in {"std", "var"}: - option = pc.VarianceOptions(ddof=expr._kwargs.get("ddof", 1)) + option = pc.VarianceOptions(ddof=expr._kwargs["ddof"]) elif function_name in {"len", "n_unique"}: option = pc.CountOptions(mode="all") elif function_name == "count": diff --git a/narwhals/_dask/group_by.py b/narwhals/_dask/group_by.py index 7bda88ee5..243b21b71 100644 --- a/narwhals/_dask/group_by.py +++ b/narwhals/_dask/group_by.py @@ -178,9 +178,7 @@ def agg_dask( function_name = remove_prefix(expr._function_name, "col->") kwargs = ( - {"ddof": expr._kwargs.get("ddof", 1)} - if function_name in {"std", "var"} - else {} + {"ddof": expr._kwargs["ddof"]} if function_name in {"std", "var"} else {} ) agg_function = POLARS_TO_DASK_AGGREGATIONS.get(function_name, function_name) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index fac9a2ed6..34d05b7eb 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -412,12 +412,12 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: df = df.with_columns(~plx.col(*self._root_names).is_null()) if self._function_name == "col->shift": - kwargs = {"periods": self._kwargs.get("n", 1)} + kwargs = {"periods": self._kwargs["n"]} elif self._function_name == "col->rank": _method = self._kwargs.get("method", "average") kwargs = { "method": "first" if _method == "ordinal" else _method, - "ascending": not self._kwargs.get("descending", False), + "ascending": not self._kwargs["descending"], "na_option": "keep", "pct": False, } diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index 0f1000606..a1eca5b5d 100644 --- a/narwhals/_pandas_like/group_by.py +++ b/narwhals/_pandas_like/group_by.py @@ -209,14 +209,13 @@ def agg_pandas( # noqa: PLR0915 is_n_unique = function_name == "nunique" is_std = function_name == "std" is_var = function_name == "var" - ddof = expr._kwargs.get("ddof", 1) for root_name, output_name in zip(expr._root_names, expr._output_names): if is_n_unique: nunique_aggs[output_name] = root_name - elif is_std and ddof != 1: + elif is_std and (ddof := expr._kwargs["ddof"]) != 1: std_aggs[ddof][0].append(root_name) std_aggs[ddof][1].append(output_name) - elif is_var and ddof != 1: + elif is_var and (ddof := expr._kwargs["ddof"]) != 1: var_aggs[ddof][0].append(root_name) var_aggs[ddof][1].append(output_name) else: diff --git a/narwhals/_spark_like/group_by.py b/narwhals/_spark_like/group_by.py index 7f3dc077d..0100500ff 100644 --- a/narwhals/_spark_like/group_by.py +++ b/narwhals/_spark_like/group_by.py @@ -85,7 +85,7 @@ def get_spark_function(function_name: str, **kwargs: Any) -> Column: return partial( _std if function_name == "std" else _var, - ddof=kwargs.get("ddof", 1), + ddof=kwargs["ddof"], np_version=parse_version(np.__version__), ) from pyspark.sql import functions as F # noqa: N812 From 3672e86f0a2356869637848ccd13c41852ad1c28 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Tue, 7 Jan 2025 15:39:04 +0000 Subject: [PATCH 2/4] chore: validate predicates in `nw.when` one level higher (#1756) * chore: validate predicates in `nw.when` one level higher * sort out fail --- narwhals/_arrow/namespace.py | 7 +------ narwhals/_dask/namespace.py | 7 +------ narwhals/_pandas_like/namespace.py | 7 +------ narwhals/expr.py | 3 +++ tests/expr_and_series/when_test.py | 6 +----- 5 files changed, 7 insertions(+), 23 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 99f043ebd..b02ad32ee 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -359,12 +359,7 @@ def when( *predicates: IntoArrowExpr, ) -> ArrowWhen: plx = self.__class__(backend_version=self._backend_version, version=self._version) - if predicates: - condition = plx.all_horizontal(*predicates) - else: - msg = "at least one predicate needs to be provided" - raise TypeError(msg) - + condition = plx.all_horizontal(*predicates) return ArrowWhen(condition, self._backend_version, version=self._version) def concat_str( diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index d9a1a8ac6..9a16d7f13 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -310,12 +310,7 @@ def when( *predicates: IntoDaskExpr, ) -> DaskWhen: plx = self.__class__(backend_version=self._backend_version, version=self._version) - if predicates: - condition = plx.all_horizontal(*predicates) - else: - msg = "at least one predicate needs to be provided" - raise TypeError(msg) - + condition = plx.all_horizontal(*predicates) return DaskWhen( condition, self._backend_version, returns_scalar=False, version=self._version ) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 7885d7de0..212c9c938 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -371,12 +371,7 @@ def when( plx = self.__class__( self._implementation, self._backend_version, version=self._version ) - if predicates: - condition = plx.all_horizontal(*predicates) - else: - msg = "at least one predicate needs to be provided" - raise TypeError(msg) - + condition = plx.all_horizontal(*predicates) return PandasWhen( condition, self._implementation, self._backend_version, version=self._version ) diff --git a/narwhals/expr.py b/narwhals/expr.py index 809f76e77..653300da8 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -7643,6 +7643,9 @@ def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: class When: def __init__(self, *predicates: IntoExpr | Iterable[IntoExpr]) -> None: self._predicates = flatten([predicates]) + if not self._predicates: + msg = "At least one predicate needs to be provided to `narwhals.when`." + raise TypeError(msg) def _extract_predicates(self, plx: Any) -> Any: return [extract_compliant(plx, v) for v in self._predicates] diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index b59dda488..739b00e2d 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -54,11 +54,7 @@ def test_multiple_conditions( assert_equal_data(result, expected) -def test_no_arg_when_fail( - constructor: Constructor, request: pytest.FixtureRequest -) -> None: - if "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) +def test_no_arg_when_fail(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) with pytest.raises((TypeError, ValueError)): df.select(nw.when().then(value=3).alias("a_when")) From 1bdf4dc88a73c393ae6a92a5d1c62fed4086801f Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Tue, 7 Jan 2025 17:34:21 +0000 Subject: [PATCH 3/4] chore: Remove some unnecessary trailing commas (#1757) --- .github/workflows/downstream_tests.yml | 2 +- narwhals/_arrow/dataframe.py | 50 +++-------- narwhals/_arrow/expr.py | 56 +++--------- narwhals/_dask/expr.py | 118 +++++-------------------- narwhals/_duckdb/expr.py | 45 +++------- 5 files changed, 56 insertions(+), 215 deletions(-) diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml index 548251ddc..5ad95b6d9 100644 --- a/.github/workflows/downstream_tests.yml +++ b/.github/workflows/downstream_tests.yml @@ -220,7 +220,7 @@ jobs: run: | cd tea-tasting pdm remove narwhals - pdm add ./.. + pdm add ./..[dev] - name: show-deps run: | cd tea-tasting diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index f4ad2912e..e6bb6fa65 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -101,23 +101,14 @@ def row(self: Self, index: int) -> tuple[Any, ...]: return tuple(col[index] for col in self._native_frame) @overload - def rows( - self: Self, - *, - named: Literal[True], - ) -> list[dict[str, Any]]: ... + def rows(self: Self, *, named: Literal[True]) -> list[dict[str, Any]]: ... @overload - def rows( - self: Self, - *, - named: Literal[False], - ) -> list[tuple[Any, ...]]: ... + def rows(self: Self, *, named: Literal[False]) -> list[tuple[Any, ...]]: ... + @overload def rows( - self: Self, - *, - named: bool, + self: Self, *, named: bool ) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ... def rows(self: Self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]: @@ -126,10 +117,7 @@ def rows(self: Self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, A return self._native_frame.to_pylist() # type: ignore[no-any-return] def iter_rows( - self: Self, - *, - named: bool, - buffer_size: int, + self: Self, *, named: bool, buffer_size: int ) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]: df = self._native_frame num_rows = df.num_rows @@ -263,9 +251,7 @@ def __getitem__( ) start = item.start or 0 stop = item.stop if item.stop is not None else len(self._native_frame) - return self._from_native_frame( - self._native_frame.slice(start, stop - start), - ) + return self._from_native_frame(self._native_frame.slice(start, stop - start)) elif isinstance(item, Sequence) or (is_numpy_array(item) and item.ndim == 1): if ( @@ -301,11 +287,7 @@ def estimated_size(self: Self, unit: SizeUnit) -> int | float: def columns(self: Self) -> list[str]: return self._native_frame.schema.names # type: ignore[no-any-return] - def select( - self: Self, - *exprs: IntoArrowExpr, - **named_exprs: IntoArrowExpr, - ) -> Self: + def select(self: Self, *exprs: IntoArrowExpr, **named_exprs: IntoArrowExpr) -> Self: import pyarrow as pa new_series = evaluate_into_exprs(self, *exprs, **named_exprs) @@ -313,16 +295,11 @@ def select( # return empty dataframe, like Polars does return self._from_native_frame(self._native_frame.__class__.from_arrays([])) names = [s.name for s in new_series] - df = pa.Table.from_arrays( - broadcast_series(new_series), - names=names, - ) + df = pa.Table.from_arrays(broadcast_series(new_series), names=names) return self._from_native_frame(df) def with_columns( - self: Self, - *exprs: IntoArrowExpr, - **named_exprs: IntoArrowExpr, + self: Self, *exprs: IntoArrowExpr, **named_exprs: IntoArrowExpr ) -> Self: native_frame = self._native_frame new_columns = evaluate_into_exprs(self, *exprs, **named_exprs) @@ -334,9 +311,7 @@ def with_columns( col_name = col_value.name column = validate_dataframe_comparand( - length=length, - other=col_value, - backend_version=self._backend_version, + length=length, other=col_value, backend_version=self._backend_version ) native_frame = ( @@ -611,12 +586,9 @@ def is_duplicated(self: Self) -> ArrowSeries: columns = self.columns index_token = generate_temporary_column_name(n_bytes=8, columns=columns) col_token = generate_temporary_column_name( - n_bytes=8, - columns=[*columns, index_token], + n_bytes=8, columns=[*columns, index_token] ) - df = self.with_row_index(index_token)._native_frame - row_count = ( df.append_column(col_token, pa.repeat(pa.scalar(1), len(self))) .group_by(columns) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 5ae6ce6b0..1c0d0734e 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -87,8 +87,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: except KeyError as e: missing_columns = [x for x in column_names if x not in df.columns] raise ColumnNotFoundError.from_missing_and_available_column_names( - missing_columns=missing_columns, - available_columns=df.columns, + missing_columns=missing_columns, available_columns=df.columns ) from e return cls( @@ -564,9 +563,7 @@ def __init__(self: Self, expr: ArrowExpr) -> None: def get_categories(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "cat", - "get_categories", + self._compliant_expr, "cat", "get_categories" ) @@ -676,12 +673,7 @@ def len_chars(self: Self) -> ArrowExpr: ) def replace( - self: Self, - pattern: str, - value: str, - *, - literal: bool, - n: int, + self: Self, pattern: str, value: str, *, literal: bool, n: int ) -> ArrowExpr: return reuse_series_namespace_implementation( self._compliant_expr, @@ -693,13 +685,7 @@ def replace( n=n, ) - def replace_all( - self: Self, - pattern: str, - value: str, - *, - literal: bool, - ) -> ArrowExpr: + def replace_all(self: Self, pattern: str, value: str, *, literal: bool) -> ArrowExpr: return reuse_series_namespace_implementation( self._compliant_expr, "str", @@ -711,26 +697,17 @@ def replace_all( def strip_chars(self: Self, characters: str | None) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "strip_chars", - characters=characters, + self._compliant_expr, "str", "strip_chars", characters=characters ) def starts_with(self: Self, prefix: str) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "starts_with", - prefix=prefix, + self._compliant_expr, "str", "starts_with", prefix=prefix ) def ends_with(self: Self, suffix: str) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "ends_with", - suffix=suffix, + self._compliant_expr, "str", "ends_with", suffix=suffix ) def contains(self, pattern: str, *, literal: bool) -> ArrowExpr: @@ -745,24 +722,17 @@ def slice(self: Self, offset: int, length: int | None) -> ArrowExpr: def to_datetime(self: Self, format: str | None) -> ArrowExpr: # noqa: A002 return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_datetime", - format=format, + self._compliant_expr, "str", "to_datetime", format=format ) def to_uppercase(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_uppercase", + self._compliant_expr, "str", "to_uppercase" ) def to_lowercase(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_lowercase", + self._compliant_expr, "str", "to_lowercase" ) @@ -931,8 +901,4 @@ def __init__(self: Self, expr: ArrowExpr) -> None: self._expr = expr def len(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self._expr, - "list", - "len", - ) + return reuse_series_namespace_implementation(self._expr, "list", "len") diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index c76593404..cb20fa616 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -307,11 +307,7 @@ def __invert__(self: Self) -> Self: ) def mean(self) -> Self: - return self._from_call( - lambda _input: _input.mean(), - "mean", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.mean(), "mean", returns_scalar=True) def median(self) -> Self: from narwhals.exceptions import InvalidOperationError @@ -326,18 +322,10 @@ def func(s: dask_expr.Series) -> dask_expr.Series: return self._from_call(func, "median", returns_scalar=True) def min(self) -> Self: - return self._from_call( - lambda _input: _input.min(), - "min", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.min(), "min", returns_scalar=True) def max(self) -> Self: - return self._from_call( - lambda _input: _input.max(), - "max", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.max(), "max", returns_scalar=True) def std(self, ddof: int) -> Self: return self._from_call( @@ -356,11 +344,7 @@ def var(self, ddof: int) -> Self: ) def skew(self: Self) -> Self: - return self._from_call( - lambda _input: _input.skew(), - "skew", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.skew(), "skew", returns_scalar=True) def shift(self, n: int) -> Self: return self._from_call( @@ -435,9 +419,7 @@ def is_between( closed = "neither" return self._from_call( lambda _input, lower_bound, upper_bound, closed: _input.between( - lower_bound, - upper_bound, - closed, + lower_bound, upper_bound, closed ), "is_between", lower_bound=lower_bound, @@ -447,17 +429,11 @@ def is_between( ) def sum(self) -> Self: - return self._from_call( - lambda _input: _input.sum(), - "sum", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.sum(), "sum", returns_scalar=True) def count(self) -> Self: return self._from_call( - lambda _input: _input.count(), - "count", - returns_scalar=True, + lambda _input: _input.count(), "count", returns_scalar=True ) def round(self, decimals: int) -> Self: @@ -510,9 +486,7 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> NoRetur def abs(self) -> Self: return self._from_call( - lambda _input: _input.abs(), - "abs", - returns_scalar=self._returns_scalar, + lambda _input: _input.abs(), "abs", returns_scalar=self._returns_scalar ) def all(self) -> Self: @@ -579,23 +553,17 @@ def clip( def diff(self: Self) -> Self: return self._from_call( - lambda _input: _input.diff(), - "diff", - returns_scalar=self._returns_scalar, + lambda _input: _input.diff(), "diff", returns_scalar=self._returns_scalar ) def n_unique(self: Self) -> Self: return self._from_call( - lambda _input: _input.nunique(dropna=False), - "n_unique", - returns_scalar=True, + lambda _input: _input.nunique(dropna=False), "n_unique", returns_scalar=True ) def is_null(self: Self) -> Self: return self._from_call( - lambda _input: _input.isna(), - "is_null", - returns_scalar=self._returns_scalar, + lambda _input: _input.isna(), "is_null", returns_scalar=self._returns_scalar ) def is_nan(self: Self) -> Self: @@ -606,18 +574,10 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: msg = f"`.is_nan` only supported for numeric dtypes and not {dtype}, did you mean `.is_null`?" raise InvalidOperationError(msg) - return self._from_call( - func, - "is_null", - returns_scalar=self._returns_scalar, - ) + return self._from_call(func, "is_null", returns_scalar=self._returns_scalar) def len(self: Self) -> Self: - return self._from_call( - lambda _input: _input.size, - "len", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.size, "len", returns_scalar=True) def quantile( self: Self, @@ -633,10 +593,7 @@ def func(_input: dask_expr.Series, quantile: float) -> dask_expr.Series: return _input.quantile(q=quantile, method="dask") # pragma: no cover return self._from_call( - func, - "quantile", - quantile=quantile, - returns_scalar=True, + func, "quantile", quantile=quantile, returns_scalar=True ) else: msg = "`higher`, `lower`, `midpoint`, `nearest` - interpolation methods are not supported by Dask. Please use `linear` instead." @@ -655,13 +612,10 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: first_distinct_index = _input.groupby(_name).agg({col_token: "min"})[ col_token ] - return _input[col_token].isin(first_distinct_index) return self._from_call( - func, - "is_first_distinct", - returns_scalar=self._returns_scalar, + func, "is_first_distinct", returns_scalar=self._returns_scalar ) def is_last_distinct(self: Self) -> Self: @@ -675,13 +629,10 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: implementation=self._implementation, ) last_distinct_index = _input.groupby(_name).agg({col_token: "max"})[col_token] - return _input[col_token].isin(last_distinct_index) return self._from_call( - func, - "is_last_distinct", - returns_scalar=self._returns_scalar, + func, "is_last_distinct", returns_scalar=self._returns_scalar ) def is_duplicated(self: Self) -> Self: @@ -694,11 +645,7 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: > 1 ) - return self._from_call( - func, - "is_duplicated", - returns_scalar=self._returns_scalar, - ) + return self._from_call(func, "is_duplicated", returns_scalar=self._returns_scalar) def is_unique(self: Self) -> Self: def func(_input: dask_expr.Series) -> dask_expr.Series: @@ -710,11 +657,7 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: == 1 ) - return self._from_call( - func, - "is_unique", - returns_scalar=self._returns_scalar, - ) + return self._from_call(func, "is_unique", returns_scalar=self._returns_scalar) def is_in(self: Self, other: Any) -> Self: return self._from_call( @@ -788,19 +731,13 @@ def dt(self: Self) -> DaskExprDateTimeNamespace: def name(self: Self) -> DaskExprNameNamespace: return DaskExprNameNamespace(self) - def cast( - self: Self, - dtype: DType | type[DType], - ) -> Self: + def cast(self: Self, dtype: DType | type[DType]) -> Self: def func(_input: Any, dtype: DType | type[DType]) -> Any: dtype = narwhals_to_native_dtype(dtype, self._version) return _input.astype(dtype) return self._from_call( - func, - "cast", - dtype=dtype, - returns_scalar=self._returns_scalar, + func, "cast", dtype=dtype, returns_scalar=self._returns_scalar ) def is_finite(self: Self) -> Self: @@ -825,12 +762,7 @@ def len_chars(self) -> DaskExpr: ) def replace( - self, - pattern: str, - value: str, - *, - literal: bool = False, - n: int = 1, + self, pattern: str, value: str, *, literal: bool = False, n: int = 1 ) -> DaskExpr: return self._compliant_expr._from_call( lambda _input, pattern, value, literal, n: _input.str.replace( @@ -844,13 +776,7 @@ def replace( returns_scalar=self._compliant_expr._returns_scalar, ) - def replace_all( - self, - pattern: str, - value: str, - *, - literal: bool = False, - ) -> DaskExpr: + def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> DaskExpr: return self._compliant_expr._from_call( lambda _input, pattern, value, literal: _input.str.replace( pattern, value, n=-1, regex=not literal diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 3956e919d..0f33ff846 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -365,9 +365,7 @@ def func( _input: duckdb.Expression, lower_bound: Any, upper_bound: Any ) -> duckdb.Expression: return FunctionExpression( - "greatest", - FunctionExpression("least", _input, upper_bound), - lower_bound, + "greatest", FunctionExpression("least", _input, upper_bound), lower_bound ) return self._from_call( @@ -407,9 +405,7 @@ def sum(self) -> Self: from duckdb import FunctionExpression return self._from_call( - lambda _input: FunctionExpression("sum", _input), - "sum", - returns_scalar=True, + lambda _input: FunctionExpression("sum", _input), "sum", returns_scalar=True ) def count(self) -> Self: @@ -425,9 +421,7 @@ def len(self) -> Self: from duckdb import FunctionExpression return self._from_call( - lambda _input: FunctionExpression("count"), - "len", - returns_scalar=True, + lambda _input: FunctionExpression("count"), "len", returns_scalar=True ) def std(self, ddof: int) -> Self: @@ -441,9 +435,7 @@ def std(self, ddof: int) -> Self: msg = f"std with ddof {ddof} is not supported in DuckDB" raise NotImplementedError(msg) return self._from_call( - lambda _input: FunctionExpression(func, _input), - "std", - returns_scalar=True, + lambda _input: FunctionExpression(func, _input), "std", returns_scalar=True ) def var(self, ddof: int) -> Self: @@ -457,34 +449,26 @@ def var(self, ddof: int) -> Self: msg = f"var with ddof {ddof} is not supported in DuckDB" raise NotImplementedError(msg) return self._from_call( - lambda _input: FunctionExpression(func, _input), - "var", - returns_scalar=True, + lambda _input: FunctionExpression(func, _input), "var", returns_scalar=True ) def max(self) -> Self: from duckdb import FunctionExpression return self._from_call( - lambda _input: FunctionExpression("max", _input), - "max", - returns_scalar=True, + lambda _input: FunctionExpression("max", _input), "max", returns_scalar=True ) def min(self) -> Self: from duckdb import FunctionExpression return self._from_call( - lambda _input: FunctionExpression("min", _input), - "min", - returns_scalar=True, + lambda _input: FunctionExpression("min", _input), "min", returns_scalar=True ) def is_null(self) -> Self: return self._from_call( - lambda _input: _input.isnull(), - "is_null", - returns_scalar=self._returns_scalar, + lambda _input: _input.isnull(), "is_null", returns_scalar=self._returns_scalar ) def is_in(self, other: Sequence[Any]) -> Self: @@ -590,9 +574,7 @@ def func(_input: duckdb.Expression) -> duckdb.Expression: ) return self._compliant_expr._from_call( - func, - "contains", - returns_scalar=self._compliant_expr._returns_scalar, + func, "contains", returns_scalar=self._compliant_expr._returns_scalar ) def slice(self, offset: int, length: int) -> DuckDBExpr: @@ -612,9 +594,7 @@ def func(_input: duckdb.Expression) -> duckdb.Expression: ) return self._compliant_expr._from_call( - func, - "slice", - returns_scalar=self._compliant_expr._returns_scalar, + func, "slice", returns_scalar=self._compliant_expr._returns_scalar ) def to_lowercase(self) -> DuckDBExpr: @@ -664,10 +644,7 @@ def replace_all( raise NotImplementedError(msg) return self._compliant_expr._from_call( lambda _input: FunctionExpression( - "replace", - _input, - ConstantExpression(pattern), - ConstantExpression(value), + "replace", _input, ConstantExpression(pattern), ConstantExpression(value) ), "replace_all", returns_scalar=self._compliant_expr._returns_scalar, From 373320ef28d32c99d4b1c39db956c7ba2f732775 Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Wed, 8 Jan 2025 09:16:18 +0100 Subject: [PATCH 4/4] fix: update Spark min version in `utils.py` (#1760) update min version in utils --- narwhals/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 591cd53ae..c03642c90 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -295,7 +295,7 @@ def is_ibis(self) -> bool: Implementation.MODIN: (0, 25, 3), Implementation.CUDF: (24, 10), Implementation.PYARROW: (11,), - Implementation.PYSPARK: (3, 3), + Implementation.PYSPARK: (3, 5), Implementation.POLARS: (0, 20, 3), Implementation.DASK: (2024, 8), Implementation.DUCKDB: (1,),