Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into is-order-dependent-…
Browse files Browse the repository at this point in the history
…attribute
  • Loading branch information
MarcoGorelli committed Jan 13, 2025
2 parents dbfe412 + ffce1b6 commit c32884f
Show file tree
Hide file tree
Showing 23 changed files with 365 additions and 2,245 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
- to_native
- to_numpy
- to_pandas
- to_polars
- unique
- unpivot
- with_columns
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
- to_list
- to_numpy
- to_pandas
- to_polars
- to_native
- unique
- value_counts
Expand Down
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ To verify the installation, start the Python REPL and execute:
```python
>>> import narwhals
>>> narwhals.__version__
'1.21.1'
'1.22.0'
```

If you see the version number, then the installation was successful!
Expand Down
2 changes: 1 addition & 1 deletion narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
from narwhals.utils import maybe_reset_index
from narwhals.utils import maybe_set_index

__version__ = "1.21.1"
__version__ = "1.22.0"

__all__ = [
"Array",
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -427,6 +428,11 @@ def sort(
def to_pandas(self: Self) -> pd.DataFrame:
    """Return the result of calling `to_pandas()` on the wrapped native frame."""
    native_frame = self._native_frame
    return native_frame.to_pandas()

def to_polars(self: Self) -> pl.DataFrame:
    """Convert the wrapped native frame by passing it to `pl.from_arrow`.

    Polars is imported inside the method rather than at module import time.
    """
    import polars as pl  # ignore-banned-import

    native_frame = self._native_frame
    return pl.from_arrow(native_frame)  # type: ignore[return-value]

def to_numpy(self: Self) -> np.ndarray:
import numpy as np # ignore-banned-import

Expand Down
6 changes: 6 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -733,6 +734,11 @@ def to_pandas(self: Self) -> pd.Series:

return pd.Series(self._native_series, name=self.name)

def to_polars(self: Self) -> pl.Series:
    """Convert the wrapped native series by passing it to `pl.from_arrow`.

    Polars is imported inside the method rather than at module import time.
    """
    import polars as pl  # ignore-banned-import

    native_series = self._native_series
    return pl.from_arrow(native_series)  # type: ignore[return-value]

def is_duplicated(self: Self) -> ArrowSeries:
    """Compute `is_duplicated` on the one-column frame form of this series.

    The result is aliased back to this series' own name before being returned.
    """
    as_frame = self.to_frame()
    duplicated = as_frame.is_duplicated()
    return duplicated.alias(self.name)

Expand Down
22 changes: 19 additions & 3 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

import numpy as np
import pandas as pd
import polars as pl
from typing_extensions import Self

from narwhals._pandas_like.group_by import PandasLikeGroupBy
Expand Down Expand Up @@ -763,12 +764,27 @@ def to_numpy(self, dtype: Any = None, copy: bool | None = None) -> Any:
)
return df.to_numpy(copy=copy)

def to_pandas(self) -> Any:
def to_pandas(self: Self) -> pd.DataFrame:
if self._implementation is Implementation.PANDAS:
return self._native_frame
if self._implementation is Implementation.MODIN:
elif self._implementation is Implementation.CUDF: # pragma: no cover
return self._native_frame.to_pandas()
elif self._implementation is Implementation.MODIN:
return self._native_frame._to_pandas()
return self._native_frame.to_pandas() # pragma: no cover
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)

def to_polars(self: Self) -> pl.DataFrame:
    """Convert the wrapped native frame to Polars through `pl.from_pandas`.

    The native frame is first turned into a pandas object according to
    `self._implementation`, then handed to `pl.from_pandas`.

    Raises:
        AssertionError: If `self._implementation` is none of PANDAS, CUDF
            or MODIN.
    """
    import polars as pl  # ignore-banned-import

    implementation = self._implementation
    if implementation is Implementation.PANDAS:
        # Used as-is, no intermediate conversion.
        pandas_frame = self._native_frame
    elif implementation is Implementation.CUDF:  # pragma: no cover
        pandas_frame = self._native_frame.to_pandas()
    elif implementation is Implementation.MODIN:
        pandas_frame = self._native_frame._to_pandas()
    else:  # pragma: no cover
        msg = f"Unknown implementation: {self._implementation}"
        raise AssertionError(msg)
    return pl.from_pandas(pandas_frame)

def write_parquet(self, file: Any) -> Any:
self._native_frame.to_parquet(file)
Expand Down
22 changes: 18 additions & 4 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
if TYPE_CHECKING:
from types import ModuleType

import pandas as pd
import polars as pl
from typing_extensions import Self

from narwhals._pandas_like.dataframe import PandasLikeDataFrame
Expand Down Expand Up @@ -303,13 +305,13 @@ def arg_true(self) -> PandasLikeSeries:
def arg_min(self) -> int:
ser = self._native_series
if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
return ser.values.argmin() # type: ignore[no-any-return]
return ser.to_numpy().argmin() # type: ignore[no-any-return]
return ser.argmin() # type: ignore[no-any-return]

def arg_max(self) -> int:
ser = self._native_series
if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
return ser.values.argmax() # type: ignore[no-any-return]
return ser.to_numpy().argmax() # type: ignore[no-any-return]
return ser.argmax() # type: ignore[no-any-return]

# Binary comparisons
Expand Down Expand Up @@ -837,16 +839,28 @@ def to_numpy(self, dtype: Any = None, copy: bool | None = None) -> Any:
)
return s.to_numpy(dtype=dtype, copy=copy)

def to_pandas(self) -> Any:
def to_pandas(self: Self) -> pd.Series:
if self._implementation is Implementation.PANDAS:
return self._native_series
elif self._implementation is Implementation.CUDF:
elif self._implementation is Implementation.CUDF: # pragma: no cover
return self._native_series.to_pandas()
elif self._implementation is Implementation.MODIN:
return self._native_series._to_pandas()
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)

def to_polars(self: Self) -> pl.Series:
    """Convert the wrapped native series to a Polars Series.

    The native series is turned into a pandas Series according to
    `self._implementation` and passed to `pl.from_pandas`, which returns a
    `pl.Series` for Series input (hence the return annotation is
    `pl.Series`, not `pl.DataFrame`).

    Returns:
        The Polars Series produced by `pl.from_pandas`.

    Raises:
        AssertionError: If `self._implementation` is none of PANDAS, CUDF
            or MODIN.
    """
    import polars as pl  # ignore-banned-import

    if self._implementation is Implementation.PANDAS:
        # Used as-is, no intermediate conversion.
        return pl.from_pandas(self._native_series)
    elif self._implementation is Implementation.CUDF:  # pragma: no cover
        return pl.from_pandas(self._native_series.to_pandas())
    elif self._implementation is Implementation.MODIN:
        return pl.from_pandas(self._native_series._to_pandas())
    msg = f"Unknown implementation: {self._implementation}"  # pragma: no cover
    raise AssertionError(msg)

# --- descriptive ---
def is_duplicated(self: Self) -> Self:
res = self._native_series.duplicated(keep=False)
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,9 @@ def pivot(
)
return self._from_native_object(result)

def to_polars(self: Self) -> pl.DataFrame:
    """Return the wrapped native frame as-is; no conversion is performed here."""
    native_frame = self._native_frame
    return native_frame


class PolarsLazyFrame:
def __init__(
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,9 @@ def __contains__(self: Self, other: Any) -> bool:
msg = f"Unable to compare other of type {type(other)} with series of type {self.dtype}."
raise InvalidOperationError(msg) from exc

def to_polars(self: Self) -> pl.Series:
    """Return the wrapped native series as-is; no conversion is performed here."""
    native_series = self._native_series
    return native_series

@property
def dt(self: Self) -> PolarsSeriesDateTimeNamespace:
    """Build a `PolarsSeriesDateTimeNamespace` around this series."""
    datetime_namespace = PolarsSeriesDateTimeNamespace(self)
    return datetime_namespace
Expand Down
102 changes: 102 additions & 0 deletions narwhals/_spark_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import operator
from functools import reduce
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Literal

from narwhals._expression_parsing import combine_root_names
from narwhals._expression_parsing import parse_into_expr
from narwhals._expression_parsing import parse_into_exprs
from narwhals._expression_parsing import reduce_output_names
from narwhals._spark_like.dataframe import SparkLikeLazyFrame
Expand Down Expand Up @@ -334,3 +336,103 @@ def func(df: SparkLikeLazyFrame) -> list[Column]:
"ignore_nulls": ignore_nulls,
},
)

def when(self, *predicates: IntoSparkLikeExpr) -> SparkLikeWhen:
    """Start a when/then chain from one or more predicates.

    The predicates are combined through `all_horizontal` on a fresh
    namespace instance, and the combined condition is wrapped in a
    `SparkLikeWhen` with `returns_scalar=False`.
    """
    namespace = self.__class__(
        backend_version=self._backend_version, version=self._version
    )
    return SparkLikeWhen(
        namespace.all_horizontal(*predicates),
        self._backend_version,
        returns_scalar=False,
        version=self._version,
    )


class SparkLikeWhen:
    """Callable that builds a PySpark `when(...).otherwise(...)` column.

    Stores the condition expression together with the optional `then` and
    `otherwise` values; calling the instance with a frame returns a
    one-element list holding the aliased column.
    """

    def __init__(
        self,
        condition: SparkLikeExpr,
        backend_version: tuple[int, ...],
        then_value: Any | None = None,
        otherwise_value: Any | None = None,
        *,
        returns_scalar: bool,
        version: Version,
    ) -> None:
        self._condition = condition
        self._then_value = then_value
        self._otherwise_value = otherwise_value
        self._returns_scalar = returns_scalar
        self._backend_version = backend_version
        self._version = version

    def __call__(self, df: SparkLikeLazyFrame) -> list[Column]:
        """Evaluate the condition and both values against `df`.

        Values for which parsing raises `TypeError` are wrapped with `F.lit`.
        """
        from pyspark.sql import functions as F  # noqa: N812

        namespace = df.__narwhals_namespace__()
        condition_col = parse_into_expr(self._condition, namespace=namespace)(df)[0]

        try:
            then_col = parse_into_expr(self._then_value, namespace=namespace)(df)[0]
            output_name = get_column_name(df, then_col)
        except TypeError:
            # `self._then_value` is a scalar and can't be converted to an expression
            then_col = F.lit(self._then_value)
            output_name = "literal"

        try:
            otherwise_col = parse_into_expr(self._otherwise_value, namespace=namespace)(
                df
            )[0]
        except TypeError:
            # `self._otherwise_value` is a scalar and can't be converted to an expression
            otherwise_col = F.lit(self._otherwise_value)

        when_column = F.when(condition=condition_col, value=then_col)
        complete_column = when_column.otherwise(value=otherwise_col)
        return [complete_column.alias(output_name)]

    def then(self, value: SparkLikeExpr | Any) -> SparkLikeThen:
        """Record `value` as the `then` branch and wrap this object in a `SparkLikeThen`."""
        self._then_value = value
        then_kwargs = {"value": value}

        return SparkLikeThen(  # type: ignore[abstract]
            self,
            depth=0,
            function_name="whenthen",
            root_names=None,
            output_names=None,
            returns_scalar=self._returns_scalar,
            backend_version=self._backend_version,
            version=self._version,
            kwargs=then_kwargs,
        )


class SparkLikeThen(SparkLikeExpr):
    """Expression produced by `SparkLikeWhen.then`.

    Its `_call` attribute is the `SparkLikeWhen` object itself, so
    `otherwise` can update that object's fallback value in place.
    """

    def __init__(
        self,
        call: SparkLikeWhen,
        *,
        depth: int,
        function_name: str,
        root_names: list[str] | None,
        output_names: list[str] | None,
        returns_scalar: bool,
        backend_version: tuple[int, ...],
        version: Version,
        kwargs: dict[str, Any],
    ) -> None:
        self._call = call
        self._depth = depth
        self._function_name = function_name
        self._root_names = root_names
        self._output_names = output_names
        self._returns_scalar = returns_scalar
        self._kwargs = kwargs
        self._backend_version = backend_version
        self._version = version

    def otherwise(self, value: SparkLikeExpr | Any) -> SparkLikeExpr:
        """Set the fallback value on the underlying `SparkLikeWhen` and return `self`."""
        when_call = self._call
        # type ignore because `_call` holds a callable object of type
        # `SparkLikeWhen`, while the base class declares the attribute as
        # only a `Callable`
        when_call._otherwise_value = value  # type: ignore[attr-defined]
        self._function_name = "whenotherwise"
        return self
67 changes: 67 additions & 0 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -584,6 +585,72 @@ def to_pandas(self) -> pd.DataFrame:
"""
return self._compliant_frame.to_pandas()

def to_polars(self) -> pl.DataFrame:
    """Convert this DataFrame to a polars DataFrame.

    Returns:
        A polars DataFrame.

    Examples:
        Construct pandas, Polars (eager) and PyArrow DataFrames:

        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> from narwhals.typing import IntoDataFrame
        >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)
        >>> df_pa = pa.table(data)

        We define a library agnostic function:

        >>> def agnostic_to_polars(df_native: IntoDataFrame) -> pl.DataFrame:
        ...     df = nw.from_native(df_native)
        ...     return df.to_polars()

        We can then pass any supported library such as pandas, Polars (eager), or
        PyArrow to `agnostic_to_polars`:

        >>> agnostic_to_polars(df_pd)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ f64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 6.0 ┆ a   │
        │ 2   ┆ 7.0 ┆ b   │
        │ 3   ┆ 8.0 ┆ c   │
        └─────┴─────┴─────┘
        >>> agnostic_to_polars(df_pl)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ f64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 6.0 ┆ a   │
        │ 2   ┆ 7.0 ┆ b   │
        │ 3   ┆ 8.0 ┆ c   │
        └─────┴─────┴─────┘
        >>> agnostic_to_polars(df_pa)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ f64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 6.0 ┆ a   │
        │ 2   ┆ 7.0 ┆ b   │
        │ 3   ┆ 8.0 ┆ c   │
        └─────┴─────┴─────┘
    """
    compliant_frame = self._compliant_frame
    return compliant_frame.to_polars()  # type: ignore[no-any-return]

@overload
def write_csv(self, file: None = None) -> str: ...

Expand Down
Loading

0 comments on commit c32884f

Please sign in to comment.