Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/main' into ewm
Browse files Browse the repository at this point in the history
DeaMariaLeon committed Nov 1, 2024

Verified

This commit was signed with the committer’s verified signature.
2 parents 212b78a + 5c3db5b commit 1bf1571
Showing 5 changed files with 133 additions and 11 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/dependencies.md
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
- is_cudf_series
- is_dask_dataframe
- is_ibis_table
- is_into_dataframe
- is_into_series
- is_modin_dataframe
- is_modin_index
15 changes: 12 additions & 3 deletions narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
@@ -340,7 +340,9 @@ def convert_str_slice_to_int_slice(
# Regex for date, time, separator and timezone components
DATE_RE = r"(?P<date>\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4})"
SEP_RE = r"(?P<sep>\s|T)"
# NOTE(review): TIME_RE is assigned twice below; this looks like a rendered diff
# (old line followed by its replacement) -- confirm. If both lines execute, the
# second assignment, whose seconds component is optional, is the one in effect.
TIME_RE = r"(?P<time>\d{2}:\d{2}:\d{2})" # \s*(?P<period>[AP]M)?)?
TIME_RE = r"(?P<time>\d{2}:\d{2}(?::\d{2})?)" # \s*(?P<period>[AP]M)?)?
# Patterns anchored at both ends for a string that holds only a time:
# HMS_RE is hours:minutes:seconds, HM_RE is hours:minutes.
HMS_RE = r"^(?P<hms>\d{2}:\d{2}:\d{2})$"
HM_RE = r"^(?P<hm>\d{2}:\d{2})$"
TZ_RE = r"(?P<tz>Z|[+-]\d{2}:?\d{2})" # Matches 'Z', '+02:00', '+0200'; a bare '+02' is not matched (four offset digits are required).
# Date is mandatory; separator, time and timezone are each optional. Only the end is anchored.
FULL_RE = rf"{DATE_RE}{SEP_RE}?{TIME_RE}?{TZ_RE}?$"

@@ -354,6 +356,10 @@ def convert_str_slice_to_int_slice(
(DMY_RE, "%d-%m-%Y"),
(MDY_RE, "%m-%d-%Y"),
)
# (pattern, format string) pairs consumed in this order by _parse_time_format;
# the pattern that includes seconds is listed before the hours:minutes one.
TIME_FORMATS = (
(HMS_RE, "%H:%M:%S"),
(HM_RE, "%H:%M"),
)


def parse_datetime_format(arr: pa.StringArray) -> str:
@@ -418,5 +424,8 @@ def _parse_date_format(arr: pa.Array) -> str:
def _parse_time_format(arr: pa.Array) -> str:
# Returns a time format string chosen from the regex matches on `arr`, or "" when
# no pattern qualifies.
import pyarrow.compute as pc # ignore-banned-import

# NOTE(review): the two statements below and the loop after them look like the old
# and the new body of this function shown together by a diff view -- confirm which
# one applies. Read literally, the first `return` would make the loop unreachable.
matches = pc.extract_regex(arr, pattern=TIME_RE)
return "%H:%M:%S" if pc.all(matches.is_valid()).as_py() else ""
# Walk TIME_FORMATS in order and return the format paired with the first pattern
# for which pc.all(matches.is_valid()) is truthy.
for time_rgx, time_fmt in TIME_FORMATS:
matches = pc.extract_regex(arr, pattern=time_rgx)
if pc.all(matches.is_valid()).as_py():
return time_fmt
# No pattern qualified.
return ""
39 changes: 39 additions & 0 deletions narwhals/dependencies.py
Original file line number Diff line number Diff line change
@@ -251,6 +251,44 @@ def is_into_series(native_series: IntoSeries) -> bool:
)


def is_into_dataframe(native_dataframe: Any) -> bool:
    """
    Report whether an object is accepted as input for a Narwhals DataFrame.

    The object qualifies when it is already a Narwhals `DataFrame`, when it has a
    `__narwhals_dataframe__` attribute, or when it passes one of
    `is_polars_dataframe`, `is_pyarrow_table` or `is_pandas_like_dataframe`.

    Arguments:
        native_dataframe: The object to check.

    Returns:
        `True` if `native_dataframe` can be converted to a Narwhals DataFrame,
        `False` otherwise.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import numpy as np
        >>> from narwhals.dependencies import is_into_dataframe

        >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        >>> np_arr = np.array([[1, 4], [2, 5], [3, 6]])

        >>> is_into_dataframe(df_pd)
        True
        >>> is_into_dataframe(df_pl)
        True
        >>> is_into_dataframe(np_arr)
        False
    """
    # Imported inside the function, as in the original block.
    from narwhals.dataframe import DataFrame

    # Already a Narwhals DataFrame.
    if isinstance(native_dataframe, DataFrame):
        return True

    # Any object exposing the Narwhals dataframe protocol attribute.
    if hasattr(native_dataframe, "__narwhals_dataframe__"):
        return True

    # Otherwise defer to the backend-specific checks, in the original order.
    return (
        is_polars_dataframe(native_dataframe)
        or is_pyarrow_table(native_dataframe)
        or is_pandas_like_dataframe(native_dataframe)
    )


__all__ = [
"get_polars",
"get_pandas",
@@ -275,5 +313,6 @@ def is_into_series(native_series: IntoSeries) -> bool:
"is_dask_dataframe",
"is_pandas_like_dataframe",
"is_pandas_like_series",
"is_into_dataframe",
"is_into_series",
]
37 changes: 37 additions & 0 deletions tests/dependencies/is_into_dataframe_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Any

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa

import narwhals as nw
from narwhals.dependencies import is_into_dataframe

if TYPE_CHECKING:
from typing_extensions import Self


class DictDataFrame:
    """Minimal dict-backed object that exposes `__narwhals_dataframe__`."""

    def __init__(self, data: dict[str, list[Any]]) -> None:
        # Store the mapping as given; no copy is made.
        self._data = data

    def __len__(self) -> int:  # pragma: no cover
        # The length is that of the first value in the mapping.
        first_column = next(iter(self._data.values()))
        return len(first_column)

    def __narwhals_dataframe__(self) -> Self:  # pragma: no cover
        # Hands back the instance itself.
        return self


def test_is_into_dataframe() -> None:
    """Check which inputs `is_into_dataframe` accepts and which it rejects."""
    data = {"a": [1, 2, 3], "b": [4, 5, 6]}

    # Objects that must be recognised as convertible.
    accepted = [
        pa.table(data),
        pl.DataFrame(data),
        pd.DataFrame(data),
        nw.from_native(pd.DataFrame(data)),
        DictDataFrame(data),
    ]
    for candidate in accepted:
        assert is_into_dataframe(candidate)

    # Objects that must be rejected: a NumPy array and a plain dict.
    rejected = [np.array([[1, 4], [2, 5], [3, 6]]), data]
    for candidate in rejected:
        assert not is_into_dataframe(candidate)
52 changes: 44 additions & 8 deletions tests/expr_and_series/str/to_datetime_test.py
Original file line number Diff line number Diff line change
@@ -47,11 +47,29 @@ def test_to_datetime_series(constructor_eager: ConstructorEager) -> None:
assert str(result) == expected


def test_to_datetime_infer_fmt(constructor: Constructor) -> None:
@pytest.mark.parametrize(
("data", "expected", "expected_cudf"),
[
(
{"a": ["2020-01-01T12:34:56"]},
"2020-01-01 12:34:56",
"2020-01-01T12:34:56.000000000",
),
(
{"a": ["2020-01-01T12:34"]},
"2020-01-01 12:34:00",
"2020-01-01T12:34:00.000000000",
),
],
)
def test_to_datetime_infer_fmt(
constructor: Constructor,
data: dict[str, list[str]],
expected: str,
expected_cudf: str,
) -> None:
if "cudf" in str(constructor): # pragma: no cover
expected = "2020-01-01T12:34:56.000000000"
else:
expected = "2020-01-01 12:34:56"
expected = expected_cudf

result = (
nw.from_native(constructor(data))
@@ -63,11 +81,29 @@ def test_to_datetime_infer_fmt(constructor: Constructor) -> None:
assert str(result) == expected


def test_to_datetime_series_infer_fmt(constructor_eager: ConstructorEager) -> None:
@pytest.mark.parametrize(
("data", "expected", "expected_cudf"),
[
(
{"a": ["2020-01-01T12:34:56"]},
"2020-01-01 12:34:56",
"2020-01-01T12:34:56.000000000",
),
(
{"a": ["2020-01-01T12:34"]},
"2020-01-01 12:34:00",
"2020-01-01T12:34:00.000000000",
),
],
)
def test_to_datetime_series_infer_fmt(
constructor_eager: ConstructorEager,
data: dict[str, list[str]],
expected: str,
expected_cudf: str,
) -> None:
if "cudf" in str(constructor_eager): # pragma: no cover
expected = "2020-01-01T12:34:56.000000000"
else:
expected = "2020-01-01 12:34:56"
expected = expected_cudf

result = (
nw.from_native(constructor_eager(data), eager_only=True)["a"].str.to_datetime()

0 comments on commit 1bf1571

Please sign in to comment.