Merge remote-tracking branch 'upstream/main' into map_batches

DeaMariaLeon · Oct 28, 2024 · 33140b7
2 parents 9e55aaa + 800102f
commit 33140b7
Show file tree

Hide file tree

Showing 13 changed files with 203 additions and 78 deletions.
diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml
@@ -90,7 +90,7 @@ jobs:
   nightlies:
     strategy:
       matrix:
-        python-version: ["3.12"]
+        python-version: ["3.13"]
         os: [ubuntu-latest]
     if: github.event.pull_request.head.repo.full_name == github.repository
     runs-on: ${{ matrix.os }}

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
@@ -34,7 +34,7 @@ jobs:
   pytest-windows:
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.12"]
         os: [windows-latest]
 
     runs-on: ${{ matrix.os }}
@@ -61,7 +61,7 @@ jobs:
   pytest-coverage:
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.11", "3.13"]
         os: [ubuntu-latest]
 
     runs-on: ${{ matrix.os }}

diff --git a/docs/api-reference/dependencies.md b/docs/api-reference/dependencies.md
@@ -11,15 +11,19 @@
         - get_polars
         - get_pyarrow
         - is_cudf_dataframe
+        - is_cudf_index
         - is_cudf_series
         - is_dask_dataframe
         - is_ibis_table
         - is_into_series
         - is_modin_dataframe
+        - is_modin_index
         - is_modin_series
         - is_numpy_array
         - is_pandas_dataframe
+        - is_pandas_index
         - is_pandas_like_dataframe
+        - is_pandas_like_index
         - is_pandas_like_series
         - is_pandas_series
         - is_polars_dataframe

diff --git a/docs/installation.md b/docs/installation.md
@@ -29,7 +29,7 @@ To verify the installation, start the Python REPL and execute:
 ```python
 >>> import narwhals
 >>> narwhals.__version__
-'1.11.0'
+'1.11.1'
 ```
 If you see the version number, then the installation was successful!
 

diff --git a/narwhals/__init__.py b/narwhals/__init__.py
@@ -67,7 +67,7 @@
 from narwhals.utils import maybe_reset_index
 from narwhals.utils import maybe_set_index
 
-__version__ = "1.11.0"
+__version__ = "1.11.1"
 
 __all__ = [
     "dependencies",

diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py
@@ -54,8 +54,6 @@ def __init__(
         self._backend_version = backend_version
         self._dtypes = dtypes
 
-        self._schema_cache: dict[str, DType] | None = None
-
     def __narwhals_dataframe__(self) -> Self:
         return self
 
@@ -91,7 +89,14 @@ def _validate_columns(self, columns: pd.Index) -> None:
             raise ValueError(msg) from None
 
         if len(columns) != len_unique_columns:
-            msg = f"Expected unique column names, got: {columns}"
+            from collections import Counter
+
+            counter = Counter(columns)
+            msg = ""
+            for key, value in counter.items():
+                if value > 1:
+                    msg += f"\n- '{key}' {value} times"
+            msg = f"Expected unique column names, got:{msg}"
             raise ValueError(msg)
 
     def _from_native_frame(self, df: Any) -> Self:
@@ -305,14 +310,12 @@ def iter_rows(
 
     @property
     def schema(self) -> dict[str, DType]:
-        if self._schema_cache is None:
-            self._schema_cache = {
-                col: native_to_narwhals_dtype(
-                    self._native_frame[col], self._dtypes, self._implementation
-                )
-                for col in self._native_frame.columns
-            }
-        return self._schema_cache
+        return {
+            col: native_to_narwhals_dtype(
+                self._native_frame[col], self._dtypes, self._implementation
+            )
+            for col in self._native_frame.columns
+        }
 
     def collect_schema(self) -> dict[str, DType]:
         return self.schema

diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
@@ -89,8 +89,6 @@ def __init__(
         self._backend_version = backend_version
         self._dtypes = dtypes
 
-        self._dtype_cache: DType | None = None
-
         # In pandas, copy-on-write becomes the default in version 3.
         # So, before that, we need to explicitly avoid unnecessary
         # copies by using `copy=False` sometimes.
@@ -172,11 +170,9 @@ def shape(self) -> tuple[int]:
 
     @property
     def dtype(self: Self) -> DType:
-        if self._dtype_cache is None:
-            self._dtype_cache = native_to_narwhals_dtype(
-                self._native_series, self._dtypes, self._implementation
-            )
-        return self._dtype_cache
+        return native_to_narwhals_dtype(
+            self._native_series, self._dtypes, self._implementation
+        )
 
     def scatter(self, indices: int | Sequence[int], values: Any) -> Self:
         if isinstance(values, self.__class__):

diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py
@@ -98,6 +98,11 @@ def is_pandas_series(ser: Any) -> TypeGuard[pd.Series[Any]]:
     return (pd := get_pandas()) is not None and isinstance(ser, pd.Series)
 
 
+def is_pandas_index(index: Any) -> TypeGuard[pd.Index]:
+    """Check whether `index` is a pandas Index without importing pandas."""
+    return (pd := get_pandas()) is not None and isinstance(index, pd.Index)
+
+
 def is_modin_dataframe(df: Any) -> TypeGuard[mpd.DataFrame]:
     """Check whether `df` is a modin DataFrame without importing modin."""
     return (mpd := get_modin()) is not None and isinstance(df, mpd.DataFrame)
@@ -108,6 +113,13 @@ def is_modin_series(ser: Any) -> TypeGuard[mpd.Series]:
     return (mpd := get_modin()) is not None and isinstance(ser, mpd.Series)
 
 
+def is_modin_index(index: Any) -> TypeGuard[mpd.Index]:
+    """Check whether `index` is a modin Index without importing modin."""
+    return (mpd := get_modin()) is not None and isinstance(
+        index, mpd.Index
+    )  # pragma: no cover
+
+
 def is_cudf_dataframe(df: Any) -> TypeGuard[cudf.DataFrame]:
     """Check whether `df` is a cudf DataFrame without importing cudf."""
     return (cudf := get_cudf()) is not None and isinstance(df, cudf.DataFrame)
@@ -118,6 +130,13 @@ def is_cudf_series(ser: Any) -> TypeGuard[cudf.Series[Any]]:
     return (cudf := get_cudf()) is not None and isinstance(ser, cudf.Series)
 
 
+def is_cudf_index(index: Any) -> TypeGuard[cudf.Index]:
+    """Check whether `index` is a cudf Index without importing cudf."""
+    return (cudf := get_cudf()) is not None and isinstance(
+        index, cudf.Index
+    )  # pragma: no cover
+
+
 def is_dask_dataframe(df: Any) -> TypeGuard[dd.DataFrame]:
     """Check whether `df` is a Dask DataFrame without importing Dask."""
     return (dd := get_dask_dataframe()) is not None and isinstance(df, dd.DataFrame)
@@ -174,13 +193,24 @@ def is_pandas_like_dataframe(df: Any) -> bool:
     return is_pandas_dataframe(df) or is_modin_dataframe(df) or is_cudf_dataframe(df)
 
 
-def is_pandas_like_series(arr: Any) -> bool:
+def is_pandas_like_series(ser: Any) -> bool:
     """
-    Check whether `arr` is a pandas-like Series without doing any imports
+    Check whether `ser` is a pandas-like Series without doing any imports
 
     By "pandas-like", we mean: pandas, Modin, cuDF.
     """
-    return is_pandas_series(arr) or is_modin_series(arr) or is_cudf_series(arr)
+    return is_pandas_series(ser) or is_modin_series(ser) or is_cudf_series(ser)
+
+
+def is_pandas_like_index(index: Any) -> bool:
+    """
+    Check whether `index` is a pandas-like Index without doing any imports
+
+    By "pandas-like", we mean: pandas, Modin, cuDF.
+    """
+    return (
+        is_pandas_index(index) or is_modin_index(index) or is_cudf_index(index)
+    )  # pragma: no cover
 
 
 def is_into_series(native_series: IntoSeries) -> bool: