From c0140241abfaac2bc05afaf28f927bc2e60ed592 Mon Sep 17 00:00:00 2001
From: pdmurray
Date: Sun, 14 Apr 2024 10:52:38 -0700
Subject: [PATCH] Partition accepts ChunkSize; clean up unused variable; type annotations

---
 versioned_hdf5/backend.py  | 29 +++++++++++++----------------
 versioned_hdf5/versions.py |  1 -
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/versioned_hdf5/backend.py b/versioned_hdf5/backend.py
index 3ba19bd6..53e70de3 100644
--- a/versioned_hdf5/backend.py
+++ b/versioned_hdf5/backend.py
@@ -818,7 +818,6 @@ def apply(
         Mapping between {slices in virtual dataset: slices in raw dataset}
         which were written by this function; and shape of the current dataset
         """
-        chunk_size = self.chunks[0]
         new_shape_index = Tuple(*[Slice(0, i) for i in self.shape])
         raw_data: Dataset = f["_version_data"][name]["raw_data"]  # type: ignore

@@ -826,13 +825,8 @@
         # the chunks modified by the reshape operation
         current_slices = slices.copy()

-        # # Clear out the existing slices dict as a side effect; every slice
-        # # gets touched by a ResizeOperation, and some of them will be invalidated (if they fall
-        # # on the edge of the dataset)
-        # slices.clear()
-
         new_slices = {}
-        for vchunk in partition(new_shape_index, chunk_size):
+        for vchunk in partition(new_shape_index, self.chunks):
             # If the new virtual chunk is in the old set of slices, just use the same
             # raw data the virtual chunk is already mapped to. Pop it out of the slices
             # dict so that we don't need to iterate over it when computing parts of the
@@ -1079,9 +1073,11 @@ def write_dataset_operations(

 def write_operations(
     f: File, version_name: str, name: str, operations: List[WriteOperation]
-) -> tuple[Dict[Tuple, Tuple], tuple[int]]:
+) -> tuple[Dict[Tuple, Tuple], tuple[int, ...]]:
     """Carry out a sequence of write operations on the file.

+    If no operations are pending, return the previous version's slices and shape unchanged.
+
     Parameters
     ----------
     f : File
@@ -1095,15 +1091,12 @@

     Returns
     -------
-    tuple[Dict[Tuple, Tuple], tuple[int]]
+    tuple[Dict[Tuple, Tuple], tuple[int, ...]]
         (Slices map, shape of virtual dataset post-write)

         The slices map is a mapping from {virtual dataset slice: raw dataset slice}.
         The virtual dataset is created elsewhere using the slices return here.
     """
-    if not operations:
-        return {}, ()  # type: ignore
-
     if name not in f["_version_data"]:
         raise NotImplementedError(
             "Use write_dataset() if the dataset does not yet exist"
@@ -1111,6 +1104,7 @@

     slices = get_previous_version_slices(f, version_name, name)
     shape = get_previous_version_shape(f, version_name, name)
+
     for operation in operations:
         slices, shape = operation.apply(f, name, version_name, slices, shape)

@@ -1533,7 +1527,7 @@ def split_across_unused(

 def partition(
     obj: Union[np.ndarray, Tuple],
-    chunks: Union[int, tuple[int, ...]],
+    chunks: Union[int, tuple[int, ...], ChunkSize],
 ) -> Iterator[Tuple]:
     """Break an array or a Tuple of slices into chunks of the given chunk size.

@@ -1541,10 +1535,11 @@
     ----------
     obj : Union[np.ndarray, Tuple]
         Array or Tuple index to partition
-    chunks : Union[int, tuple[int, ...]]
+    chunks : Union[int, tuple[int, ...], ChunkSize]
         If this is an int, this is the size of each partitioned chunk.
-        Multidimensional chunks should supply a tuple giving the chunk
-        size in each dimension.
+        If it is a tuple of ints or a ChunkSize, it gives the shape of each
+        chunk: multidimensional chunks should supply one chunk size per
+        dimension of the object being partitioned.

     Returns
     -------
@@ -1560,6 +1555,8 @@

     if isinstance(chunks, (int, np.integer)):
         chunks = (chunks,)
+    elif isinstance(chunks, ChunkSize):
+        chunks = tuple(chunks)

     yield from ChunkSize(chunks).as_subchunks(index, shape)

diff --git a/versioned_hdf5/versions.py b/versioned_hdf5/versions.py
index 7005f2d9..202832f6 100644
--- a/versioned_hdf5/versions.py
+++ b/versioned_hdf5/versions.py
@@ -135,7 +135,6 @@ def commit_version(

     shape = None
     if isinstance(data, InMemoryDataset):
-        shape = data.shape
         if not data._operations:
             # The virtual dataset was not changed from the previous
             # version. Just copy it to the new version directly.
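
A quick sketch of how partition() is expected to behave after this patch
(shapes are illustrative only; assumes partition() is importable from
versioned_hdf5.backend, and that ChunkSize, Slice, and Tuple come from
ndindex, as in the hunks above):

    from ndindex import ChunkSize, Slice, Tuple

    from versioned_hdf5.backend import partition

    # A 1-D index with an int chunk size, as before this patch: expected to
    # yield ndindex Tuples covering [0:4], [4:8], [8:10].
    for vchunk in partition(Tuple(Slice(0, 10)), 4):
        print(vchunk)

    # A 2-D index: a tuple of ints and an equivalent ChunkSize should now
    # partition identically, since a ChunkSize is unwrapped to a plain tuple
    # before being re-wrapped for as_subchunks().
    index = Tuple(Slice(0, 6), Slice(0, 6))
    assert list(partition(index, (4, 4))) == list(
        partition(index, ChunkSize((4, 4)))
    )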