Partition accepts ChunkSize; clean up unused variable; type annotations
peytondmurray committed Apr 14, 2024
1 parent 0c416c1 commit c014024
Showing 2 changed files with 13 additions and 17 deletions.
29 changes: 13 additions & 16 deletions versioned_hdf5/backend.py
@@ -818,21 +818,15 @@ def apply(
             Mapping between {slices in virtual dataset: slices in raw dataset}
             which were written by this function; and shape of the current dataset
         """
-        chunk_size = self.chunks[0]
         new_shape_index = Tuple(*[Slice(0, i) for i in self.shape])
         raw_data: Dataset = f["_version_data"][name]["raw_data"]  # type: ignore
 
         # Keep a copy of the old slices; it will be needed later to compute
         # the chunks modified by the reshape operation
         current_slices = slices.copy()
 
-        # # Clear out the existing slices dict as a side effect; every slice
-        # # gets touched by a ResizeOperation, and some of them will be invalidated (if they fall
-        # # on the edge of the dataset)
-        # slices.clear()
-
         new_slices = {}
-        for vchunk in partition(new_shape_index, chunk_size):
+        for vchunk in partition(new_shape_index, self.chunks):
             # If the new virtual chunk is in the old set of slices, just use the same
             # raw data the virtual chunk is already mapped to. Pop it out of the slices
             # dict so that we don't need to iterate over it when computing parts of the
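
For orientation (this note is not part of the commit): the change above passes the dataset's full chunk shape to partition instead of the scalar self.chunks[0]. A minimal sketch of what the new call computes, using ndindex's ChunkSize, Slice, and Tuple directly; the shape and chunk size below are invented for illustration:

```python
# Sketch only: split a 2-D virtual index into chunks of the dataset's
# full chunk shape, as partition(new_shape_index, self.chunks) now does.
from ndindex import ChunkSize, Slice, Tuple

shape = (4, 6)  # hypothetical dataset shape
new_shape_index = Tuple(*[Slice(0, i) for i in shape])

# Yields four subchunk indices: the (4, 6) index split into (2, 3) chunks
for vchunk in ChunkSize((2, 3)).as_subchunks(new_shape_index, shape):
    print(vchunk)
```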
@@ -1079,9 +1073,11 @@ def write_dataset_operations(
 
 def write_operations(
     f: File, version_name: str, name: str, operations: List[WriteOperation]
-) -> tuple[Dict[Tuple, Tuple], tuple[int]]:
+) -> tuple[Dict[Tuple, Tuple], tuple[int, ...]]:
     """Carry out a sequence of write operations on the file.
+
+    If no operations are pending, just return the previous version slices and shape.
 
     Parameters
     ----------
     f : File
@@ -1095,22 +1091,20 @@ def write_operations(
     Returns
     -------
-    tuple[Dict[Tuple, Tuple], tuple[int]]
+    tuple[Dict[Tuple, Tuple], tuple[int, ...]]
         (Slices map, shape of virtual dataset post-write)
 
         The slices map is a mapping from {virtual dataset slice: raw dataset slice}.
         The virtual dataset is created elsewhere using the slices returned here.
     """
     if not operations:
         return {}, ()  # type: ignore
 
     if name not in f["_version_data"]:
         raise NotImplementedError(
             "Use write_dataset() if the dataset does not yet exist"
         )
 
     slices = get_previous_version_slices(f, version_name, name)
     shape = get_previous_version_shape(f, version_name, name)
 
     for operation in operations:
         slices, shape = operation.apply(f, name, version_name, slices, shape)
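
As a usage note (not from the commit), a sketch of calling write_operations under the new annotation. The file, version name, and dataset name are invented, and the file is assumed to already contain a versioned dataset by that name:

```python
# Hypothetical sketch: the file and names are invented for illustration.
import h5py

from versioned_hdf5.backend import write_operations

with h5py.File("data.h5", "r+") as f:
    # With an empty operation list, the function returns early without
    # writing; otherwise each operation's apply() updates slices and shape.
    slices, shape = write_operations(f, "version_1", "values", [])
```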

@@ -1533,18 +1527,19 @@ def split_across_unused(
 
 def partition(
     obj: Union[np.ndarray, Tuple],
-    chunks: Union[int, tuple[int, ...]],
+    chunks: Union[int, tuple[int, ...], ChunkSize],
 ) -> Iterator[Tuple]:
     """Break an array or a Tuple of slices into chunks of the given chunk size.
 
     Parameters
     ----------
     obj : Union[np.ndarray, Tuple]
         Array or Tuple index to partition
-    chunks : Union[int, tuple[int, ...]]
+    chunks : Union[int, tuple[int, ...], ChunkSize]
         If this is an int, this is the size of each partitioned chunk.
-        Multidimensional chunks should supply a tuple giving the chunk
-        size in each dimension.
+        If it is a tuple of ints or a ChunkSize, it is treated as the shape
+        of the chunks. Multidimensional chunks should supply a tuple giving
+        the chunk size in each dimension.
 
     Returns
     -------
@@ -1560,6 +1555,8 @@ def partition(
 
     if isinstance(chunks, (int, np.integer)):
         chunks = (chunks,)
+    elif isinstance(chunks, ChunkSize):
+        chunks = tuple(chunks)
 
     yield from ChunkSize(chunks).as_subchunks(index, shape)
1 change: 0 additions & 1 deletion versioned_hdf5/versions.py
@@ -135,7 +135,6 @@ def commit_version(
 
     shape = None
     if isinstance(data, InMemoryDataset):
-        shape = data.shape
         if not data._operations:
             # The virtual dataset was not changed from the previous
             # version. Just copy it to the new version directly.
