Skip to content

Commit

Permalink
Review test matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
crusaderky committed Dec 16, 2024
1 parent 7587b7e commit af0e810
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 58 deletions.
33 changes: 27 additions & 6 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,24 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.9', '3.10', '3.11']
h5py-version: ['dev']
numpy-version: ['latest', '1.24.4']
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
numpy-version: ['latest']
ndindex-version: ['latest']
h5py-version: ['latest']
include:
- python-version: '3.9'
numpy-version: '1.24.4'
ndindex-version: '1.5.1'
h5py-version: '3.3.0'
- python-version: '3.11' # numpy 1.24.4 has wheels up to Python 3.11
numpy-version: '1.24.4'
ndindex-version: 'latest'
h5py-version: 'latest'
- python-version: '3.13'
numpy-version: 'latest'
ndindex-version: 'latest'
h5py-version: 'dev'

fail-fast: false
steps:
- uses: actions/checkout@v4
Expand All @@ -22,9 +37,15 @@ jobs:
- name: Install target numpy version
if: matrix.numpy-version != 'latest'
run: |
pip install numpy~=${{ matrix.numpy-version }}
pip list
run: pip install numpy~=${{ matrix.numpy-version }}

- name: Install target ndindex version
if: matrix.ndindex-version != 'latest'
run: pip install ndindex~=${{ matrix.ndindex-version }}

- name: Install target h5py version
if: matrix.h5py-version != 'latest' && matrix.h5py-version != 'dev'
run: pip install h5py~=${{ matrix.h5py-version }}

- name: Install development h5py version
if: matrix.h5py-version == 'dev'
Expand Down
3 changes: 2 additions & 1 deletion docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ Dependencies

Currently, Versioned HDF5 has the following runtime dependencies:

- ``python>=3.6``
- ``numpy``
- ``h5py``
- ``ndindex``

Refer to ``pyproject.toml`` for minimum supported versions.
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ authors = [
]
description = "Versioned HDF5 provides a versioned abstraction on top of h5py"
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.9"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
]
dependencies = [
"numpy",
"h5py",
"numpy>=1.24.4",
"h5py>=3.3.0",
"ndindex>=1.5.1",
]
urls = { Homepage = "https://github.com/deshaw/versioned-hdf5" }
Expand Down
71 changes: 23 additions & 48 deletions versioned_hdf5/backend.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from __future__ import annotations

import datetime
import logging
import os
import textwrap
from collections.abc import Iterator

import numpy as np
from h5py import Dataset, VirtualLayout, VirtualSource, h5s, h5z
from h5py import Dataset, VirtualLayout, _selector, h5s, h5z
from h5py._hl.filters import guess_chunk
from h5py._hl.selections import select
from ndindex import ChunkSize, Slice, Tuple, ndindex
from numpy.testing import assert_array_equal

Expand All @@ -32,8 +34,6 @@ def get_chunks(shape, dtype, chunk_size):


def initialize(f):
import datetime

from .versions import TIMESTAMP_FMT

version_data = f.create_group("_version_data")
Expand Down Expand Up @@ -438,12 +438,9 @@ def write_dataset_chunks(f, name, data_dict):
def create_virtual_dataset(
f, version_name, name, shape, slices, attrs=None, fillvalue=None
):
from h5py._hl.selections import select
from h5py._hl.vds import VDSmap

raw_data = f["_version_data"][name]["raw_data"]
raw_data_shape = raw_data.shape
slices = {c: s.reduce() for c, s in slices.items()}
raw_data_name = raw_data.name.encode("utf-8")

if len(raw_data) == 0:
layout = VirtualLayout(shape=(0,), dtype=raw_data.dtype)
Expand All @@ -456,52 +453,30 @@ def create_virtual_dataset(
if len(c.args[0]) != len(s):
raise ValueError(f"Inconsistent slices dictionary ({c.args[0]}, {s})")

# h5py 3.3 changed the VirtualLayout code so that it no longer uses
# sources. See https://github.com/h5py/h5py/pull/1905.
layout = VirtualLayout(shape, dtype=raw_data.dtype)
layout_has_sources = hasattr(layout, "sources")
if not layout_has_sources:
from h5py import _selector

layout._src_filenames.add(b".")
space = h5s.create_simple(shape)
selector = _selector.Selector(space)
layout._src_filenames.add(b".")
space = h5s.create_simple(shape)
selector = _selector.Selector(space)

for c, s in slices.items():
for c, s0 in slices.items():
if c.isempty():
continue
# idx = Tuple(s, *Tuple(*[slice(0, i) for i in shape[1:]]).as_subindex(Tuple(*c.args[1:])).args)
S = [Slice(0, len(c.args[i])) for i in range(1, len(shape))]
idx = Tuple(s, *S)
# assert c.newshape(shape) == vs[idx.raw].shape, (c, shape, s)

# This is equivalent to
#
# layout[c.raw] = vs[idx.raw]
#
# but faster because vs[idx.raw] does a deepcopy(vs), which is
# slow. We need different versions for h5py 2 and 3 because the
# virtual sources code was rewritten.
if not layout_has_sources:
key = idx.raw
vs_sel = select(raw_data.shape, key, dataset=None)

sel = selector.make_selection(c.raw)
layout.dcpl.set_virtual(
sel.id, b".", raw_data.name.encode("utf-8"), vs_sel.id
)

else:
vs_sel = select(raw_data_shape, idx.raw, None)
layout_sel = select(shape, c.raw, None)
layout.sources.append(
VDSmap(layout_sel.id, ".", raw_data.name, vs_sel.id)
)

dtype = raw_data.dtype
if dtype.metadata and (
"vlen" in dtype.metadata or "h5py_encoding" in dtype.metadata
):
c = c.raw
s0 = s0.reduce().raw
# c is the tuple of slices that selects the chunk in the virtual dataset.
# s0 is the single slice that selects the chunk in the raw dataset along axis 0.
# Build the full tuple of slices that selects the chunk in the raw dataset:
# s0 on axis 0, then, for every other axis, a slice from 0 to the chunk's extent.
s = (s0, *(slice(ci.stop - ci.start) for ci in c[1:]))

# This is equivalent to `layout[c] = vs[s]`, where vs would be a
# VirtualSource for raw_data, but faster because vs[s] deep-copies vs,
# which is slow.
vs_sel = select(raw_data_shape, s, dataset=None)
sel = selector.make_selection(c)
layout.dcpl.set_virtual(sel.id, b".", raw_data_name, vs_sel.id)

dtype_meta = raw_data.dtype.metadata
if dtype_meta and ("vlen" in dtype_meta or "h5py_encoding" in dtype_meta):
# Variable length string dtype
# (https://h5py.readthedocs.io/en/2.10.0/strings.html). Setting the
# fillvalue in this case doesn't work
Expand Down

0 comments on commit af0e810

Please sign in to comment.