Skip to content

Commit

Permalink
fix: correct stream implementation when dataset is empty (#103)
Browse files Browse the repository at this point in the history
For saving and loading
  • Loading branch information
nfrasser authored Nov 13, 2024
1 parent 6054fe9 commit bf458d0
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 0 deletions.
6 changes: 6 additions & 0 deletions cryosparc/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,9 @@ def _load_stream(
# Calling addrows separately minimizes column-based
# allocations and improves performance by ~20%
dset = cls.allocate(0, descr)
if header["length"] == 0:
return dset # no more data to load

data = dset._data
data.addrows(header["length"])
loader = Stream(data)
Expand Down Expand Up @@ -799,6 +802,9 @@ def stream(self, compression: Literal["lz4", None] = None) -> Generator[bytes, N
yield u32bytesle(len(header))
yield header

if len(self) == 0:
return # empty dataset, don't yield anything

for f in self:
fielddata: "MemoryView"
if f in compressed_fields:
Expand Down
26 changes: 26 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,27 @@ def small_dset_stream(small_dset):
return stream


@pytest.fixture
def empty_dset():
    """Provide a dataset allocated with zero rows and three declared fields.

    One of the fields deliberately uses a very long column name.
    """
    long_name = "long/fieldwithsuperduperlongcolumnnamethatislongandtestable"
    schema = [
        ("field/1", "u8", (2,)),
        ("field/2", "f4"),
        (long_name, "O"),
    ]
    return Dataset.allocate(0, fields=schema)


@pytest.fixture
def empty_dset_stream(empty_dset):
    """Provide an in-memory buffer holding ``empty_dset`` saved in CSDAT_FORMAT.

    The buffer is rewound to position 0 so it can be read immediately.
    """
    buffer = BytesIO()
    empty_dset.save(buffer, format=CSDAT_FORMAT)
    # Rewind so consumers read from the beginning of the saved data.
    buffer.seek(0)
    return buffer


def test_allocate():
storage = Dataset.allocate(size=2000000, fields=[("field1", "u8"), ("field2", "f4"), ("field3", "O")])
assert storage is not None
Expand Down Expand Up @@ -270,6 +291,11 @@ def test_load_stream_fields(small_dset, small_dset_stream):
assert result == small_dset.filter_fields(["field/2"], copy=True)


def test_load_empty_stream(empty_dset, empty_dset_stream):
    """Loading the saved zero-row dataset must compare equal to the original."""
    loaded = Dataset.load(empty_dset_stream)
    assert loaded == empty_dset


def test_pickle_unpickle():
import pickle

Expand Down

0 comments on commit bf458d0

Please sign in to comment.