# Reviewer summary of this patch (these `#` lines sit before the first
# `diff --git` header and are not part of any hunk).
#
# - src/dask_awkward/lib/core.py: `__len__` no longer calls
#   `self.eager_compute_divisions()` when `self.known_divisions` is false.
#   It now raises NotImplementedError with a message that points the caller
#   to `dask_awkward.num(..., axis=0)` for a lazy length and to
#   `.eager_compute_divisions()` for an eager one. When divisions are known
#   it still returns `self.divisions[-1]`.
# - tests/test_core.py (`test_len`): asserts divisions are unknown right after
#   `dak.from_json`, that `len(daa)` raises NotImplementedError with the
#   expected message, and that `len(daa) == 10` once
#   `daa.eager_compute_divisions()` has been called.
# - tests/test_io.py (`test_from_awkward_empty_array`): calls
#   `eager_compute_divisions()` on `a1` and `a2` before taking `len()`.
# - tests/test_io_json.py (`test_json_sanity`): expects a bare `assert ds` to
#   raise the same error before divisions are computed -- presumably because
#   truth-testing the collection falls back to `__len__`; confirm that the
#   collection defines no `__bool__`.
#
# NOTE(review): in the `match=` patterns the parentheses and some dots are
#   escaped, but the sentence-ending `.` characters are not, so they match
#   any character rather than a literal period.
# NOTE(review): the context line `daa.layout.form == a2.layout.form` in
#   tests/test_io.py is a bare comparison whose result is discarded; it looks
#   like a missing `assert` -- outside the scope of this patch, confirm.
# NOTE(review): this copy of the patch was whitespace-collapsed. Line breaks,
#   indentation and blank context lines below were reconstructed to satisfy
#   the hunk line counts -- verify against the original commit before applying.
diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py
index a7a6458f..dc258bbc 100644
--- a/src/dask_awkward/lib/core.py
+++ b/src/dask_awkward/lib/core.py
@@ -704,7 +704,12 @@ def repartition(
 
     def __len__(self) -> int:
         if not self.known_divisions:
-            self.eager_compute_divisions()
+            raise NotImplementedError(
+                "Cannot determine length of collection with unknown partition sizes without executing the graph.\n"
+                "Use `dask_awkward.num(..., axis=0)` if you want a lazy Scalar of the length.\n"
+                "If you want to eagerly compute the partition sizes to have the ability to call `len` on the collection"
+                ", use `.eager_compute_divisions()` on the collection."
+            )
         return self.divisions[-1]  # type: ignore
 
     def _shorttypestr(self, max: int = 10) -> str:
diff --git a/tests/test_core.py b/tests/test_core.py
index 49a8dd25..4baec896 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -100,7 +100,20 @@ def test_compute_typetracer(daa: Array) -> None:
 
 def test_len(ndjson_points_file: str) -> None:
     daa = dak.from_json([ndjson_points_file] * 2)
-    assert len(daa) == 10
+    assert not daa.known_divisions
+    with pytest.raises(
+        NotImplementedError,
+        match=(
+            "Cannot determine length of collection with unknown partition sizes without executing the graph.\\n"
+            "Use `dask_awkward.num\\(\\.\\.\\., axis=0\\)` if you want a lazy Scalar of the length.\\n"
+            "If you want to eagerly compute the partition sizes to have the ability to call `len` on the collection"
+            ", use `\\.eager_compute_divisions\\(\\)` on the collection."
+        ),
+    ):
+        assert len(daa) == 10
+    daa.eager_compute_divisions()
+    assert daa.known_divisions
+    assert len(daa) == 10  # type: ignore
 
 
 def test_meta_exists(daa: Array) -> None:
diff --git a/tests/test_io.py b/tests/test_io.py
index 1c67bcd9..580ad5c1 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -281,12 +281,18 @@ def test_from_awkward_empty_array(daa: dak.Array) -> None:
     assert len(c1) == 0
     a1 = dak.from_awkward(c1, npartitions=1)
     assert_eq(a1, c1)
-    assert len(a1) == 0
+    assert not a1.known_divisions
+    a1.eager_compute_divisions()
+    assert a1.known_divisions
+    assert len(a1) == 0  # type: ignore
 
     # with a form
     c2 = ak.typetracer.length_zero_if_typetracer(daa.layout)
     assert len(c2) == 0
     a2 = dak.from_awkward(c2, npartitions=1)
+    assert not a2.known_divisions
+    a2.eager_compute_divisions()
+    assert a2.known_divisions
     assert len(a2) == 0
     daa.layout.form == a2.layout.form
 
diff --git a/tests/test_io_json.py b/tests/test_io_json.py
index 15bb14d2..9e7ca8cc 100644
--- a/tests/test_io_json.py
+++ b/tests/test_io_json.py
@@ -58,6 +58,18 @@ def concrete_data(json_data_dir: Path) -> ak.Array:
 def test_json_sanity(json_data_dir: Path, concrete_data: ak.Array) -> None:
     source = os.path.join(str(json_data_dir))
     ds = dak.from_json(source)
+    assert not ds.known_divisions
+    with pytest.raises(
+        NotImplementedError,
+        match=(
+            "Cannot determine length of collection with unknown partition sizes without executing the graph.\\n"
+            "Use `dask_awkward.num\\(\\.\\.\\., axis=0\\)` if you want a lazy Scalar of the length.\\n"
+            "If you want to eagerly compute the partition sizes to have the ability to call `len` on the collection"
+            ", use `\\.eager_compute_divisions\\(\\)` on the collection."
+        ),
+    ):
+        assert ds
+    ds.eager_compute_divisions()
     assert ds
 
     assert_eq(ds, concrete_data)