From 07f5f39a133b9a202f5f83b68a2c2a23a13a7986 Mon Sep 17 00:00:00 2001
From: Tim Jenness
Date: Fri, 27 Oct 2023 09:57:42 -0700
Subject: [PATCH] Add Butler.get_dataset

RemoteButler.get_dataset checks the HTTP status of the server reply and
returns None when the server reports that no dataset matches the ID.
---
 python/lsst/daf/butler/_butler.py          | 19 +++++++++++++++++-
 python/lsst/daf/butler/direct_butler.py    |  5 ++++-
 .../butler/remote_butler/_remote_butler.py | 12 ++++++++++-
 .../butler/remote_butler/server/_server.py | 20 +++++++++++++++++++
 tests/test_butler.py                       |  2 +-
 tests/test_server.py                       |  6 ++++++
 6 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 9dbb0de780..fd489fd150 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -42,7 +42,7 @@
 from ._butler_repo_index import ButlerRepoIndex
 from ._config import Config, ConfigSubset
 from ._dataset_existence import DatasetExistence
-from ._dataset_ref import DatasetIdGenEnum, DatasetRef
+from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
 from ._dataset_type import DatasetType
 from ._deferredDatasetHandle import DeferredDatasetHandle
 from ._file_dataset import FileDataset
@@ -799,6 +799,23 @@ def get_dataset_type(self, name: str) -> DatasetType:
         """
         raise NotImplementedError()
 
+    @abstractmethod
+    def get_dataset(self, id: DatasetId) -> DatasetRef | None:
+        """Retrieve a Dataset entry.
+
+        Parameters
+        ----------
+        id : `DatasetId`
+            The unique identifier for the dataset.
+
+        Returns
+        -------
+        ref : `DatasetRef` or `None`
+            A ref to the Dataset, or `None` if no matching Dataset
+            was found.
+        """
+        raise NotImplementedError()
+
     @abstractmethod
     def find_dataset(
         self,
diff --git a/python/lsst/daf/butler/direct_butler.py b/python/lsst/daf/butler/direct_butler.py
index 701433af88..e9c979dbe1 100644
--- a/python/lsst/daf/butler/direct_butler.py
+++ b/python/lsst/daf/butler/direct_butler.py
@@ -55,7 +55,7 @@
 from ._butler_config import ButlerConfig
 from ._config import Config
 from ._dataset_existence import DatasetExistence
-from ._dataset_ref import DatasetIdGenEnum, DatasetRef
+from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
 from ._dataset_type import DatasetType
 from ._deferredDatasetHandle import DeferredDatasetHandle
 from ._exceptions import ValidationError
@@ -1322,6 +1322,9 @@ def getURI(
     def get_dataset_type(self, name: str) -> DatasetType:
         return self._registry.getDatasetType(name)
 
+    def get_dataset(self, id: DatasetId) -> DatasetRef | None:
+        return self._registry.getDataset(id)
+
     def find_dataset(
         self,
         datasetType: DatasetType | str,
diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py
index 115df2b834..ad48ea7d7b 100644
--- a/python/lsst/daf/butler/remote_butler/_remote_butler.py
+++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py
@@ -40,7 +40,7 @@
 from .._butler_config import ButlerConfig
 from .._config import Config
 from .._dataset_existence import DatasetExistence
-from .._dataset_ref import DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
+from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
 from .._dataset_type import DatasetType, SerializedDatasetType
 from .._deferredDatasetHandle import DeferredDatasetHandle
 from .._file_dataset import FileDataset
@@ -221,6 +221,16 @@ def get_dataset_type(self, name: str) -> DatasetType:
         response.raise_for_status()
         return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)
 
+    def get_dataset(self, id: DatasetId) -> DatasetRef | None:
+        path = f"dataset/{id}"
+        response = self._client.get(self._get_url(path))
+        response.raise_for_status()
+        # The server returns a JSON null when no dataset matches the ID.
+        payload = response.json()
+        if payload is None:
+            return None
+        return DatasetRef.from_simple(SerializedDatasetRef(**payload), universe=self.dimensions)
+
     def find_dataset(
         self,
         datasetType: DatasetType | str,
diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py
index b791651c47..724d2df121 100644
--- a/python/lsst/daf/butler/remote_butler/server/_server.py
+++ b/python/lsst/daf/butler/remote_butler/server/_server.py
@@ -30,6 +30,7 @@
 __all__ = ("app", "factory_dependency")
 
 import logging
+import uuid
 from functools import cache
 from typing import Any
 
@@ -107,6 +108,25 @@ def get_dataset_type(
     return datasetType.to_simple()
 
 
+@app.get(
+    "/butler/v1/dataset/{id}",
+    summary="Retrieve this dataset definition.",
+    response_model=SerializedDatasetRef | None,
+    response_model_exclude_unset=True,
+    response_model_exclude_defaults=True,
+    response_model_exclude_none=True,
+)
+def get_dataset(id: uuid.UUID, factory: Factory = Depends(factory_dependency)) -> SerializedDatasetRef | None:
+    """Return a single dataset reference."""
+    butler = factory.create_butler()
+    ref = butler.get_dataset(id)
+    if ref is not None:
+        return ref.to_simple()
+    # This could raise a 404 since id is not found. The standard implementation
+    # get_dataset method returns without error so follow that example here.
+    return ref
+
+
 # Not yet supported: TimeSpan is not yet a pydantic model.
 # collections parameter assumes client-side has resolved regexes.
 @app.post(
diff --git a/tests/test_butler.py b/tests/test_butler.py
index 04eaa2a4ba..e307fb455f 100644
--- a/tests/test_butler.py
+++ b/tests/test_butler.py
@@ -384,7 +384,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Dir
                 with self.assertRaises(FileNotFoundError):
                     butler.get(ref)
                 # Registry shouldn't be able to find it by dataset_id anymore.
-                self.assertIsNone(butler.registry.getDataset(ref.id))
+                self.assertIsNone(butler.get_dataset(ref.id))
 
                 # Do explicit registry removal since we know they are
                 # empty
diff --git a/tests/test_server.py b/tests/test_server.py
index 3d3c3742c7..cdf2fa444e 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -109,6 +109,12 @@ def test_find_dataset(self):
         self.assertIsInstance(ref, DatasetRef)
         self.assertEqual(ref.id, uuid.UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22"))
 
+        ref2 = self.butler.get_dataset(ref.id)
+        self.assertEqual(ref2, ref)
+
+        # An unknown dataset ID should return None rather than fail.
+        self.assertIsNone(self.butler.get_dataset(uuid.uuid4()))
+
 
 if __name__ == "__main__":
     unittest.main()