From 1020e39dd59e3cd141c7bce9a401d670fd0b3fbb Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 26 Oct 2023 16:52:03 -0700 Subject: [PATCH] WIP: RemoteButler.find_dataset --- .../butler/remote_butler/_remote_butler.py | 35 +++++++++++- .../butler/remote_butler/server/_server.py | 53 ++++++++++++++++++- 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 1b12480b7f..0a7d852194 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -48,8 +48,8 @@ from .._storage_class import StorageClass from .._timespan import Timespan from ..datastore import DatasetRefURIs -from ..dimensions import DataId, DimensionConfig, DimensionUniverse -from ..registry import CollectionArgType, Registry, RegistryDefaults +from ..dimensions import DataCoordinate, DataId, DimensionConfig, DimensionUniverse, SerializedDataCoordinate +from ..registry import CollectionArgType, NoDefaultCollectionError, Registry, RegistryDefaults from ..transfers import RepoExportContext from ._config import RemoteButlerConfigModel @@ -101,6 +101,26 @@ def dimensions(self) -> DimensionUniverse: self._dimensions = DimensionUniverse(config) return self._dimensions + def _simplify_dataId(self, dataId: DataId | None) -> SerializedDataCoordinate | None: + """Take a generic Data ID and convert it to a serializable form. + + Parameters + ---------- + dataId : `dict`, `None`, `DataCoordinate` + The data ID to serialize. + + Returns + ------- + data_id : `SerializedDataCoordinate` or `None` + A serializable form. + """ + if dataId is None: + return None + if isinstance(dataId, DataCoordinate): + return dataId.to_simple() + # Assume we can treat it as a dict. + return SerializedDataCoordinate(dataId=dataId) + def getDatasetType(self, name: str) -> DatasetType: # Docstring inherited. 
raise NotImplementedError() @@ -196,6 +216,17 @@ def find_dataset( datastore_records: bool = False, **kwargs: Any, ) -> DatasetRef | None: + if collections is None: + if not self.collections: + raise NoDefaultCollectionError( + "No collections provided to find_dataset, and no defaults from butler construction." + ) + collections = self.collections + # Temporary hack. Assume strings for collections. In future we + # want to construct a CollectionWildcard and filter it through the + # collection cache to generate a list of collection names. + # collection_strings = [str(c) for c in collections] + raise NotImplementedError() def retrieveArtifacts( diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py index 51bf01a4e6..bc9e25c98b 100644 --- a/python/lsst/daf/butler/remote_butler/server/_server.py +++ b/python/lsst/daf/butler/remote_butler/server/_server.py @@ -33,9 +33,15 @@ from functools import cache from typing import Any -from fastapi import Depends, FastAPI +from fastapi import Depends, FastAPI, Query from fastapi.middleware.gzip import GZipMiddleware -from lsst.daf.butler import Butler, SerializedDatasetType +from lsst.daf.butler import ( + Butler, + DataCoordinate, + SerializedDataCoordinate, + SerializedDatasetRef, + SerializedDatasetType, +) from ._factory import Factory @@ -56,6 +62,26 @@ def factory_dependency() -> Factory: return Factory(butler=_make_global_butler()) +def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None: + """Convert the serialized dataId back to a full DataCoordinate. + + Parameters + ---------- + butler : `lsst.daf.butler.Butler` + The butler to use for registry and universe. + data_id : `SerializedDataCoordinate` or `None` + The serialized form. + + Returns + ------- + dataId : `DataCoordinate` or `None` + The DataId usable by registry. 
+ """ + if data_id is None: + return None + return DataCoordinate.from_simple(data_id, registry=butler.registry) + + @app.get("/butler/v1/universe", response_model=dict[str, Any]) def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]: """Allow remote client to get dimensions definition.""" @@ -78,3 +104,26 @@ def get_dataset_type( butler = factory.create_butler() datasetType = butler.get_dataset_type(dataset_type_name) return datasetType.to_simple() + + +# Not yet supported: Timespan is not yet a pydantic model. +# The collections parameter assumes the client side has resolved regexes. +@app.post( + "/butler/v1/find_dataset/{datasetType}", + summary="Retrieve this dataset definition from collection, dataset type, and dataId", + response_model=SerializedDatasetRef, + response_model_exclude_unset=True, + response_model_exclude_defaults=True, + response_model_exclude_none=True, +) +def find_dataset( + datasetType: str, + dataId: SerializedDataCoordinate | None = None, + collections: list[str] | None = Query(None), + factory: Factory = Depends(factory_dependency), +) -> SerializedDatasetRef | None: + collection_query = collections if collections else None + + butler = factory.create_butler() + ref = butler.find_dataset(datasetType, dataId=unpack_dataId(butler, dataId), collections=collection_query) + return ref.to_simple() if ref else None