From c50bf28a10b626bd0d9e4ffa2372bf20fce02e8c Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Fri, 3 Nov 2023 14:47:37 -0700 Subject: [PATCH] Change path prefix for butler server For the purposes of the RSP, Butler is considered part of the "API Aspect", so the path to it needs to start with /api/. Handlers were re-organized to be grouped under an APIRouter, since this prefix will need to be configurable in the future. --- .../remote_butler/server/_dependencies.py | 43 +++++ .../butler/remote_butler/server/_server.py | 147 +---------------- .../server/handlers/_external.py | 154 ++++++++++++++++++ tests/test_server.py | 11 +- 4 files changed, 210 insertions(+), 145 deletions(-) create mode 100644 python/lsst/daf/butler/remote_butler/server/_dependencies.py create mode 100644 python/lsst/daf/butler/remote_butler/server/handlers/_external.py diff --git a/python/lsst/daf/butler/remote_butler/server/_dependencies.py b/python/lsst/daf/butler/remote_butler/server/_dependencies.py new file mode 100644 index 0000000000..320654f9e7 --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/server/_dependencies.py @@ -0,0 +1,43 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. 
If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +from functools import cache + +from lsst.daf.butler import Butler + +from ._config import get_config_from_env +from ._factory import Factory + + +@cache +def _make_global_butler() -> Butler: + config = get_config_from_env() + return Butler.from_config(config.config_uri) + + +def factory_dependency() -> Factory: + return Factory(butler=_make_global_butler()) diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py index 7ddb358292..d2231d0d52 100644 --- a/python/lsst/daf/butler/remote_butler/server/_server.py +++ b/python/lsst/daf/butler/remote_butler/server/_server.py @@ -27,34 +27,25 @@ from __future__ import annotations -__all__ = ("app", "factory_dependency") +__all__ = ("app",) import logging -import uuid -from functools import cache -from typing import Any -from fastapi import Depends, FastAPI, Request +from fastapi import FastAPI, Request from fastapi.middleware.gzip import GZipMiddleware from fastapi.responses import JSONResponse -from lsst.daf.butler import ( - Butler, - DataCoordinate, - MissingDatasetTypeError, - SerializedDataCoordinate, - SerializedDatasetRef, - SerializedDatasetType, -) +from lsst.daf.butler import Butler, DataCoordinate,
MissingDatasetTypeError, SerializedDataCoordinate from safir.metadata import Metadata, get_metadata -from ._config import get_config_from_env -from ._factory import Factory -from ._server_models import FindDatasetModel +from .handlers._external import external_router + +_DEFAULT_API_PATH = "/api/butler" log = logging.getLogger(__name__) app = FastAPI() app.add_middleware(GZipMiddleware, minimum_size=1000) +app.include_router(external_router, prefix=_DEFAULT_API_PATH) @app.exception_handler(MissingDatasetTypeError) @@ -68,16 +59,6 @@ def missing_dataset_type_exception_handler(request: Request, exc: MissingDataset ) -@cache -def _make_global_butler() -> Butler: - config = get_config_from_env() - return Butler.from_config(config.config_uri) - - -def factory_dependency() -> Factory: - return Factory(butler=_make_global_butler()) - - def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None: """Convert the serialized dataId back to full DataCoordinate. @@ -116,117 +97,3 @@ async def get_index() -> Metadata: By convention, this endpoint returns only the application's metadata. """ return get_metadata(package_name="lsst.daf.butler", application_name="butler") - - -@app.get( - "/butler/butler.yaml", - description=( - "Returns a Butler YAML configuration file that can be used to instantiate a Butler client" - " pointing at this server" - ), - summary="Client configuration file", - response_model=dict[str, Any], -) -@app.get( - "/butler/butler.json", - description=( - "Returns a Butler JSON configuration file that can be used to instantiate a Butler client" - " pointing at this server" - ), - summary="Client configuration file", - response_model=dict[str, Any], -) -async def get_client_config() -> dict[str, Any]: - # We can return JSON data for both the YAML and JSON case because all JSON - # files are parseable as YAML. 
- return {"cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": ""}} - - -@app.get("/butler/v1/universe", response_model=dict[str, Any]) -def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]: - """Allow remote client to get dimensions definition.""" - butler = factory.create_butler() - return butler.dimensions.dimensionConfig.toDict() - - -@app.get( - "/butler/v1/dataset_type/{dataset_type_name}", - summary="Retrieve this dataset type definition.", - response_model=SerializedDatasetType, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def get_dataset_type( - dataset_type_name: str, factory: Factory = Depends(factory_dependency) -) -> SerializedDatasetType: - """Return the dataset type.""" - butler = factory.create_butler() - datasetType = butler.get_dataset_type(dataset_type_name) - return datasetType.to_simple() - - -@app.get( - "/butler/v1/dataset/{id}", - summary="Retrieve this dataset definition.", - response_model=SerializedDatasetRef | None, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def get_dataset( - id: uuid.UUID, - storage_class: str | None = None, - dimension_records: bool = False, - datastore_records: bool = False, - factory: Factory = Depends(factory_dependency), -) -> SerializedDatasetRef | None: - """Return a single dataset reference.""" - butler = factory.create_butler() - ref = butler.get_dataset( - id, - storage_class=storage_class, - dimension_records=dimension_records, - datastore_records=datastore_records, - ) - if ref is not None: - return ref.to_simple() - # This could raise a 404 since id is not found. The standard implementation - # get_dataset method returns without error so follow that example here. - return ref - - -# Not yet supported: TimeSpan is not yet a pydantic model. 
-# collections parameter assumes client-side has resolved regexes. -@app.post( - "/butler/v1/find_dataset/{dataset_type}", - summary="Retrieve this dataset definition from collection, dataset type, and dataId", - response_model=SerializedDatasetRef, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def find_dataset( - dataset_type: str, - query: FindDatasetModel, - factory: Factory = Depends(factory_dependency), -) -> SerializedDatasetRef | None: - collection_query = query.collections if query.collections else None - - # Get the simple dict from the SerializedDataCoordinate. We do not know - # if it is a well-defined DataCoordinate or needs some massaging first. - # find_dataset will use dimension record queries if necessary. - data_id = query.data_id.dataId - - butler = factory.create_butler() - ref = butler.find_dataset( - dataset_type, - None, - collections=collection_query, - storage_class=query.storage_class, - timespan=None, - dimension_records=query.dimension_records, - datastore_records=query.datastore_records, - **data_id, - ) - return ref.to_simple() if ref else None diff --git a/python/lsst/daf/butler/remote_butler/server/handlers/_external.py b/python/lsst/daf/butler/remote_butler/server/handlers/_external.py new file mode 100644 index 0000000000..9f70564b4d --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/server/handlers/_external.py @@ -0,0 +1,154 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. 
If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +__all__ = () + +import uuid +from typing import Any + +from fastapi import APIRouter, Depends +from lsst.daf.butler import SerializedDatasetRef, SerializedDatasetType + +from .._dependencies import factory_dependency +from .._factory import Factory +from .._server_models import FindDatasetModel + +external_router = APIRouter() + + +@external_router.get( + "/butler.yaml", + description=( + "Returns a Butler YAML configuration file that can be used to instantiate a Butler client" + " pointing at this server" + ), + summary="Client configuration file", + response_model=dict[str, Any], +) +@external_router.get( + "/butler.json", + description=( + "Returns a Butler JSON configuration file that can be used to instantiate a Butler client" + " pointing at this server" + ), + summary="Client configuration file", + response_model=dict[str, Any], +) +async def get_client_config() -> dict[str, Any]: + # We can return JSON data for both the YAML and JSON case because all JSON + # files are parseable as YAML.
+ return {"cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": ""}} + + +@external_router.get("/v1/universe", response_model=dict[str, Any]) +def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]: + """Allow remote client to get dimensions definition.""" + butler = factory.create_butler() + return butler.dimensions.dimensionConfig.toDict() + + +@external_router.get( + "/v1/dataset_type/{dataset_type_name}", + summary="Retrieve this dataset type definition.", + response_model=SerializedDatasetType, + response_model_exclude_unset=True, + response_model_exclude_defaults=True, + response_model_exclude_none=True, +) +def get_dataset_type( + dataset_type_name: str, factory: Factory = Depends(factory_dependency) +) -> SerializedDatasetType: + """Return the dataset type.""" + butler = factory.create_butler() + datasetType = butler.get_dataset_type(dataset_type_name) + return datasetType.to_simple() + + +@external_router.get( + "/v1/dataset/{id}", + summary="Retrieve this dataset definition.", + response_model=SerializedDatasetRef | None, + response_model_exclude_unset=True, + response_model_exclude_defaults=True, + response_model_exclude_none=True, +) +def get_dataset( + id: uuid.UUID, + storage_class: str | None = None, + dimension_records: bool = False, + datastore_records: bool = False, + factory: Factory = Depends(factory_dependency), +) -> SerializedDatasetRef | None: + """Return a single dataset reference.""" + butler = factory.create_butler() + ref = butler.get_dataset( + id, + storage_class=storage_class, + dimension_records=dimension_records, + datastore_records=datastore_records, + ) + if ref is not None: + return ref.to_simple() + # This could raise a 404 since id is not found. The standard implementation + # get_dataset method returns without error so follow that example here. + return ref + + +# Not yet supported: TimeSpan is not yet a pydantic model. 
+# collections parameter assumes client-side has resolved regexes. +@external_router.post( + "/v1/find_dataset/{dataset_type}", + summary="Retrieve this dataset definition from collection, dataset type, and dataId", + response_model=SerializedDatasetRef, + response_model_exclude_unset=True, + response_model_exclude_defaults=True, + response_model_exclude_none=True, +) +def find_dataset( + dataset_type: str, + query: FindDatasetModel, + factory: Factory = Depends(factory_dependency), +) -> SerializedDatasetRef | None: + collection_query = query.collections if query.collections else None + + # Get the simple dict from the SerializedDataCoordinate. We do not know + # if it is a well-defined DataCoordinate or needs some massaging first. + # find_dataset will use dimension record queries if necessary. + data_id = query.data_id.dataId + + butler = factory.create_butler() + ref = butler.find_dataset( + dataset_type, + None, + collections=collection_query, + storage_class=query.storage_class, + timespan=None, + dimension_records=query.dimension_records, + datastore_records=query.datastore_records, + **data_id, + ) + return ref.to_simple() if ref else None diff --git a/tests/test_server.py b/tests/test_server.py index 2adc161aec..d686227b3c 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -33,7 +33,8 @@ # Failing to import any of these should disable the tests. 
from fastapi.testclient import TestClient from lsst.daf.butler.remote_butler import RemoteButler - from lsst.daf.butler.remote_butler.server import Factory, app, factory_dependency + from lsst.daf.butler.remote_butler.server import Factory, app + from lsst.daf.butler.remote_butler.server._dependencies import factory_dependency except ImportError: TestClient = None app = None @@ -85,7 +86,7 @@ def create_factory_dependency(): # Set up the RemoteButler that will connect to the server cls.client = TestClient(app) - cls.client.base_url = "http://text.example/butler/" + cls.client.base_url = "http://test.example/api/butler/" cls.butler = _make_remote_butler(cls.client) # Populate the test server. @@ -103,7 +104,7 @@ def test_health_check(self): self.assertEqual(response.json()["name"], "butler") def test_simple(self): - response = self.client.get("/butler/v1/universe") + response = self.client.get("/api/butler/v1/universe") self.assertEqual(response.status_code, 200) self.assertIn("namespace", response.json()) @@ -192,9 +193,9 @@ def override_read(http_resource_path): return self.client.get(http_resource_path.geturl()).content with patch.object(HttpResourcePath, "read", override_read): - butler = Butler("https://test.example/butler") + butler = Butler("https://test.example/api/butler") assert isinstance(butler, RemoteButler) - assert str(butler._config.remote_butler.url) == "https://test.example/butler/" + assert str(butler._config.remote_butler.url) == "https://test.example/api/butler/" if __name__ == "__main__":