Skip to content

Commit

Permalink
Change path prefix for butler server
Browse files Browse the repository at this point in the history
For the purposes of the RSP, Butler is considered part of the "API
Aspect", so the path to it needs to start with /api/.  Handlers were
re-organized to be grouped under an APIRouter, since this prefix will
need to be configurable in the future.
  • Loading branch information
dhirving committed Nov 3, 2023
1 parent 0fda5f6 commit c50bf28
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 145 deletions.
43 changes: 43 additions & 0 deletions python/lsst/daf/butler/remote_butler/server/_dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from functools import cache

from lsst.daf.butler import Butler

from ._config import get_config_from_env
from ._factory import Factory


@cache
def _make_global_butler() -> Butler:
    """Instantiate the process-wide Butler shared by all requests.

    ``@cache`` guarantees the Butler is built exactly once, lazily, from the
    configuration URI read out of the environment on first use.
    """
    return Butler.from_config(get_config_from_env().config_uri)

Check warning on line 39 in python/lsst/daf/butler/remote_butler/server/_dependencies.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/server/_dependencies.py#L38-L39

Added lines #L38 - L39 were not covered by tests


def factory_dependency() -> Factory:
    """FastAPI dependency provider: a ``Factory`` bound to the shared Butler."""
    shared_butler = _make_global_butler()
    return Factory(butler=shared_butler)

Check warning on line 43 in python/lsst/daf/butler/remote_butler/server/_dependencies.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/server/_dependencies.py#L43

Added line #L43 was not covered by tests
147 changes: 7 additions & 140 deletions python/lsst/daf/butler/remote_butler/server/_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,34 +27,25 @@

from __future__ import annotations

__all__ = ("app", "factory_dependency")
__all__ = ("app",)

import logging
import uuid
from functools import cache
from typing import Any

from fastapi import Depends, FastAPI, Request
from fastapi import FastAPI, Request
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse
from lsst.daf.butler import (
Butler,
DataCoordinate,
MissingDatasetTypeError,
SerializedDataCoordinate,
SerializedDatasetRef,
SerializedDatasetType,
)
from lsst.daf.butler import Butler, DataCoordinate, MissingDatasetTypeError, SerializedDataCoordinate
from safir.metadata import Metadata, get_metadata

from ._config import get_config_from_env
from ._factory import Factory
from ._server_models import FindDatasetModel
from .handlers._external import external_router

_DEFAULT_API_PATH = "/api/butler"

log = logging.getLogger(__name__)

app = FastAPI()
app.add_middleware(GZipMiddleware, minimum_size=1000)
app.include_router(external_router, prefix=_DEFAULT_API_PATH)


@app.exception_handler(MissingDatasetTypeError)
Expand All @@ -68,16 +59,6 @@ def missing_dataset_type_exception_handler(request: Request, exc: MissingDataset
)


@cache
def _make_global_butler() -> Butler:
config = get_config_from_env()
return Butler.from_config(config.config_uri)


def factory_dependency() -> Factory:
return Factory(butler=_make_global_butler())


def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
"""Convert the serialized dataId back to full DataCoordinate.
Expand Down Expand Up @@ -116,117 +97,3 @@ async def get_index() -> Metadata:
By convention, this endpoint returns only the application's metadata.
"""
return get_metadata(package_name="lsst.daf.butler", application_name="butler")


@app.get(
    "/butler/butler.yaml",
    description=(
        "Returns a Butler YAML configuration file that can be used to instantiate a Butler client"
        " pointing at this server"
    ),
    summary="Client configuration file",
    response_model=dict[str, Any],
)
@app.get(
    "/butler/butler.json",
    description=(
        "Returns a Butler JSON configuration file that can be used to instantiate a Butler client"
        " pointing at this server"
    ),
    summary="Client configuration file",
    response_model=dict[str, Any],
)
async def get_client_config() -> dict[str, Any]:
    """Return the client-side Butler configuration.

    The same handler serves both the ``.yaml`` and ``.json`` routes; the
    JSON body is valid for both because every JSON document is also YAML.
    The ``<butlerRoot>`` placeholder is presumably substituted client-side
    with the server's URL — TODO confirm against RemoteButler.
    """
    # We can return JSON data for both the YAML and JSON case because all JSON
    # files are parseable as YAML.
    return {"cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": "<butlerRoot>"}}


@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]:
    """Allow remote client to get dimensions definition."""
    # Butler comes from the injected Factory dependency.
    butler = factory.create_butler()
    # The dimension configuration is returned as a plain dict for JSON transport.
    return butler.dimensions.dimensionConfig.toDict()


@app.get(
    "/butler/v1/dataset_type/{dataset_type_name}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    dataset_type_name: str, factory: Factory = Depends(factory_dependency)
) -> SerializedDatasetType:
    """Return the dataset type.

    Looks up ``dataset_type_name`` via the Butler and returns the
    pydantic-serializable form for the response model.
    """
    butler = factory.create_butler()
    datasetType = butler.get_dataset_type(dataset_type_name)
    return datasetType.to_simple()


@app.get(
    "/butler/v1/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: uuid.UUID,
    storage_class: str | None = None,
    dimension_records: bool = False,
    datastore_records: bool = False,
    factory: Factory = Depends(factory_dependency),
) -> SerializedDatasetRef | None:
    """Return a single dataset reference.

    An unknown ``id`` yields ``None`` (HTTP 200 with a null body), not a
    404 — this deliberately mirrors ``Butler.get_dataset``.
    """
    butler = factory.create_butler()
    ref = butler.get_dataset(
        id,
        storage_class=storage_class,
        dimension_records=dimension_records,
        datastore_records=datastore_records,
    )
    if ref is not None:
        return ref.to_simple()
    # This could raise a 404 since id is not found. The standard implementation
    # get_dataset method returns without error so follow that example here.
    return ref


# Not yet supported: TimeSpan is not yet a pydantic model.
# collections parameter assumes client-side has resolved regexes.
@app.post(
    "/butler/v1/find_dataset/{dataset_type}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    dataset_type: str,
    query: FindDatasetModel,
    factory: Factory = Depends(factory_dependency),
) -> SerializedDatasetRef | None:
    """Find a dataset by type, collections, and data ID.

    Returns the serialized reference, or ``None`` when no match exists.
    """
    # An empty collections list falls back to None (butler defaults).
    collection_query = query.collections if query.collections else None

    # Get the simple dict from the SerializedDataCoordinate. We do not know
    # if it is a well-defined DataCoordinate or needs some massaging first.
    # find_dataset will use dimension record queries if necessary.
    data_id = query.data_id.dataId

    butler = factory.create_butler()
    ref = butler.find_dataset(
        dataset_type,
        None,
        collections=collection_query,
        storage_class=query.storage_class,
        timespan=None,
        dimension_records=query.dimension_records,
        datastore_records=query.datastore_records,
        **data_id,
    )
    return ref.to_simple() if ref else None
154 changes: 154 additions & 0 deletions python/lsst/daf/butler/remote_butler/server/handlers/_external.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ()

import uuid
from typing import Any

from fastapi import APIRouter, Depends
from lsst.daf.butler import SerializedDatasetRef, SerializedDatasetType

from .._dependencies import factory_dependency
from .._factory import Factory
from .._server_models import FindDatasetModel

# Router for endpoints reachable by external clients; mounted by the app
# under a path prefix (currently /api/butler per the commit message) —
# the prefix is expected to become configurable.
external_router = APIRouter()


@external_router.get(
    "/butler.yaml",
    description=(
        "Returns a Butler YAML configuration file that can be used to instantiate a Butler client"
        " pointing at this server"
    ),
    summary="Client configuration file",
    response_model=dict[str, Any],
)
@external_router.get(
    "/butler.json",
    description=(
        "Returns a Butler JSON configuration file that can be used to instantiate a Butler client"
        " pointing at this server"
    ),
    summary="Client configuration file",
    response_model=dict[str, Any],
)
async def get_client_config() -> dict[str, Any]:
    """Serve the client-side Butler configuration.

    One handler answers both the ``.yaml`` and ``.json`` routes: a JSON
    body satisfies both, because every JSON document is also valid YAML.
    """
    client_config = {
        "cls": "lsst.daf.butler.remote_butler.RemoteButler",
        "remote_butler": {"url": "<butlerRoot>"},
    }
    return client_config


@external_router.get("/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]:
    """Allow remote client to get dimensions definition."""
    # Serialize the server's dimension configuration as a plain dict.
    return factory.create_butler().dimensions.dimensionConfig.toDict()


@external_router.get(
    "/v1/dataset_type/{dataset_type_name}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    dataset_type_name: str, factory: Factory = Depends(factory_dependency)
) -> SerializedDatasetType:
    """Look up a dataset type by name and return its serialized form."""
    butler = factory.create_butler()
    # to_simple() produces the pydantic-serializable representation.
    return butler.get_dataset_type(dataset_type_name).to_simple()


@external_router.get(
    "/v1/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: uuid.UUID,
    storage_class: str | None = None,
    dimension_records: bool = False,
    datastore_records: bool = False,
    factory: Factory = Depends(factory_dependency),
) -> SerializedDatasetRef | None:
    """Return a single dataset reference, or ``None`` when ``id`` is unknown.

    Deliberately not a 404 on a missing id: ``Butler.get_dataset`` returns
    ``None`` without error, so the REST surface follows that example.
    """
    butler = factory.create_butler()
    ref = butler.get_dataset(
        id,
        storage_class=storage_class,
        dimension_records=dimension_records,
        datastore_records=datastore_records,
    )
    if ref is None:
        return None
    return ref.to_simple()


# Not yet supported: TimeSpan is not yet a pydantic model.
# collections parameter assumes client-side has resolved regexes.
@external_router.post(
    "/v1/find_dataset/{dataset_type}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    dataset_type: str,
    query: FindDatasetModel,
    factory: Factory = Depends(factory_dependency),
) -> SerializedDatasetRef | None:
    """Find a dataset by type, collections, and data ID; serialize the ref."""
    # An empty collections list falls back to None (butler defaults).
    collection_query = query.collections or None

    # Get the simple dict from the SerializedDataCoordinate. We do not know
    # if it is a well-defined DataCoordinate or needs some massaging first.
    # find_dataset will use dimension record queries if necessary.
    data_id = query.data_id.dataId

    ref = factory.create_butler().find_dataset(
        dataset_type,
        None,
        collections=collection_query,
        storage_class=query.storage_class,
        timespan=None,
        dimension_records=query.dimension_records,
        datastore_records=query.datastore_records,
        **data_id,
    )
    if ref is None:
        return None
    return ref.to_simple()
Loading

0 comments on commit c50bf28

Please sign in to comment.