Skip to content

Commit

Permalink
Add gafaelfawr authentication to the client
Browse files Browse the repository at this point in the history
  • Loading branch information
dhirving committed Nov 2, 2023
1 parent 8b32a96 commit dcd91f3
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 2 deletions.
95 changes: 95 additions & 0 deletions python/lsst/daf/butler/remote_butler/_authentication.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("get_authentication_token_from_environment", "get_authentication_headers")

import os
from fnmatch import fnmatchcase
from urllib.parse import urlparse

SERVER_WHITELIST = ["*.lsst.cloud"]
EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY = "BUTLER_RUBIN_ACCESS_TOKEN"
RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY = "ACCESS_TOKEN"


def get_authentication_token_from_environment(server_url: str) -> str | None:
"""Search the environment for a Rubin Science Platform access token.
The token may come from the following sources in this order:
1. The "BUTLER_RUBIN_ACCESS_TOKEN" environment variable.
This environment variable is meant primarily for development use,
running outside the Rubin Science Platform. This token will be sent
to EVERY server that we connect to, so be careful when connecting to
untrusted servers.
2. The "ACCESS_TOKEN" environment variable.
This environment variable is provided by the Rubin Science Platform
Jupyter notebooks. It will only be returned if the given `server_url`
is in a whitelist of servers known to belong to the Rubin Science
Platform. Because this is a long-lived token that can be used to
impersonate the user with their full access rights, it should not be
sent to untrusted servers.
Parameters
----------
server_url : `str`
URL of the Butler server that the caller intends to connect to
Returns
-------
access_token: `str | None`
A Rubin Science Platform access token, or None if no token was
configured in the environment
"""
explicit_butler_token = os.getenv(EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY)
if explicit_butler_token:
return explicit_butler_token

hostname = urlparse(server_url).hostname.lower()
hostname_in_whitelist = any((fnmatchcase(hostname, pattern) for pattern in SERVER_WHITELIST))
notebook_token = os.getenv(RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY)
if hostname_in_whitelist and notebook_token:
return notebook_token

return None


def get_authentication_headers(access_token: str) -> dict[str, str]:
    """Build the HTTP headers that authenticate a request through the Rubin
    Science Platform's Gafaelfawr service.

    Parameters
    ----------
    access_token : `str`
        Rubin Science Platform access token.

    Returns
    -------
    header_map : `dict` [`str`, `str`]
        Mapping from HTTP header name to header value.
    """
    header_map: dict[str, str] = {}
    # Access tokens are opaque bearer tokens. See https://sqr-069.lsst.io/
    header_map["Authorization"] = "Bearer {}".format(access_token)
    return header_map
14 changes: 12 additions & 2 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
from ..dimensions import DataId, DimensionConfig, DimensionUniverse
from ..registry import Registry, RegistryDefaults
from ..transfers import RepoExportContext
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel


Expand All @@ -67,6 +68,7 @@ def __init__(
inferDefaults: bool = True,
# Parameters unique to RemoteButler
http_client: httpx.Client | None = None,
access_token: str | None = None,
**kwargs: Any,
):
butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
Expand All @@ -81,6 +83,7 @@ def __init__(
butler_config[server_url_key], butler_config.configDir
)
self._config = RemoteButlerConfigModel.model_validate(butler_config)

self._dimensions: DimensionUniverse | None = None
# TODO: RegistryDefaults should have finish() called on it, but this
# requires getCollectionSummary() which is not yet implemented
Expand All @@ -91,8 +94,15 @@ def __init__(
# This is generally done for testing.
self._client = http_client
else:
headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url))
server_url = str(self._config.remote_butler.url)
auth_headers = {}
if access_token is None:
access_token = get_authentication_token_from_environment(server_url)
if access_token is not None:
auth_headers = get_authentication_headers(access_token)

headers = auth_headers | {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
self._client = httpx.Client(headers=headers, base_url=server_url)

def isWriteable(self) -> bool:
# Docstring inherited.
Expand Down
50 changes: 50 additions & 0 deletions tests/test_authentication.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os
import unittest
from contextlib import contextmanager
from unittest.mock import patch

from lsst.daf.butler.remote_butler._authentication import (
EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY,
RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY,
get_authentication_headers,
get_authentication_token_from_environment,
)


@contextmanager
def _mock_env(new_environment):
    """Temporarily replace the whole of ``os.environ`` with the given mapping.

    Parameters
    ----------
    new_environment : `dict` [`str`, `str`]
        The only environment variables visible inside the ``with`` block;
        every other variable is cleared for its duration.
    """
    environment_patch = patch.dict(os.environ, new_environment, clear=True)
    with environment_patch:
        yield


class TestButlerClientAuthentication(unittest.TestCase):
    """Test access-token logic"""

    def test_explicit_butler_token(self):
        # The explicit Butler token is returned even for a host outside the
        # whitelist, and it takes precedence over the notebook token.
        with _mock_env(
            {
                EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token1",
                RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "not-this-token",
            }
        ):
            token = get_authentication_token_from_environment("https://untrustedserver.com")
            self.assertEqual(token, "token1")

    def test_jupyter_token_with_safe_server(self):
        # The mixed-case hostname checks that whitelist matching ignores case.
        with _mock_env({RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token2"}):
            token = get_authentication_token_from_environment("https://data.LSST.cloud/butler")
            self.assertEqual(token, "token2")

    def test_jupyter_token_with_unsafe_server(self):
        # The notebook token must not be handed to a non-whitelisted host.
        with _mock_env({RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token2"}):
            token = get_authentication_token_from_environment("https://untrustedserver.com/butler")
            self.assertIsNone(token)

    def test_missing_token(self):
        # With an empty environment there is no token, even for a safe host.
        with _mock_env({}):
            token = get_authentication_token_from_environment("https://data.lsst.cloud/butler")
            self.assertIsNone(token)

    def test_header_generation(self):
        headers = get_authentication_headers("tokendata")
        self.assertEqual(headers, {"Authorization": "Bearer tokendata"})

0 comments on commit dcd91f3

Please sign in to comment.