diff --git a/changelog.d/20231023_064405_michael.hanke_www_auth.md b/changelog.d/20231023_064405_michael.hanke_www_auth.md new file mode 100644 index 00000000..f4752d52 --- /dev/null +++ b/changelog.d/20231023_064405_michael.hanke_www_auth.md @@ -0,0 +1,8 @@ +### 🏠 Internal + +- The `www-authenticate` dependencies is dropped. The functionality is + replaced by a `requests`-based implementation of an alternative parser. + This trims the dependency footprint and facilitates Debian-packaging. + The previous test cases are kept and further extended. + Fixes https://github.com/datalad/datalad-next/issues/493 via + https://github.com/datalad/datalad-next/pull/495 (by @mih) diff --git a/datalad_next/url_operations/http.py b/datalad_next/url_operations/http.py index 854677c4..5d660e09 100644 --- a/datalad_next/url_operations/http.py +++ b/datalad_next/url_operations/http.py @@ -9,11 +9,13 @@ from typing import Dict import requests from requests_toolbelt import user_agent -import www_authenticate import datalad -from datalad_next.utils.requests_auth import DataladAuth +from datalad_next.utils.requests_auth import ( + DataladAuth, + parse_www_authenticate, +) from . import ( UrlOperations, UrlOperationsRemoteError, @@ -233,7 +235,7 @@ def probe_url(self, url, timeout=10.0, headers=None): headers=headers, ) if 'www-authenticate' in req.headers: - props['auth'] = www_authenticate.parse( + props['auth'] = parse_www_authenticate( req.headers['www-authenticate']) props['is_redirect'] = True if req.history else False props['status_code'] = req.status_code diff --git a/datalad_next/utils/requests_auth.py b/datalad_next/utils/requests_auth.py index 62cb5a49..fb4f3ce9 100644 --- a/datalad_next/utils/requests_auth.py +++ b/datalad_next/utils/requests_auth.py @@ -7,7 +7,6 @@ from typing import Dict from urllib.parse import urlparse import requests -import www_authenticate from datalad_next.config import ConfigManager from datalad_next.utils import CredentialManager @@ -16,7 +15,77 @@ lgr = logging.getLogger('datalad.ext.next.utils.requests_auth') -__all__ = ['DataladAuth', 'HTTPBearerTokenAuth'] +__all__ = ['DataladAuth', 'HTTPBearerTokenAuth', 'parse_www_authenticate'] + + +def parse_www_authenticate(hdr: str) -> dict: + """Parse HTTP www-authenticate header + + This helper uses ``requests`` utilities to parse the ``www-authenticate`` + header as represented in a ``requests.Response`` instance. The header may + contain any number of challenge specifications. + + The implementation follows RFC7235, where a challenge parameters set is + specified as: either a comma-separated list of parameters, or a single + sequence of characters capable of holding base64-encoded information, + and parameters are name=value pairs, where the name token is matched + case-insensitively, and each parameter name MUST only occur once + per challenge. + + Returns + ------- + dict + Keys are casefolded challenge labels (e.g., 'basic', 'digest'). + Values are: ``None`` (no parameter), ``str`` (a token68), or + ``dict`` (name/value mapping of challenge parameters) + """ + plh = requests.utils.parse_list_header + pdh = requests.utils.parse_dict_header + challenges = {} + challenge = None + # challenges as well as their properties are in a single + # comma-separated list + for item in plh(hdr): + # parse the item into a key/value set + # the value will be `None` if this item was no mapping + k, v = pdh(item).popitem() + # split the key to check for a challenge spec start + key_split = k.split(' ', maxsplit=1) + if len(key_split) > 1 or v is None: + item_suffix = item[len(key_split[0]) + 1:] + challenge = [item[len(key_split[0]) + 1:]] if item_suffix else None + challenges[key_split[0].casefold()] = challenge + else: + # implementation logic assumes that the above conditional + # was triggered before we ever get here + assert challenge + challenge.append(item) + + return { + challenge: _convert_www_authenticate_items(items) + for challenge, items in challenges.items() + } + + +def _convert_www_authenticate_items(items: list) -> None | str | dict: + pdh = requests.utils.parse_dict_header + # according to RFC7235, items can be: + # either a comma-separated list of parameters + # or a single sequence of characters capable of holding base64-encoded + # information. + # parameters are name=value pairs, where the name token is matched + # case-insensitively, and each parameter name MUST only occur once + # per challenge. + if items is None: + return None + elif len(items) == 1 and pdh(items[0].rstrip('=')).popitem()[1] is None: + # this items matches the token68 appearance (no name value + # pair after potential base64 padding its removed + return items[0] + else: + return { + k.casefold(): v for i in items for k, v in pdh(i).items() + } class DataladAuth(requests.auth.AuthBase): @@ -201,7 +270,7 @@ def handle_401(self, r, **kwargs): # www-authenticate with e.g. 403s return r # which auth schemes does the server support? - auth_schemes = www_authenticate.parse(r.headers['www-authenticate']) + auth_schemes = parse_www_authenticate(r.headers['www-authenticate']) ascheme, credname, cred = self._get_credential(r.url, auth_schemes) if cred is None or 'secret' not in cred: diff --git a/datalad_next/utils/tests/test_parse_www_authenticate.py b/datalad_next/utils/tests/test_parse_www_authenticate.py new file mode 100644 index 00000000..d69fcd67 --- /dev/null +++ b/datalad_next/utils/tests/test_parse_www_authenticate.py @@ -0,0 +1,45 @@ + +from ..requests_auth import parse_www_authenticate + + +challenges = ( + # just challenge type + ('Negotiate', + [('negotiate', None)]), + # challenge and just a token, tolerate any base64 padding + ('Negotiate abcdef', + [('negotiate', 'abcdef')]), + ('Negotiate abcdef=', + [('negotiate', 'abcdef=')]), + ('Negotiate abcdef==', + [('negotiate', 'abcdef==')]), + # standard bearer + ('Bearer realm=example.com', + [('bearer', {'realm': 'example.com'})]), + # standard digest + ('Digest realm="example.com", qop="auth,auth-int", nonce="abcdef", ' + 'opaque="ghijkl"', + [('digest', {'realm': 'example.com', 'qop': 'auth,auth-int', + 'nonce': 'abcdef', 'opaque': 'ghijkl'})]), + # multi challenge + ('Basic speCial="paf ram", realm="basIC", ' + 'Bearer, ' + 'Digest realm="http-auth@example.org", qop="auth, auth-int", ' + 'algorithm=MD5', + [('basic', {'special': 'paf ram', 'realm': 'basIC'}), + ('bearer', None), + ('digest', {'realm': "http-auth@example.org", 'qop': "auth, auth-int", + 'algorithm': 'MD5'})]), + # same challenge, multiple times, last one wins + ('Basic realm="basIC", ' + 'Basic realm="complex"', + [('basic', {'realm': 'complex'})]), +) + + +def test_parse_www_authenticate(): + for hdr, targets in challenges: + res = parse_www_authenticate(hdr) + for ctype, props in targets: + assert ctype in res + assert res[ctype] == props diff --git a/setup.cfg b/setup.cfg index 8e31daaf..3f6897ae 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,6 @@ python_requires = >= 3.8 install_requires = annexremote datalad >= 0.18.4 - www-authenticate humanize packages = find_namespace: include_package_data = True