Skip to content

Commit

Permalink
Replace the www-authenticate dependency with internal function
Browse files Browse the repository at this point in the history
The implementation is built on the parsing utilities of `requests` rather
than on hard-to-follow regexes. I find this easier to grasp.

The associated test includes all www-authenticate test cases, plus
a set of additional ones that focus on multi-challenge header
specifications.

Closes #493
  • Loading branch information
mih committed Oct 23, 2023
1 parent 6a2d65e commit ce6a9c5
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 7 deletions.
8 changes: 8 additions & 0 deletions changelog.d/20231023_064405_michael.hanke_www_auth.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
### 🏠 Internal

- The `www-authenticate` dependency is dropped. The functionality is
replaced by a `requests`-based implementation of an alternative parser.
This trims the dependency footprint and facilitates Debian-packaging.
The previous test cases are kept and further extended.
Fixes https://github.com/datalad/datalad-next/issues/493 via
https://github.com/datalad/datalad-next/pull/495 (by @mih)
8 changes: 5 additions & 3 deletions datalad_next/url_operations/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
from typing import Dict
import requests
from requests_toolbelt import user_agent
import www_authenticate

import datalad

from datalad_next.utils.requests_auth import DataladAuth
from datalad_next.utils.requests_auth import (
DataladAuth,
parse_www_authenticate,
)
from . import (
UrlOperations,
UrlOperationsRemoteError,
Expand Down Expand Up @@ -233,7 +235,7 @@ def probe_url(self, url, timeout=10.0, headers=None):
headers=headers,
)
if 'www-authenticate' in req.headers:
props['auth'] = www_authenticate.parse(
props['auth'] = parse_www_authenticate(
req.headers['www-authenticate'])
props['is_redirect'] = True if req.history else False
props['status_code'] = req.status_code
Expand Down
75 changes: 72 additions & 3 deletions datalad_next/utils/requests_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from typing import Dict
from urllib.parse import urlparse
import requests
import www_authenticate

from datalad_next.config import ConfigManager
from datalad_next.utils import CredentialManager
Expand All @@ -16,7 +15,77 @@
lgr = logging.getLogger('datalad.ext.next.utils.requests_auth')


__all__ = ['DataladAuth', 'HTTPBearerTokenAuth']
__all__ = ['DataladAuth', 'HTTPBearerTokenAuth', 'parse_www_authenticate']


def parse_www_authenticate(hdr: str) -> dict:
    """Parse HTTP www-authenticate header

    This helper uses ``requests`` utilities to parse the ``www-authenticate``
    header as represented in a ``requests.Response`` instance. The header may
    contain any number of challenge specifications.

    The implementation follows RFC7235, where a challenge parameters set is
    specified as: either a comma-separated list of parameters, or a single
    sequence of characters capable of holding base64-encoded information,
    and parameters are name=value pairs, where the name token is matched
    case-insensitively, and each parameter name MUST only occur once
    per challenge.

    The header value is provided by a remote server, hence parsing is
    lenient: empty list elements are skipped, and a name=value item that
    does not follow a challenge with parameters is ignored (and logged
    at debug level), rather than raising an exception.

    Parameters
    ----------
    hdr: str
      Value of the ``www-authenticate`` header.

    Returns
    -------
    dict
      Keys are casefolded challenge labels (e.g., 'basic', 'digest').
      Values are: ``None`` (no parameter), ``str`` (a token68), or
      ``dict`` (name/value mapping of challenge parameters).
      When a challenge label occurs more than once, the last occurrence
      wins.
    """
    plh = requests.utils.parse_list_header
    pdh = requests.utils.parse_dict_header
    challenges = {}
    # items of the challenge that is presently being collected.
    # `None` as long as no challenge was seen, or when the most recent
    # challenge came without any parameter
    params = None
    # challenges as well as their properties are in a single
    # comma-separated list
    for item in plh(hdr):
        # parse the item into a key/value set
        # the value will be `None` if this item was no mapping
        parsed = pdh(item)
        if not parsed:
            # empty list element (e.g. 'Basic, , Bearer'), nothing to see
            continue
        k, v = parsed.popitem()
        # split the key to check for a challenge spec start
        scheme, blank, _ = k.partition(' ')
        if blank or v is None:
            # a new challenge starts here. Whatever follows the label
            # is its first parameter or a token68
            suffix = item[len(scheme) + 1:]
            params = [suffix] if suffix else None
            challenges[scheme.casefold()] = params
        elif params is None:
            # a parameter without a challenge it could belong to.
            # the header is malformed, but this is no reason to crash
            lgr.debug(
                'Ignoring www-authenticate parameter %r that does not '
                'belong to any challenge', item)
        else:
            params.append(item)

    return {
        label: _convert_www_authenticate_items(items)
        for label, items in challenges.items()
    }


def _convert_www_authenticate_items(items: list) -> None | str | dict:
pdh = requests.utils.parse_dict_header
# according to RFC7235, items can be:
# either a comma-separated list of parameters
# or a single sequence of characters capable of holding base64-encoded
# information.
# parameters are name=value pairs, where the name token is matched
# case-insensitively, and each parameter name MUST only occur once
# per challenge.
if items is None:
return None
elif len(items) == 1 and pdh(items[0].rstrip('=')).popitem()[1] is None:
# this items matches the token68 appearance (no name value
# pair after potential base64 padding its removed
return items[0]
else:
return {
k.casefold(): v for i in items for k, v in pdh(i).items()
}


class DataladAuth(requests.auth.AuthBase):
Expand Down Expand Up @@ -201,7 +270,7 @@ def handle_401(self, r, **kwargs):
# www-authenticate with e.g. 403s
return r
# which auth schemes does the server support?
auth_schemes = www_authenticate.parse(r.headers['www-authenticate'])
auth_schemes = parse_www_authenticate(r.headers['www-authenticate'])
ascheme, credname, cred = self._get_credential(r.url, auth_schemes)

if cred is None or 'secret' not in cred:
Expand Down
45 changes: 45 additions & 0 deletions datalad_next/utils/tests/test_parse_www_authenticate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

from ..requests_auth import parse_www_authenticate


# Reference headers for `parse_www_authenticate()`. Each entry pairs a raw
# header value with a list of (casefolded label, expected value) tuples.
challenges = (
    # a bare challenge label without any parameter
    ('Negotiate', [('negotiate', None)]),
    # a label followed by a single token, with any amount of base64 padding
    ('Negotiate abcdef', [('negotiate', 'abcdef')]),
    ('Negotiate abcdef=', [('negotiate', 'abcdef=')]),
    ('Negotiate abcdef==', [('negotiate', 'abcdef==')]),
    # common bearer challenge
    ('Bearer realm=example.com', [('bearer', {'realm': 'example.com'})]),
    # common digest challenge
    (
        'Digest realm="example.com", qop="auth,auth-int", '
        'nonce="abcdef", opaque="ghijkl"',
        [
            ('digest', {
                'realm': 'example.com',
                'qop': 'auth,auth-int',
                'nonce': 'abcdef',
                'opaque': 'ghijkl',
            }),
        ],
    ),
    # several challenges in a single header
    # NOTE(review): "[email protected]" looks like an e-mail obfuscation
    # artifact rather than an intended realm -- confirm the original value
    (
        'Basic speCial="paf ram", realm="basIC", Bearer, '
        'Digest realm="[email protected]", '
        'qop="auth, auth-int", algorithm=MD5',
        [
            ('basic', {'special': 'paf ram', 'realm': 'basIC'}),
            ('bearer', None),
            ('digest', {
                'realm': '[email protected]',
                'qop': 'auth, auth-int',
                'algorithm': 'MD5',
            }),
        ],
    ),
    # a label that is given more than once, the last occurrence wins
    (
        'Basic realm="basIC", Basic realm="complex"',
        [('basic', {'realm': 'complex'})],
    ),
)


def test_parse_www_authenticate():
    """Run ``parse_www_authenticate()`` on all reference headers

    The parse result has to match the expected challenges exactly. A mere
    lookup of the expected labels would not notice when the parser reports
    additional, spurious challenges.
    """
    for hdr, targets in challenges:
        res = parse_www_authenticate(hdr)
        # `targets` lists each expected label once, hence converts
        # losslessly into the mapping the parser must return
        assert res == dict(targets), f'unexpected parse result for {hdr!r}'
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ python_requires = >= 3.8
install_requires =
annexremote
datalad >= 0.18.4
www-authenticate
humanize
packages = find_namespace:
include_package_data = True
Expand Down

0 comments on commit ce6a9c5

Please sign in to comment.