diff --git a/changelog.d/20231021_102012_michael.hanke_ensurehash.md b/changelog.d/20231021_102012_michael.hanke_ensurehash.md
new file mode 100644
index 00000000..dcf0d5fd
--- /dev/null
+++ b/changelog.d/20231021_102012_michael.hanke_ensurehash.md
@@ -0,0 +1,6 @@
+### 💫 Enhancements and new features
+
+- New `EnsureHashAlgorithm` constraint to automatically expose
+  and verify algorithm labels from `hashlib.algorithms_guaranteed`.
+  Fixes https://github.com/datalad/datalad-next/issues/346 via
+  https://github.com/datalad/datalad-next/pull/492 (by @mslw @adswa)
diff --git a/datalad_next/commands/ls_file_collection.py b/datalad_next/commands/ls_file_collection.py
index aa36bb82..57462c49 100644
--- a/datalad_next/commands/ls_file_collection.py
+++ b/datalad_next/commands/ls_file_collection.py
@@ -39,6 +39,8 @@
     EnsureChoice,
     EnsurePath,
     EnsureURL,
+    EnsureHashAlgorithm,
+    EnsureListOf,
 )
 from datalad_next.uis import (
     ansi_colors as ac,
@@ -93,9 +95,7 @@ def __init__(self):
             param_constraints=dict(
                 type=self._collection_types,
                 collection=EnsurePath(lexists=True) | EnsureURL(),
-                # TODO EnsureHashAlgorithm
-                # https://github.com/datalad/datalad-next/issues/346
-                #hash=None,
+                hash=EnsureHashAlgorithm() | EnsureListOf(EnsureHashAlgorithm()),
             ),
             joint_constraints={
                 ParameterConstraintContext(('type', 'collection', 'hash'),
diff --git a/datalad_next/constraints/__init__.py b/datalad_next/constraints/__init__.py
index 05442fd9..e6f01398 100644
--- a/datalad_next/constraints/__init__.py
+++ b/datalad_next/constraints/__init__.py
@@ -59,6 +59,7 @@
     EnsureCallable,
     EnsureChoice,
     EnsureFloat,
+    EnsureHashAlgorithm,
     EnsureInt,
     EnsureKeyChoice,
     EnsureNone,
diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py
index 0d9c56bc..80d0f7fe 100644
--- a/datalad_next/constraints/basic.py
+++ b/datalad_next/constraints/basic.py
@@ -12,6 +12,7 @@
 
 __docformat__ = 'restructuredtext'
 
+from hashlib import algorithms_guaranteed as hash_algorithms_guaranteed
 from pathlib import Path
 import re
 
@@ -274,6 +275,9 @@ def long_description(self):
     def short_description(self):
         return '{%s}' % ', '.join([repr(c) for c in self._allowed])
 
+    def __str__(self):
+        return f"one of {self.short_description()}"
+
 
 class EnsureKeyChoice(EnsureChoice):
     """Ensure value under a key in an input is in a set of possible values"""
@@ -497,3 +501,14 @@ def short_description(self):
             if self._ref
             else '',
         )
+
+
+class EnsureHashAlgorithm(EnsureChoice):
+    """Ensure an input matches a name of a ``hashlib`` algorithm
+
+    Specifically the item must be in the ``algorithms_guaranteed`` collection.
+    """
+    def __init__(self):
+        # ``algorithms_guaranteed`` is a set; sort it so that descriptions
+        # and error messages list the choices in a stable order
+        super().__init__(*sorted(hash_algorithms_guaranteed))
diff --git a/datalad_next/constraints/compound.py b/datalad_next/constraints/compound.py
index 99fe8d66..bb0d87fd 100644
--- a/datalad_next/constraints/compound.py
+++ b/datalad_next/constraints/compound.py
@@ -77,10 +77,12 @@ def __call__(self, value):
             iter = self._iter_type(
                 self._item_constraint(i) for i in value
             )
-        except TypeError as e:
+        except (ConstraintError, TypeError) as e:
             self.raise_for(
                 value,
-                "cannot coerce to target (item) type",
+                "{itertype} item is not {itype}",
+                itertype=self._iter_type.__name__,
+                itype=self._item_constraint,
                 __caused_by__=e,
             )
         if self._min_len is not None or self._max_len is not None:
diff --git a/datalad_next/constraints/tests/test_basic.py b/datalad_next/constraints/tests/test_basic.py
index 9b0d12c4..b1301ba5 100644
--- a/datalad_next/constraints/tests/test_basic.py
+++ b/datalad_next/constraints/tests/test_basic.py
@@ -11,6 +11,7 @@
     EnsureNone,
     EnsureCallable,
     EnsureChoice,
+    EnsureHashAlgorithm,
     EnsureKeyChoice,
     EnsureRange,
     EnsurePath,
@@ -188,6 +189,7 @@ def test_choice():
         assert i in descr
     # short is a "set" or repr()s
     assert c.short_description() == "{'choice1', 'choice2', None}"
+    assert str(c) == "one of {'choice1', 'choice2', None}"
     # this should always work
     assert c('choice1') == 'choice1'
     assert c(None) is None
@@ -317,3 +319,27 @@ def test_EnsurePath_fordataset(existing_dataset):
     # 2. dataset is given as a dataset object
     tc = c.for_dataset(DatasetParameter(ds, ds))
     assert tc('relpath') == (ds.pathobj / 'relpath')
+
+
+def test_EnsureHashAlgorithm():
+    c = EnsureHashAlgorithm()
+    # simple cases that should pass
+    hashes = [
+        'sha3_256', 'shake_256', 'sha3_384', 'md5', 'shake_128', 'sha384',
+        'sha3_224', 'blake2s', 'sha1', 'blake2b', 'sha224', 'sha512', 'sha256',
+        'sha3_512'
+    ]
+    for algo in hashes:
+        assert c(algo) == algo
+    # a few bogus ones:
+    bad_hashes = [
+        'md17', 'McGyver', 'sha2', 'bogus'
+    ]
+    for baddie in bad_hashes:
+        with pytest.raises(ConstraintError):
+            c(baddie)
+
+    # check messaging
+    for i in ('md5', 'shake_256', 'sha3_512'):
+        assert i in c.short_description()
+        assert i in c.long_description()