From 2fa7b9997128812650eec894051e129488d869ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Mon, 15 May 2023 11:51:39 +0200 Subject: [PATCH 1/7] Create EnsureHashAlgorithm based on EnsureChoice This adds EnsureHashAlgorithm constraint which checks whether the given algorith name is contained in hashlib's algorithms_guaranteed set. It derives from EnsureChoice without further customization. Using `algorithms_guaranteed` covers algorithms that have named constructors in hashlib, i.e. accessible through getattr(hashlib, name) - that's the current usage in MultiHash. Future expansion could see us move to algorithms_available - these will be recognized when passed to (slower) hashlib.new(), but have no named constructors. For details, see: https://docs.python.org/3.11/library/hashlib.html --- datalad_next/commands/ls_file_collection.py | 5 ++--- datalad_next/constraints/__init__.py | 1 + datalad_next/constraints/basic.py | 6 ++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/datalad_next/commands/ls_file_collection.py b/datalad_next/commands/ls_file_collection.py index b48f04d2..19454e9b 100644 --- a/datalad_next/commands/ls_file_collection.py +++ b/datalad_next/commands/ls_file_collection.py @@ -39,6 +39,7 @@ EnsureChoice, EnsurePath, EnsureURL, + EnsureHashAlgorithm, ) from datalad_next.uis import ( ansi_colors as ac, @@ -84,9 +85,7 @@ def __init__(self): param_constraints=dict( type=self._collection_types, collection=EnsurePath(lexists=True) | EnsureURL(), - # TODO EnsureHashAlgorithm - # https://github.com/datalad/datalad-next/issues/346 - #hash=None, + hash=EnsureHashAlgorithm(), ), joint_constraints={ ParameterConstraintContext(('type', 'collection', 'hash'), diff --git a/datalad_next/constraints/__init__.py b/datalad_next/constraints/__init__.py index 05442fd9..e6f01398 100644 --- a/datalad_next/constraints/__init__.py +++ b/datalad_next/constraints/__init__.py @@ -59,6 +59,7 @@ EnsureCallable, EnsureChoice, EnsureFloat, + EnsureHashAlgorithm, EnsureInt, EnsureKeyChoice, EnsureNone, diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index 0d9c56bc..23783805 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext' +from hashlib import algorithms_guaranteed from pathlib import Path import re @@ -497,3 +498,8 @@ def short_description(self): if self._ref else '', ) + + +class EnsureHashAlgorithm(EnsureChoice): + def __init__(self): + super(EnsureHashAlgorithm, self).__init__(algorithms_guaranteed) From 632facb290e9f5129f63a7027de66be22ec1e1a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Tue, 16 May 2023 11:50:48 +0200 Subject: [PATCH 2/7] Fix EnsureHashAlgorithm & use EnsureListOf The EnsureHash algorithm now calls its super.__init__ correctly. We use EnsureHashAlgorithm() | EnsureListOf(...) in ls_file_colelction now. This correctly handles incoming values, which are always a list in CLI (argparse behaviour with --append) and can be str or list in Python API. At least when they are OK. When input is not OK, two things are suboptimal: - an incorrect hash (str) proceeds to EnsureListOf, which tries to go letter by letter, failing on first - a hash (list) with an incorrect value would first fail on EnsureHashAlgorithm(list), proceed to EnsureListOf, and fail on EnsureHashAlgorithm(str) As a conequence, in both cases the error message would say "does not match any of 2 alternatives" and print EnsureChoice message ("is not one of ...") twice. --- datalad_next/commands/ls_file_collection.py | 3 ++- datalad_next/constraints/basic.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datalad_next/commands/ls_file_collection.py b/datalad_next/commands/ls_file_collection.py index 19454e9b..33aa2692 100644 --- a/datalad_next/commands/ls_file_collection.py +++ b/datalad_next/commands/ls_file_collection.py @@ -40,6 +40,7 @@ EnsurePath, EnsureURL, EnsureHashAlgorithm, + EnsureListOf, ) from datalad_next.uis import ( ansi_colors as ac, @@ -85,7 +86,7 @@ def __init__(self): param_constraints=dict( type=self._collection_types, collection=EnsurePath(lexists=True) | EnsureURL(), - hash=EnsureHashAlgorithm(), + hash=EnsureHashAlgorithm() | EnsureListOf(EnsureHashAlgorithm()), ), joint_constraints={ ParameterConstraintContext(('type', 'collection', 'hash'), diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index 23783805..e60cbae7 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -502,4 +502,4 @@ def short_description(self): class EnsureHashAlgorithm(EnsureChoice): def __init__(self): - super(EnsureHashAlgorithm, self).__init__(algorithms_guaranteed) + super(EnsureHashAlgorithm, self).__init__(*algorithms_guaranteed) From 0f04769a36cc84ddcfe500120ff1d97a9da26e3d Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 18 Oct 2023 13:30:22 +0200 Subject: [PATCH 3/7] Apply @mih's patch from comment for improved error as suggested in https://github.com/datalad/datalad-next/issues/346#issuecomment-1557394098 --- datalad_next/constraints/basic.py | 3 +++ datalad_next/constraints/compound.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index e60cbae7..4a47a114 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -275,6 +275,9 @@ def long_description(self): def short_description(self): return '{%s}' % ', '.join([repr(c) for c in self._allowed]) + def __str__(self): + return f"one of {self.short_description()}" + class EnsureKeyChoice(EnsureChoice): """Ensure value under a key in an input is in a set of possible values""" diff --git a/datalad_next/constraints/compound.py b/datalad_next/constraints/compound.py index 99fe8d66..bb0d87fd 100644 --- a/datalad_next/constraints/compound.py +++ b/datalad_next/constraints/compound.py @@ -77,10 +77,12 @@ def __call__(self, value): iter = self._iter_type( self._item_constraint(i) for i in value ) - except TypeError as e: + except (ConstraintError, TypeError) as e: self.raise_for( value, - "cannot coerce to target (item) type", + "{itertype} item is not {itype}", + itertype=self._iter_type.__name__, + itype=self._item_constraint, __caused_by__=e, ) if self._min_len is not None or self._max_len is not None: From b27725aa7191b7414854b321e03d1f846ac16e39 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Fri, 20 Oct 2023 11:19:49 +0200 Subject: [PATCH 4/7] Basic set of tests for EnsureHashAlgorithm --- datalad_next/constraints/tests/test_basic.py | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/datalad_next/constraints/tests/test_basic.py b/datalad_next/constraints/tests/test_basic.py index 2748a158..98f43f76 100644 --- a/datalad_next/constraints/tests/test_basic.py +++ b/datalad_next/constraints/tests/test_basic.py @@ -11,6 +11,7 @@ EnsureNone, EnsureCallable, EnsureChoice, + EnsureHashAlgorithm, EnsureKeyChoice, EnsureRange, EnsurePath, @@ -317,3 +318,27 @@ def test_EnsurePath_fordataset(existing_dataset): # 2. dataset is given as a dataset object tc = c.for_dataset(DatasetParameter(ds, ds)) assert tc('relpath') == (ds.pathobj / 'relpath') + + +def test_EnsureHashAlgorithm(): + c = EnsureHashAlgorithm() + # simple cases that should pass + hashes = [ + 'sha3_256', 'shake_256', 'sha3_384', 'md5', 'shake_128', 'sha384', + 'sha3_224', 'blake2s', 'sha1', 'blake2b', 'sha224', 'sha512', 'sha256', + 'sha3_512' + ] + for hash in hashes: + c(hash) + # a few bogus ones: + bad_hashes = [ + 'md17', 'McGyver', 'sha2', 'bogus' + ] + for baddie in bad_hashes: + with pytest.raises(ConstraintError): + c(baddie) + + # check messaging + for i in ('md5', 'shake_256', 'sha3_512'): + assert i in c.short_description() + assert i in c.long_description() From b27f2920de231fd129bbf846688c9e3b01ff78de Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Sat, 21 Oct 2023 10:19:06 +0200 Subject: [PATCH 5/7] Add simple test for EnsureChoice.__str__ --- datalad_next/constraints/tests/test_basic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datalad_next/constraints/tests/test_basic.py b/datalad_next/constraints/tests/test_basic.py index 98f43f76..21cb6656 100644 --- a/datalad_next/constraints/tests/test_basic.py +++ b/datalad_next/constraints/tests/test_basic.py @@ -189,6 +189,7 @@ def test_choice(): assert i in descr # short is a "set" or repr()s assert c.short_description() == "{'choice1', 'choice2', None}" + assert str(c) == "one of {'choice1', 'choice2', None}" # this should always work assert c('choice1') == 'choice1' assert c(None) is None From 678886408b0c5b40bbd23de7395712006ba99e87 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Sat, 21 Oct 2023 10:24:17 +0200 Subject: [PATCH 6/7] Add changelog --- changelog.d/20231021_102012_michael.hanke_ensurehash.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changelog.d/20231021_102012_michael.hanke_ensurehash.md diff --git a/changelog.d/20231021_102012_michael.hanke_ensurehash.md b/changelog.d/20231021_102012_michael.hanke_ensurehash.md new file mode 100644 index 00000000..dcf0d5fd --- /dev/null +++ b/changelog.d/20231021_102012_michael.hanke_ensurehash.md @@ -0,0 +1,6 @@ +### 💫 Enhancements and new features + +- New `EnsureHashAlgorithm` constraint to automatically expose + and verify algorithm labels from `hashlib.algorithms_guaranteed` + Fixes https://github.com/datalad/datalad-next/issues/346 via + https://github.com/datalad/datalad-next/pull/492 (by @mslw @adswa) From 666977d0492f554a3b764d825557597601bd9520 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Sat, 21 Oct 2023 15:08:37 +0200 Subject: [PATCH 7/7] Minor tuning, documentation --- datalad_next/constraints/basic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/datalad_next/constraints/basic.py b/datalad_next/constraints/basic.py index 4a47a114..80d0f7fe 100644 --- a/datalad_next/constraints/basic.py +++ b/datalad_next/constraints/basic.py @@ -12,7 +12,7 @@ __docformat__ = 'restructuredtext' -from hashlib import algorithms_guaranteed +from hashlib import algorithms_guaranteed as hash_algorithms_guaranteed from pathlib import Path import re @@ -504,5 +504,9 @@ def short_description(self): class EnsureHashAlgorithm(EnsureChoice): + """Ensure an input matches a name of a ``hashlib`` algorithm + + Specifically the item must be in the ``algorithms_guaranteed`` collection. + """ def __init__(self): - super(EnsureHashAlgorithm, self).__init__(*algorithms_guaranteed) + super().__init__(*hash_algorithms_guaranteed)