From 68ac70a4413d49325b1accf4b194d5bde0e79013 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Thu, 2 Mar 2023 17:36:48 +0100 Subject: [PATCH 1/3] TMP: convert extract into ValidatedInterface, and adopt constraint validation This is a demonstration of how one existing command could adopt datalad-next's parameter constraint validation. It changes the baseclass to next's ValidatedInterface, and defines a validator with relevant parameter constraints: Specifically, the constraints are: - The provided dataset exists, or a dataset can be derived from the current directory - The path points to an existing file (ref #354) - The extractorname is a string - The extractorargs is a mapping of key-value pairs This makes a dedicated check whether a file exists obsolete, and it could replace the checks that check_dataset() does (provided an additional constraint option in EnsureDataset() that allows checking for valid dataset IDs - I've created an issue about this in https://github.com/datalad/datalad-next/issues/272). This change would introduce a dependency on datalad-next, and as parts of this PR were only tested with yet unreleased branches of datalad-next, it will not work right now unless you're on the right development version of datalad-next. 
--- datalad_metalad/extract.py | 53 +++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/datalad_metalad/extract.py b/datalad_metalad/extract.py index 4fd7b456..81d7cfd1 100644 --- a/datalad_metalad/extract.py +++ b/datalad_metalad/extract.py @@ -30,14 +30,22 @@ from uuid import UUID from dataclasses import dataclass - +from datalad_next.commands import ( + EnsureCommandParameterization, + ValidatedInterface, +) from datalad.distribution.dataset import Dataset -from datalad.distribution.dataset import ( - datasetmethod, - EnsureDataset, +from datalad.distribution.dataset import datasetmethod +from datalad_next.constraints import ( + EnsurePath, + EnsureStr, + EnsureMapping, + EnsureNone, + EnsureListOf, + NoConstraint, ) +from datalad_next.constraints.dataset import EnsureDataset from datalad.interface.base import ( - Interface, build_doc, eval_results, ) @@ -54,10 +62,6 @@ MetadataExtractorBase, ) -from datalad.support.constraints import ( - EnsureNone, - EnsureStr, -) from datalad.support.param import Parameter from dataladmetadatamodel.metadatapath import MetadataPath @@ -91,7 +95,7 @@ class ExtractionArguments: @build_doc -class Extract(Interface): +class Extract(ValidatedInterface): """Run a metadata extractor on a dataset or file. This command distinguishes between dataset-level extraction and @@ -131,6 +135,23 @@ class Extract(Interface): on the whether file-level- or dataset-level extraction is requested. 
""" + # Define parameter constraints + extractorargs_constraints = EnsureMapping(key=EnsureStr(), + value=EnsureStr(), + delimiter='') | \ + EnsureListOf(item_constraint=NoConstraint(), + min_len=0) + _validator_ = EnsureCommandParameterization( + param_constraints=dict( + path=EnsurePath(lexists=True), + dataset=EnsureDataset(installed=True, purpose='meta-extract'), + extractor=EnsureStr(), + extractorargs=extractorargs_constraints, + ), + validate_defaults=("dataset",), + tailor_for_dataset=dict(path="dataset") + ) + result_renderer = "tailored" _examples_ = [ @@ -180,20 +201,20 @@ class Extract(Interface): specified. You might provide an absolute file path, but it has to contain the dataset path as prefix.""", - constraints=EnsureStr() | EnsureNone()), + ), dataset=Parameter( args=("-d", "--dataset"), doc="""Dataset to extract metadata from. If no dataset is given, the dataset is determined by the current work directory.""", - constraints=EnsureDataset() | EnsureNone()), + ), context=Parameter( args=("-c", "--context"), doc="""Context, a JSON-serialized dictionary that provides constant data which has been gathered before, so meta-extract will not have re-gather this data. Keys and values are strings. 
meta-extract will look for the following key: 'dataset_version'.""", - constraints=EnsureDataset() | EnsureNone()), + ), get_context=Parameter( args=("--get-context",), action="store_true", @@ -219,7 +240,8 @@ class Extract(Interface): prevent interpretation of the key of the first extractor argument as path for a file-level extraction.""", nargs="*", - constraints=EnsureStr() | EnsureNone())) + ) + ) @staticmethod @datasetmethod(name="meta_extract") @@ -247,7 +269,8 @@ def __call__( if isinstance(context, str) else context)) - source_dataset = check_dataset(dataset or curdir, "extract metadata") + # dataset is a DatasetParameter from the parameter validation + source_dataset = dataset.ds source_dataset_version = context.get("dataset_version", None) if source_dataset_version is None: source_dataset_version = source_dataset.repo.get_hexsha() From fa89539677c2e658701bbf0129db0f5d6dc5b108 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Thu, 2 Mar 2023 17:40:58 +0100 Subject: [PATCH 2/3] TMP: depend on datalad-next branch for demonstration purposes --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index a69adc9d..95790d94 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,6 +19,7 @@ install_requires = datalad-metadata-model >=0.3.10 pytest pyyaml + datalad-next @ git+https://github.com/mih/datalad-next@resolvepath#egg=datalad-next, test_requires = coverage pytest From 22f1077314ffae973b3668514fcfdda62217ffb6 Mon Sep 17 00:00:00 2001 From: Adina Wagner Date: Thu, 2 Mar 2023 18:55:54 +0100 Subject: [PATCH 3/3] TMP: Give meta-dump basic parameter constraints from next This adds an 'EnsureDataset()' parameter validation, and an 'EnsureStr()' parameter validation for the path argument. The immediate advantage is that there are now distinct errors for NoMetaDataStoreFound and NoDatasetFound, which were prior both resulting in a NoMetaDataStoreFound exception. 
--- datalad_metalad/dump.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/datalad_metalad/dump.py b/datalad_metalad/dump.py index 33d0dd2b..63ce0e7c 100644 --- a/datalad_metalad/dump.py +++ b/datalad_metalad/dump.py @@ -27,14 +27,18 @@ from datalad.distribution.dataset import datasetmethod from datalad.interface.base import ( - Interface, build_doc, eval_results, ) -from datalad.support.constraints import ( - EnsureNone, +from datalad_next.commands import ( + EnsureCommandParameterization, + ValidatedInterface, +) +from datalad_next.constraints import ( EnsureStr, + EnsureNone, ) +from datalad_next.constraints.dataset import EnsureDataset from datalad.support.param import Parameter from datalad.ui import ui from dataladmetadatamodel.datasettree import datalad_root_record_name @@ -413,7 +417,7 @@ def dump_from_uuid_set(mapper: str, @build_doc -class Dump(Interface): +class Dump(ValidatedInterface): """Dump a dataset's aggregated metadata for dataset and file metadata Two types of metadata are supported: @@ -432,6 +436,16 @@ class Dump(Interface): (The tree-format is the default format and does not require a prefix). """ + # Define parameter constraints + _validator_ = EnsureCommandParameterization( + param_constraints=dict( + dataset=EnsureDataset(installed=True, purpose='meta-dump'), + path=EnsureStr(), + ), + validate_defaults=("dataset",) + ) + + # Use a custom renderer to emit a self-contained metadata record. The # emitted record can be fed into meta-add for example. 
result_renderer = 'tailored' @@ -481,7 +495,6 @@ class Dump(Interface): args=("path",), metavar="DATASET_FILE_PATH_PATTERN", doc="path to query metadata for", - constraints=EnsureStr() | EnsureNone(), nargs="?"), recursive=Parameter( args=("-r", "--recursive",), @@ -501,6 +514,8 @@ def __call__( path="", recursive=False): + # dataset is a DatasetParameter() from datalad-next + dataset = dataset.ds.path metadata_store_path, tree_version_list, uuid_set = get_metadata_objects( dataset, default_mapper_family)