Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RF: Simplify EnsureDataset and add optional ID check #279

Merged
merged 6 commits into from
Mar 3, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions datalad_next/constraints/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,25 @@

__docformat__ = 'restructuredtext'

__all__ = ['Constraint', 'Constraints', 'AltConstraints']

from typing import (
TYPE_CHECKING,
TypeVar,
)
__all__ = ['Constraint', 'Constraints', 'AltConstraints',
'DatasetParameter']

from .exceptions import ConstraintError

if TYPE_CHECKING: # pragma: no cover
from datalad_next.datasets import Dataset

DatasetDerived = TypeVar('DatasetDerived', bound='Dataset')
class DatasetParameter:
    """Utility class to report an original and resolved dataset parameter value

    This is used by `EnsureDataset` to be able to report the original argument
    semantics of a dataset parameter to a receiving command. It is consumed
    by any ``Constraint.for_dataset()``.

    The original argument is provided via the `original` property.
    A corresponding `Dataset` instance is provided via the `ds` property.
    """
    def __init__(self, original, ds):
        # the unmodified value given by the caller
        # (e.g., a path, a string, a Dataset instance, or None)
        self.original = original
        # the `Dataset` instance resolved from `original`
        self.ds = ds


class Constraint:
Expand Down Expand Up @@ -67,7 +73,7 @@ def short_description(self):
# used as a condensed primer for the parameter lists
raise NotImplementedError("abstract class")

def for_dataset(self, dataset: DatasetDerived) -> Constraint:
def for_dataset(self, dataset: DatasetParameter) -> Constraint:
"""Return a constraint-variant for a specific dataset context

The default implementation returns the unmodified, identical
Expand Down
61 changes: 53 additions & 8 deletions datalad_next/constraints/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,19 @@
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Basic constraints for declaring essential data types, values, and ranges"""

from __future__ import annotations

__docformat__ = 'restructuredtext'

from pathlib import Path
import re

from .base import Constraint
from datalad_next.datasets import resolve_path

from .base import (
Constraint,
DatasetParameter,
)
from .utils import _type_str


Expand Down Expand Up @@ -332,12 +339,14 @@ class EnsurePath(Constraint):
or relative.
"""
def __init__(self,
*,
path_type: type = Path,
is_format: str or None = None,
lexists: bool or None = None,
is_mode: callable = None,
ref: Path = None,
ref_is: str = 'parent-or-same-as'):
ref_is: str = 'parent-or-same-as',
dsarg: DatasetParameter | None = None):
"""
Parameters
----------
Expand All @@ -362,6 +371,12 @@ def __init__(self,
comparison operation is given by `ref_is`.
ref_is: {'parent-or-identical'}
Comparison operation to perform when `ref` is given.
dsarg: DatasetParameter, optional
If given, incoming paths are resolved in the following fashion:
If, and only if, the original "dataset" parameter was a
``Dataset`` object instance, relative paths are interpreted as
relative to the given dataset. In all other cases, relative paths
are treated as relative to the current working directory.
"""
super().__init__()
self._path_type = path_type
Expand All @@ -370,9 +385,29 @@ def __init__(self,
self._is_mode = is_mode
self._ref = ref
self._ref_is = ref_is
self._dsarg = dsarg

def __call__(self, value):
# turn it into the target type to make everything below
# more straightforward
path = self._path_type(value)

# we are testing the format first, because resolve_path()
# will always turn things into absolute paths
if self._is_format is not None:
is_abs = path.is_absolute()
if self._is_format == 'absolute' and not is_abs:
raise ValueError(f'{path} is not an absolute path')
elif self._is_format == 'relative' and is_abs:
raise ValueError(f'{path} is not a relative path')

# resolve relative paths against a dataset, if given
if self._dsarg:
path = resolve_path(
path,
self._dsarg.original,
self._dsarg.ds)

mode = None
if self._lexists is not None or self._is_mode is not None:
try:
Expand All @@ -385,12 +420,6 @@ def __call__(self, value):
raise ValueError(f'{path} does not exist')
elif not self._lexists and mode is not None:
raise ValueError(f'{path} does (already) exist')
if self._is_format is not None:
is_abs = path.is_absolute()
if self._is_format == 'absolute' and not is_abs:
raise ValueError(f'{path} is not an absolute path')
elif self._is_format == 'relative' and is_abs:
raise ValueError(f'{path} is not a relative path')
if self._is_mode is not None:
if not self._is_mode(mode):
raise ValueError(f'{path} does not match desired mode')
Expand All @@ -408,6 +437,22 @@ def __call__(self, value):
f'{self._ref} is not {self._ref_is} {path}')
return path

def for_dataset(self, dataset: DatasetParameter) -> Constraint:
    """Return a similarly parametrized variant that resolves paths
    against a given dataset (argument)

    Parameters
    ----------
    dataset: DatasetParameter
        The dataset argument (original and resolved form) that relative
        paths shall be resolved against.

    Returns
    -------
    EnsurePath
        A new constraint instance with an otherwise identical
        configuration, but with the dataset argument attached.
    """
    # re-instantiate with the identical configuration, only adding the
    # dataset argument to enable dataset-aware path resolution
    return self.__class__(
        path_type=self._path_type,
        is_format=self._is_format,
        lexists=self._lexists,
        is_mode=self._is_mode,
        ref=self._ref,
        ref_is=self._ref_is,
        dsarg=dataset,
    )

def short_description(self):
return '{}{}path{}'.format(
'existing '
Expand Down
6 changes: 3 additions & 3 deletions datalad_next/constraints/compound.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from .base import (
Constraint,
DatasetDerived,
DatasetParameter,
)


Expand Down Expand Up @@ -187,7 +187,7 @@ def __call__(self, value) -> Dict:
val = self._value_constraint(val)
return {key: val}

def for_dataset(self, dataset: DatasetDerived):
def for_dataset(self, dataset: DatasetParameter) -> Constraint:
# tailor both constraints to the dataset and reuse delimiter
return EnsureMapping(
key=self._key_constraint.for_dataset(dataset),
Expand Down Expand Up @@ -309,7 +309,7 @@ def __repr__(self) -> str:
return f'{self.__class__.__name__}' \
f'({self._constraint!r}, passthrough={self._passthrough!r})'

def for_dataset(self, dataset: DatasetDerived) -> Constraint:
def for_dataset(self, dataset: DatasetParameter) -> Constraint:
"""Wrap the wrapped constraint again after tailoring it for the dataset

The pass-through value is re-used.
Expand Down
102 changes: 53 additions & 49 deletions datalad_next/constraints/dataset.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,21 @@
"""Constraints for DataLad datasets"""

from __future__ import annotations

from pathlib import (
Path,
PurePath,
)

from datalad_next.datasets import Dataset

from .base import Constraint
from .base import (
Constraint,
DatasetParameter,
)
from .exceptions import NoDatasetFound


class DatasetParameter:
    """Utility class to report an original and resolved dataset parameter value

    This is used by `EnsureDataset` to be able to report the original argument
    semantics of a dataset parameter to a receiving command.

    The original argument is provided via the `original` property.
    A corresponding `Dataset` instance is provided via the `ds` property.
    """
    def __init__(self, original, ds):
        # the unmodified value given by the caller
        self.original = original
        # the `Dataset` instance resolved from `original`
        self.ds = ds


class EnsureDataset(Constraint):
"""Ensure an absent/present `Dataset` from any path or Dataset instance

Expand All @@ -50,7 +41,10 @@ class EnsureDataset(Constraint):
the DataLad core package). With ``installed=False`` no exception is
raised and a dataset instances matching PWD is returned.
"""
def __init__(self, installed: bool = None, purpose: str = None):
def __init__(self,
installed: bool | None = None,
purpose: str | None = None,
idcheck: bool | None = None):
adswa marked this conversation as resolved.
Show resolved Hide resolved
"""
Parameters
----------
Expand All @@ -60,54 +54,64 @@ def __init__(self, installed: bool = None, purpose: str = None):
purpose: str, optional
If given, will be used in generated error messages to communicate
why a dataset is required (to exist)
idcheck: bool, optional
If given, performs an additional check whether the dataset has a
valid dataset ID.
"""
self._installed = installed
self._purpose = purpose
self._idcheck = idcheck
super().__init__()

def __call__(self, value) -> DatasetParameter:
# good-enough test to recognize a dataset instance cheaply
if hasattr(value, 'repo') and hasattr(value, 'pathobj'):
if self._installed is not None:
is_installed = value.is_installed()
if self._installed and not is_installed:
# for uniformity with require_dataset() below, use
# this custom exception
raise NoDatasetFound(f'{value} is not installed')
elif not self._installed and is_installed:
raise ValueError(f'{value} already exists locally')
return DatasetParameter(value, value)
ds = value
# anticipate what require_dataset() could handle and fail if we got
# something else
elif not isinstance(value, (str, PurePath, type(None))):
raise TypeError(f"Cannot create Dataset from {type(value)}")

from datalad.distribution.dataset import require_dataset
try:
ds = require_dataset(
value,
check_installed=self._installed is True,
purpose=self._purpose,
)
except NoDatasetFound:
# mitigation of non-uniform require_dataset() behavior.
# with value == None it does not honor check_installed
# https://github.com/datalad/datalad/issues/7281
if self._installed is True:
# if we are instructed to ensure an installed dataset
raise
else:
# but otherwise go with CWD. require_dataset() did not
# find a dataset in any parent dir either, so this is
# the best we can do. Installation absence verification
# will happen further down
ds = Dataset(Path.cwd())

if self._installed is False and ds.is_installed():
raise ValueError(f'{ds} already exists locally')
else:
ds = _require_dataset(self, value)
assert ds
if self._installed is not None:
is_installed = ds.is_installed()
if self._installed is False and is_installed:
raise ValueError(f'{ds} already exists locally')
if self._installed and not is_installed:
# for uniformity with require_dataset() below, use
# this custom exception
raise NoDatasetFound(f'{ds} is not installed')
if self._idcheck and not ds.id:
raise NoDatasetFound(f'{ds} does not have a valid '
f'datalad-id')
return DatasetParameter(value, ds)

def short_description(self) -> str:
return "(path to) {}Dataset".format(
'an existing ' if self._installed is True
else 'a non-existing ' if self._installed is False else 'a ')


def _require_dataset(self, value):
    """Resolve `value` to a `Dataset` via DataLad's ``require_dataset()``

    Falls back to a dataset at the current working directory when no
    dataset can be located and an installed dataset is not required.

    Parameters
    ----------
    self: EnsureDataset
        Constraint instance; provides ``_installed`` and ``_purpose``.
    value
        Path, string, or `None` to resolve to a dataset.

    Returns
    -------
    Dataset

    Raises
    ------
    NoDatasetFound
        When an installed dataset is required but none can be found.
    """
    from datalad.distribution.dataset import require_dataset
    try:
        return require_dataset(
            value,
            check_installed=self._installed is True,
            purpose=self._purpose,
        )
    except NoDatasetFound:
        # mitigation of non-uniform require_dataset() behavior.
        # with value == None it does not honor check_installed
        # https://github.com/datalad/datalad/issues/7281
        if self._installed is True:
            # if we are instructed to ensure an installed dataset
            raise
        # otherwise go with CWD. require_dataset() did not
        # find a dataset in any parent dir either, so this is
        # the best we can do. Installation absence verification
        # will happen further down
        return Dataset(Path.cwd())
2 changes: 1 addition & 1 deletion datalad_next/constraints/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ def __call__(
tailor_for = self._tailor_for_dataset.get(argname)
if tailor_for and isinstance(validated.get(tailor_for),
DatasetParameter):
validator = validator.for_dataset(validated[tailor_for].ds)
validator = validator.for_dataset(validated[tailor_for])

try:
validated[argname] = validator(arg)
Expand Down
20 changes: 20 additions & 0 deletions datalad_next/constraints/tests/test_basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import pathlib
import pytest

from datalad_next.datasets import Dataset

from ..base import DatasetParameter
from ..basic import (
EnsureInt,
EnsureFloat,
Expand Down Expand Up @@ -299,3 +302,20 @@ def test_EnsurePath(tmp_path):
c = EnsurePath(ref=target, ref_is='stupid')
with pytest.raises(ValueError):
c('doesnotmatter')


def test_EnsurePath_fordataset(tmp_path):
    # exercises EnsurePath.for_dataset(): tailored constraints must resolve
    # relative paths according to the *original* form of the dataset argument
    P = pathlib.Path
    ds = Dataset(tmp_path).create(result_renderer='disabled')
    # standard: relative in, relative out
    c = EnsurePath()
    assert c('relpath') == P('relpath')
    # tailor constraint for our dataset
    # (this is what would be done by EnsureCommandParameterization
    # 1. dataset given as a path -- resolve against CWD
    # output is always absolute
    tc = c.for_dataset(DatasetParameter(tmp_path, ds))
    assert tc('relpath') == (P.cwd() / 'relpath')
    # 2. dataset is given as a dataset object
    tc = c.for_dataset(DatasetParameter(ds, ds))
    assert tc('relpath') == (ds.pathobj / 'relpath')
4 changes: 2 additions & 2 deletions datalad_next/constraints/tests/test_cmdarg_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ def __call__(self, value):
class EnsureDatasetID(EnsureUUID):
    """Makes sure that something is a dataset ID (UUID), or the dataset UUID
    of a particular dataset when tailored"""
    def for_dataset(self, dsarg):
        # `dsarg` is a DatasetParameter; narrow the constraint to accept
        # only the exact UUID of the resolved dataset
        return EnsureValue(UUID(dsarg.ds.id))


class DsTailoringValidator(EnsureCommandParameterization):
Expand Down
11 changes: 11 additions & 0 deletions datalad_next/constraints/tests/test_special_purpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,3 +356,14 @@ def test_EnsureDataset(tmp_path):
assert EnsureDataset(
installed=False).short_description() == \
'(path to) a non-existing Dataset'

# smoke test for idcheck:
assert EnsureDataset(idcheck=True)(ds).ds == ds
assert EnsureDataset(idcheck=False)(ds).ds == ds
# unset the dataset ID to test whether an ID check would raise, but
# bring it back later in case future tests need it
id = ds.config.get('datalad.dataset.id')
ds.config.unset('datalad.dataset.id')
with pytest.raises(NoDatasetFound):
EnsureDataset(idcheck=True)(tmp_path)
ds.config.set('datalad.dataset.id', id)
adswa marked this conversation as resolved.
Show resolved Hide resolved